aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorwoojiq2024-01-14 14:46:32 +0000
committerGitHub2024-01-14 14:46:32 +0000
commit3f88a3f4e6f75bf04246a8015652931e640e0821 (patch)
treeb5e457d8172f105eb3edb51470e11d29cb3c1d38
parenta0b02106c35ede95438bd23069d2b7f999ed8684 (diff)
Change path normalization strategy to not resolve symlinks (#9330)
-rw-r--r--Cargo.lock1
-rw-r--r--helix-core/Cargo.toml1
-rw-r--r--helix-core/src/path.rs71
-rw-r--r--helix-core/tests/path.rs124
4 files changed, 171 insertions, 26 deletions
diff --git a/Cargo.lock b/Cargo.lock
index a7e06b6e..3643b953 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1074,6 +1074,7 @@ dependencies = [
"slotmap",
"smallvec",
"smartstring",
+ "tempfile",
"textwrap",
"toml",
"tree-sitter",
diff --git a/helix-core/Cargo.toml b/helix-core/Cargo.toml
index d7fff6c6..be41fa01 100644
--- a/helix-core/Cargo.toml
+++ b/helix-core/Cargo.toml
@@ -55,3 +55,4 @@ parking_lot = "0.12"
[dev-dependencies]
quickcheck = { version = "1", default-features = false }
indoc = "2.0.4"
+tempfile = "3.7.0"
diff --git a/helix-core/src/path.rs b/helix-core/src/path.rs
index ede37e04..0cf6f812 100644
--- a/helix-core/src/path.rs
+++ b/helix-core/src/path.rs
@@ -30,31 +30,10 @@ pub fn expand_tilde(path: &Path) -> PathBuf {
path.to_path_buf()
}
-/// Normalize a path, removing things like `.` and `..`.
-///
-/// CAUTION: This does not resolve symlinks (unlike
-/// [`std::fs::canonicalize`]). This may cause incorrect or surprising
-/// behavior at times. This should be used carefully. Unfortunately,
-/// [`std::fs::canonicalize`] can be hard to use correctly, since it can often
-/// fail, or on Windows returns annoying device paths. This is a problem Cargo
-/// needs to improve on.
-/// Copied from cargo: <https://github.com/rust-lang/cargo/blob/070e459c2d8b79c5b2ac5218064e7603329c92ae/crates/cargo-util/src/paths.rs#L81>
+/// Normalize a path without resolving symlinks.
+// Strategy: start from the first component and move up. Cannonicalize previous path,
+// join component, cannonicalize new path, strip prefix and join to the final result.
pub fn get_normalized_path(path: &Path) -> PathBuf {
- // normalization strategy is to canonicalize first ancestor path that exists (i.e., canonicalize as much as possible),
- // then run handrolled normalization on the non-existent remainder
- let (base, path) = path
- .ancestors()
- .find_map(|base| {
- let canonicalized_base = dunce::canonicalize(base).ok()?;
- let remainder = path.strip_prefix(base).ok()?.into();
- Some((canonicalized_base, remainder))
- })
- .unwrap_or_else(|| (PathBuf::new(), PathBuf::from(path)));
-
- if path.as_os_str().is_empty() {
- return base;
- }
-
let mut components = path.components().peekable();
let mut ret = if let Some(c @ Component::Prefix(..)) = components.peek().cloned() {
components.next();
@@ -70,20 +49,60 @@ pub fn get_normalized_path(path: &Path) -> PathBuf {
ret.push(component.as_os_str());
}
Component::CurDir => {}
+ #[cfg(not(windows))]
Component::ParentDir => {
ret.pop();
}
+ #[cfg(windows)]
+ Component::ParentDir => {
+ if let Some(head) = ret.components().next_back() {
+ match head {
+ Component::Prefix(_) | Component::RootDir => {}
+ Component::CurDir => unreachable!(),
+ // If we left previous component as ".." it means we met a symlink before and we can't pop path.
+ Component::ParentDir => {
+ ret.push("..");
+ }
+ Component::Normal(_) => {
+ if ret.is_symlink() {
+ ret.push("..");
+ } else {
+ ret.pop();
+ }
+ }
+ }
+ }
+ }
+ #[cfg(not(windows))]
Component::Normal(c) => {
ret.push(c);
}
+ #[cfg(windows)]
+ Component::Normal(c) => 'normal: {
+ use std::fs::canonicalize;
+
+ let new_path = ret.join(c);
+ if new_path.is_symlink() {
+ ret = new_path;
+ break 'normal;
+ }
+ let (can_new, can_old) = (canonicalize(&new_path), canonicalize(&ret));
+ match (can_new, can_old) {
+ (Ok(can_new), Ok(can_old)) => {
+ let striped = can_new.strip_prefix(can_old);
+ ret.push(striped.unwrap_or_else(|_| c.as_ref()));
+ }
+ _ => ret.push(c),
+ }
+ }
}
}
- base.join(ret)
+ dunce::simplified(&ret).to_path_buf()
}
/// Returns the canonical, absolute form of a path with all intermediate components normalized.
///
-/// This function is used instead of `std::fs::canonicalize` because we don't want to verify
+/// This function is used instead of [`std::fs::canonicalize`] because we don't want to verify
/// here if the path exists, just normalize it's components.
pub fn get_canonicalized_path(path: &Path) -> PathBuf {
let path = expand_tilde(path);
diff --git a/helix-core/tests/path.rs b/helix-core/tests/path.rs
new file mode 100644
index 00000000..cbda5e1a
--- /dev/null
+++ b/helix-core/tests/path.rs
@@ -0,0 +1,124 @@
+#![cfg(windows)]
+
+use std::{
+ env::set_current_dir,
+ error::Error,
+ path::{Component, Path, PathBuf},
+};
+
+use helix_core::path::get_normalized_path;
+use tempfile::Builder;
+
+// Paths on Windows are almost always case-insensitive.
+// Normalization should return the original path.
+// E.g. mkdir `CaSe`, normalize(`case`) = `CaSe`.
+#[test]
+fn test_case_folding_windows() -> Result<(), Box<dyn Error>> {
+ // tmp/root/case
+ let tmp_prefix = std::env::temp_dir();
+ set_current_dir(&tmp_prefix)?;
+
+ let root = Builder::new().prefix("root-").tempdir()?;
+ let case = Builder::new().prefix("CaSe-").tempdir_in(&root)?;
+
+ let root_without_prefix = root.path().strip_prefix(&tmp_prefix)?;
+
+ let lowercase_case = format!(
+ "case-{}",
+ case.path()
+ .file_name()
+ .unwrap()
+ .to_string_lossy()
+ .split_at(5)
+ .1
+ );
+ let test_path = root_without_prefix.join(lowercase_case);
+ assert_eq!(
+ get_normalized_path(&test_path),
+ case.path().strip_prefix(&tmp_prefix)?
+ );
+
+ Ok(())
+}
+
+#[test]
+fn test_normalize_path() -> Result<(), Box<dyn Error>> {
+ /*
+ tmp/root/
+ ├── link -> dir1/orig_file
+ ├── dir1/
+ │ └── orig_file
+ └── dir2/
+ └── dir_link -> ../dir1/
+ */
+
+ let tmp_prefix = std::env::temp_dir();
+ set_current_dir(&tmp_prefix)?;
+
+ // Create a tree structure as shown above
+ let root = Builder::new().prefix("root-").tempdir()?;
+ let dir1 = Builder::new().prefix("dir1-").tempdir_in(&root)?;
+ let orig_file = Builder::new().prefix("orig_file-").tempfile_in(&dir1)?;
+ let dir2 = Builder::new().prefix("dir2-").tempdir_in(&root)?;
+
+ // Create path and delete existing file
+ let dir_link = Builder::new()
+ .prefix("dir_link-")
+ .tempfile_in(&dir2)?
+ .path()
+ .to_owned();
+ let link = Builder::new()
+ .prefix("link-")
+ .tempfile_in(&root)?
+ .path()
+ .to_owned();
+
+ use std::os::windows;
+ windows::fs::symlink_dir(&dir1, &dir_link)?;
+ windows::fs::symlink_file(&orig_file, &link)?;
+
+ // root/link
+ let path = link.strip_prefix(&tmp_prefix)?;
+ assert_eq!(
+ get_normalized_path(path),
+ path,
+ "input {:?} and symlink last component shouldn't be resolved",
+ path
+ );
+
+ // root/dir2/dir_link/orig_file/../..
+ let path = dir_link
+ .strip_prefix(&tmp_prefix)
+ .unwrap()
+ .join(orig_file.path().file_name().unwrap())
+ .join(Component::ParentDir)
+ .join(Component::ParentDir);
+ let expected = dir_link
+ .strip_prefix(&tmp_prefix)
+ .unwrap()
+ .join(Component::ParentDir);
+ assert_eq!(
+ get_normalized_path(&path),
+ expected,
+ "input {:?} and \"..\" should not erase the simlink that goes ahead",
+ &path
+ );
+
+ // root/link/.././../dir2/../
+ let path = link
+ .strip_prefix(&tmp_prefix)
+ .unwrap()
+ .join(Component::ParentDir)
+ .join(Component::CurDir)
+ .join(Component::ParentDir)
+ .join(dir2.path().file_name().unwrap())
+ .join(Component::ParentDir);
+ let expected = link
+ .strip_prefix(&tmp_prefix)
+ .unwrap()
+ .join(Component::ParentDir)
+ .join(Component::ParentDir);
+ assert_eq!(get_normalized_path(&path), expected, "input {:?}", &path);
+
+ Ok(())
+}