From 1f916e65cff4459698d465b2f4558da1e1bf6e44 Mon Sep 17 00:00:00 2001
From: Michael Davis
Date: Tue, 16 Jan 2024 13:59:48 -0500
Subject: Create helix-stdx crate for stdlib extensions

helix-stdx is meant to carry extensions to the stdlib or low-level
dependencies that are useful in all other crates. This commit starts
with all of the path functions from helix-core and the CWD tracking that
lived in helix-loader.

The CWD tracking in helix-loader was previously unable to call the
canonicalization functions in helix-core. Switching to our custom
canonicalization code should make no noticeable difference though
since `std::env::current_dir` returns a canonicalized path with symlinks
resolved (at least on unix).
---
 helix-stdx/Cargo.toml    |  19 +++++
 helix-stdx/src/env.rs    |  48 ++++++++++++
 helix-stdx/src/lib.rs    |   2 +
 helix-stdx/src/path.rs   | 185 +++++++++++++++++++++++++++++++++++++++++++++++
 helix-stdx/tests/path.rs | 124 +++++++++++++++++++++++++++++++
 5 files changed, 378 insertions(+)
 create mode 100644 helix-stdx/Cargo.toml
 create mode 100644 helix-stdx/src/env.rs
 create mode 100644 helix-stdx/src/lib.rs
 create mode 100644 helix-stdx/src/path.rs
 create mode 100644 helix-stdx/tests/path.rs

(limited to 'helix-stdx')

diff --git a/helix-stdx/Cargo.toml b/helix-stdx/Cargo.toml
new file mode 100644
index 00000000..216a3b40
--- /dev/null
+++ b/helix-stdx/Cargo.toml
@@ -0,0 +1,19 @@
+[package]
+name = "helix-stdx"
+description = "Standard library extensions"
+include = ["src/**/*", "README.md"]
+version.workspace = true
+authors.workspace = true
+edition.workspace = true
+license.workspace = true
+rust-version.workspace = true
+categories.workspace = true
+repository.workspace = true
+homepage.workspace = true
+
+[dependencies]
+dunce = "1.0"
+etcetera = "0.8"
+
+[dev-dependencies]
+tempfile = "3.9"
diff --git a/helix-stdx/src/env.rs b/helix-stdx/src/env.rs
new file mode 100644
index 00000000..864ba828
--- /dev/null
+++ b/helix-stdx/src/env.rs
@@ -0,0 +1,48 @@
+use std::{
+    path::{Path, PathBuf},
+    sync::RwLock,
+};
+
+static CWD: RwLock<Option<PathBuf>> = RwLock::new(None);
+
+/// Get the current working directory.
+/// This information is managed internally as the call to std::env::current_dir
+/// might fail if the cwd has been deleted.
+pub fn current_working_dir() -> PathBuf {
+    if let Some(path) = &*CWD.read().unwrap() {
+        return path.clone();
+    }
+
+    let path = std::env::current_dir()
+        .map(crate::path::normalize)
+        .expect("Couldn't determine current working directory");
+    let mut cwd = CWD.write().unwrap();
+    *cwd = Some(path.clone());
+
+    path
+}
+
+pub fn set_current_working_dir(path: impl AsRef<Path>) -> std::io::Result<()> {
+    let path = crate::path::canonicalize(path);
+    std::env::set_current_dir(&path)?;
+    let mut cwd = CWD.write().unwrap();
+    *cwd = Some(path);
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::{current_working_dir, set_current_working_dir};
+
+    #[test]
+    fn current_dir_is_set() {
+        let new_path = dunce::canonicalize(std::env::temp_dir()).unwrap();
+        let cwd = current_working_dir();
+        assert_ne!(cwd, new_path);
+
+        set_current_working_dir(&new_path).expect("Couldn't set new path");
+
+        let cwd = current_working_dir();
+        assert_eq!(cwd, new_path);
+    }
+}
diff --git a/helix-stdx/src/lib.rs b/helix-stdx/src/lib.rs
new file mode 100644
index 00000000..ae3c3a98
--- /dev/null
+++ b/helix-stdx/src/lib.rs
@@ -0,0 +1,2 @@
+pub mod env;
+pub mod path;
diff --git a/helix-stdx/src/path.rs b/helix-stdx/src/path.rs
new file mode 100644
index 00000000..5746657c
--- /dev/null
+++ b/helix-stdx/src/path.rs
@@ -0,0 +1,185 @@
+pub use etcetera::home_dir;
+
+use std::path::{Component, Path, PathBuf};
+
+use crate::env::current_working_dir;
+
+/// Replaces user's home directory from `path` with tilde `~` if the directory
+/// is available, otherwise returns the path unchanged.
+pub fn fold_home_dir(path: &Path) -> PathBuf {
+    if let Ok(home) = home_dir() {
+        if let Ok(stripped) = path.strip_prefix(&home) {
+            return PathBuf::from("~").join(stripped);
+        }
+    }
+
+    path.to_path_buf()
+}
+
+/// Expands tilde `~` into user's home directory if available, otherwise returns the path
+/// unchanged. The tilde will only be expanded when present as the first component of the path
+/// and only slash follows it.
+pub fn expand_tilde(path: impl AsRef<Path>) -> PathBuf {
+    let path = path.as_ref();
+    let mut components = path.components().peekable();
+    if let Some(Component::Normal(c)) = components.peek() {
+        if c == &"~" {
+            if let Ok(home) = home_dir() {
+                // it's ok to unwrap, the path starts with `~`
+                return home.join(path.strip_prefix("~").unwrap());
+            }
+        }
+    }
+
+    path.to_path_buf()
+}
+
+/// Normalize a path without resolving symlinks.
+// Strategy: start from the first component and move up. Canonicalize previous path,
+// join component, canonicalize new path, strip prefix and join to the final result.
+pub fn normalize(path: impl AsRef<Path>) -> PathBuf {
+    let mut components = path.as_ref().components().peekable();
+    let mut ret = if let Some(c @ Component::Prefix(..)) = components.peek().cloned() {
+        components.next();
+        PathBuf::from(c.as_os_str())
+    } else {
+        PathBuf::new()
+    };
+
+    for component in components {
+        match component {
+            Component::Prefix(..) => unreachable!(),
+            Component::RootDir => {
+                ret.push(component.as_os_str());
+            }
+            Component::CurDir => {}
+            #[cfg(not(windows))]
+            Component::ParentDir => {
+                ret.pop();
+            }
+            #[cfg(windows)]
+            Component::ParentDir => {
+                if let Some(head) = ret.components().next_back() {
+                    match head {
+                        Component::Prefix(_) | Component::RootDir => {}
+                        Component::CurDir => unreachable!(),
+                        // If we left previous component as ".." it means we met a symlink before and we can't pop path.
+                        Component::ParentDir => {
+                            ret.push("..");
+                        }
+                        Component::Normal(_) => {
+                            if ret.is_symlink() {
+                                ret.push("..");
+                            } else {
+                                ret.pop();
+                            }
+                        }
+                    }
+                }
+            }
+            #[cfg(not(windows))]
+            Component::Normal(c) => {
+                ret.push(c);
+            }
+            #[cfg(windows)]
+            Component::Normal(c) => 'normal: {
+                use std::fs::canonicalize;
+
+                let new_path = ret.join(c);
+                if new_path.is_symlink() {
+                    ret = new_path;
+                    break 'normal;
+                }
+                let (can_new, can_old) = (canonicalize(&new_path), canonicalize(&ret));
+                match (can_new, can_old) {
+                    (Ok(can_new), Ok(can_old)) => {
+                        let stripped = can_new.strip_prefix(can_old);
+                        ret.push(stripped.unwrap_or_else(|_| c.as_ref()));
+                    }
+                    _ => ret.push(c),
+                }
+            }
+        }
+    }
+    dunce::simplified(&ret).to_path_buf()
+}
+
+/// Returns the canonical, absolute form of a path with all intermediate components normalized.
+///
+/// This function is used instead of [`std::fs::canonicalize`] because we don't want to verify
+/// here if the path exists, just normalize its components.
+pub fn canonicalize(path: impl AsRef<Path>) -> PathBuf {
+    let path = expand_tilde(path);
+    let path = if path.is_relative() {
+        current_working_dir().join(path)
+    } else {
+        path
+    };
+
+    normalize(path)
+}
+
+pub fn get_relative_path(path: impl AsRef<Path>) -> PathBuf {
+    let path = PathBuf::from(path.as_ref());
+    let path = if path.is_absolute() {
+        let cwdir = normalize(current_working_dir());
+        normalize(&path)
+            .strip_prefix(cwdir)
+            .map(PathBuf::from)
+            .unwrap_or(path)
+    } else {
+        path
+    };
+    fold_home_dir(&path)
+}
+
+/// Returns a truncated filepath where the basepart of the path is reduced to the first
+/// char of the folder and the whole filename appended.
+///
+/// Also strip the current working directory from the beginning of the path.
+/// Note that this function does not check if the truncated path is unambiguous.
+///
+/// ```
+///    use helix_stdx::path::get_truncated_path;
+///    use std::path::Path;
+///
+///    assert_eq!(
+///        get_truncated_path("/home/cnorris/documents/jokes.txt").as_path(),
+///        Path::new("/h/c/d/jokes.txt")
+///    );
+///    assert_eq!(
+///        get_truncated_path("jokes.txt").as_path(),
+///        Path::new("jokes.txt")
+///    );
+///    assert_eq!(
+///        get_truncated_path("/jokes.txt").as_path(),
+///        Path::new("/jokes.txt")
+///    );
+///    assert_eq!(
+///        get_truncated_path("/h/c/d/jokes.txt").as_path(),
+///        Path::new("/h/c/d/jokes.txt")
+///    );
+///    assert_eq!(get_truncated_path("").as_path(), Path::new(""));
+/// ```
+///
+pub fn get_truncated_path(path: impl AsRef<Path>) -> PathBuf {
+    let cwd = current_working_dir();
+    let path = path
+        .as_ref()
+        .strip_prefix(cwd)
+        .unwrap_or_else(|_| path.as_ref());
+    let file = path.file_name().unwrap_or_default();
+    let base = path.parent().unwrap_or_else(|| Path::new(""));
+    let mut ret = PathBuf::new();
+    for d in base {
+        ret.push(
+            d.to_string_lossy()
+                .chars()
+                .next()
+                .unwrap_or_default()
+                .to_string(),
+        );
+    }
+    ret.push(file);
+    ret
+}
diff --git a/helix-stdx/tests/path.rs b/helix-stdx/tests/path.rs
new file mode 100644
index 00000000..cc3c15cb
--- /dev/null
+++ b/helix-stdx/tests/path.rs
@@ -0,0 +1,124 @@
+#![cfg(windows)]
+
+use std::{
+    env::set_current_dir,
+    error::Error,
+    path::{Component, Path, PathBuf},
+};
+
+use helix_stdx::path;
+use tempfile::Builder;
+
+// Paths on Windows are almost always case-insensitive.
+// Normalization should return the original path.
+// E.g. mkdir `CaSe`, normalize(`case`) = `CaSe`.
+#[test]
+fn test_case_folding_windows() -> Result<(), Box<dyn Error>> {
+    // tmp/root/case
+    let tmp_prefix = std::env::temp_dir();
+    set_current_dir(&tmp_prefix)?;
+
+    let root = Builder::new().prefix("root-").tempdir()?;
+    let case = Builder::new().prefix("CaSe-").tempdir_in(&root)?;
+
+    let root_without_prefix = root.path().strip_prefix(&tmp_prefix)?;
+
+    let lowercase_case = format!(
+        "case-{}",
+        case.path()
+            .file_name()
+            .unwrap()
+            .to_string_lossy()
+            .split_at(5)
+            .1
+    );
+    let test_path = root_without_prefix.join(lowercase_case);
+    assert_eq!(
+        path::normalize(&test_path),
+        case.path().strip_prefix(&tmp_prefix)?
+    );
+
+    Ok(())
+}
+
+#[test]
+fn test_normalize_path() -> Result<(), Box<dyn Error>> {
+    /*
+    tmp/root/
+    ├── link -> dir1/orig_file
+    ├── dir1/
+    │   └── orig_file
+    └── dir2/
+        └── dir_link -> ../dir1/
+    */
+
+    let tmp_prefix = std::env::temp_dir();
+    set_current_dir(&tmp_prefix)?;
+
+    // Create a tree structure as shown above
+    let root = Builder::new().prefix("root-").tempdir()?;
+    let dir1 = Builder::new().prefix("dir1-").tempdir_in(&root)?;
+    let orig_file = Builder::new().prefix("orig_file-").tempfile_in(&dir1)?;
+    let dir2 = Builder::new().prefix("dir2-").tempdir_in(&root)?;
+
+    // Create path and delete existing file
+    let dir_link = Builder::new()
+        .prefix("dir_link-")
+        .tempfile_in(&dir2)?
+        .path()
+        .to_owned();
+    let link = Builder::new()
+        .prefix("link-")
+        .tempfile_in(&root)?
+        .path()
+        .to_owned();
+
+    use std::os::windows;
+    windows::fs::symlink_dir(&dir1, &dir_link)?;
+    windows::fs::symlink_file(&orig_file, &link)?;
+
+    // root/link
+    let path = link.strip_prefix(&tmp_prefix)?;
+    assert_eq!(
+        path::normalize(path),
+        path,
+        "input {:?} and symlink last component shouldn't be resolved",
+        path
+    );
+
+    // root/dir2/dir_link/orig_file/../..
+    let path = dir_link
+        .strip_prefix(&tmp_prefix)
+        .unwrap()
+        .join(orig_file.path().file_name().unwrap())
+        .join(Component::ParentDir)
+        .join(Component::ParentDir);
+    let expected = dir_link
+        .strip_prefix(&tmp_prefix)
+        .unwrap()
+        .join(Component::ParentDir);
+    assert_eq!(
+        path::normalize(&path),
+        expected,
+        "input {:?} and \"..\" should not erase the simlink that goes ahead",
+        &path
+    );
+
+    // root/link/.././../dir2/../
+    let path = link
+        .strip_prefix(&tmp_prefix)
+        .unwrap()
+        .join(Component::ParentDir)
+        .join(Component::CurDir)
+        .join(Component::ParentDir)
+        .join(dir2.path().file_name().unwrap())
+        .join(Component::ParentDir);
+    let expected = link
+        .strip_prefix(&tmp_prefix)
+        .unwrap()
+        .join(Component::ParentDir)
+        .join(Component::ParentDir);
+    assert_eq!(path::normalize(&path), expected, "input {:?}", &path);
+
+    Ok(())
+}
-- 
cgit v1.2.3-70-g09d2