about | summary | refs | log | tree | commit | diff
path: root/helix-core
diff options
context:
space:
mode:
author: Galen Abell, 2024-02-11 17:24:20 +0000
committer: GitHub, 2024-02-11 17:24:20 +0000
commit: 581a1ebf5d327c1128fe6c283578e8f36a4b5fb5 (patch)
tree: 52cea1829f5e5cdf54280eaf631e7ae17feae39f /helix-core
parent: d570c29ce37ffbb46a9c49708c31dfd81daa27cf (diff)
Add glob file type support (#8006)
* Replace FileType::Suffix with FileType::Glob

  Suffix is rather limited and cannot be used to match files which have semantic meaning based on location + file type (for example, Github Action workflow files). This patch adds support for a Glob FileType to replace Suffix, which encompasses the existing behavior & adds additional file matching functionality.

  Globs are standard Unix-style path globs, which are matched against the absolute path of the file. If the configured glob for a language is a relative glob (that is, it isn't an absolute path or already starts with a glob pattern), a glob pattern will be prepended to allow matching relative paths from any directory.

  The order of file type matching is also updated to first match on globs and then on extension. This is necessary as most cases where glob-matching is useful will have already been matched by an extension if glob matching is done last.

* Convert file-types suffixes to globs

* Use globs for filename matching

  Trying to match the file-type raw strings against both filename and extension leads to files with the same name as the extension having the incorrect syntax.

* Match dockerfiles with suffixes

  It's common practice to add a suffix to dockerfiles based on their context, e.g. `Dockerfile.dev`, `Dockerfile.prod`, etc.

* Make env filetype matching more generic

  Match on `.env` or any `.env.*` files.

* Update docs

* Use GlobSet to match all file type globs at once

* Update todo.txt glob patterns

* Consolidate language Configuration and Loader creation

  This is a refactor that improves the error handling for creating the `helix_core::syntax::Loader` from the default and user language configuration.

* Fix integration tests

* Add additional starlark file-type glob

---------

Co-authored-by: Michael Davis <mcarsondavis@gmail.com>
Diffstat (limited to 'helix-core')
-rw-r--r--  helix-core/Cargo.toml    |   1
-rw-r--r--  helix-core/src/config.rs |  45
-rw-r--r--  helix-core/src/syntax.rs | 150
-rw-r--r--  helix-core/tests/indent.rs |   2
4 files changed, 138 insertions, 60 deletions
diff --git a/helix-core/Cargo.toml b/helix-core/Cargo.toml
index 8c63af8e..bdc879ca 100644
--- a/helix-core/Cargo.toml
+++ b/helix-core/Cargo.toml
@@ -49,6 +49,7 @@ chrono = { version = "0.4", default-features = false, features = ["alloc", "std"
etcetera = "0.8"
textwrap = "0.16.0"
+globset = "0.4.14"
nucleo.workspace = true
parking_lot = "0.12"
diff --git a/helix-core/src/config.rs b/helix-core/src/config.rs
index 2076fc22..27cd4e29 100644
--- a/helix-core/src/config.rs
+++ b/helix-core/src/config.rs
@@ -1,10 +1,45 @@
-/// Syntax configuration loader based on built-in languages.toml.
-pub fn default_syntax_loader() -> crate::syntax::Configuration {
+use crate::syntax::{Configuration, Loader, LoaderError};
+
+/// Language configuration based on built-in languages.toml.
+pub fn default_lang_config() -> Configuration {
helix_loader::config::default_lang_config()
.try_into()
- .expect("Could not serialize built-in languages.toml")
+ .expect("Could not deserialize built-in languages.toml")
}
-/// Syntax configuration loader based on user configured languages.toml.
-pub fn user_syntax_loader() -> Result<crate::syntax::Configuration, toml::de::Error> {
+
+/// Language configuration loader based on built-in languages.toml.
+pub fn default_lang_loader() -> Loader {
+ Loader::new(default_lang_config()).expect("Could not compile loader for default config")
+}
+
+#[derive(Debug)]
+pub enum LanguageLoaderError {
+ DeserializeError(toml::de::Error),
+ LoaderError(LoaderError),
+}
+
+impl std::fmt::Display for LanguageLoaderError {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ match self {
+ Self::DeserializeError(err) => write!(f, "Failed to parse language config: {err}"),
+ Self::LoaderError(err) => write!(f, "Failed to compile language config: {err}"),
+ }
+ }
+}
+
+impl std::error::Error for LanguageLoaderError {}
+
+/// Language configuration based on user configured languages.toml.
+pub fn user_lang_config() -> Result<Configuration, toml::de::Error> {
helix_loader::config::user_lang_config()?.try_into()
}
+
+/// Language configuration loader based on user configured languages.toml.
+pub fn user_lang_loader() -> Result<Loader, LanguageLoaderError> {
+ let config: Configuration = helix_loader::config::user_lang_config()
+ .map_err(LanguageLoaderError::DeserializeError)?
+ .try_into()
+ .map_err(LanguageLoaderError::DeserializeError)?;
+
+ Loader::new(config).map_err(LanguageLoaderError::LoaderError)
+}
diff --git a/helix-core/src/syntax.rs b/helix-core/src/syntax.rs
index 24de1a33..99b5a3d1 100644
--- a/helix-core/src/syntax.rs
+++ b/helix-core/src/syntax.rs
@@ -82,12 +82,6 @@ pub struct Configuration {
pub language_server: HashMap<String, LanguageServerConfiguration>,
}
-impl Default for Configuration {
- fn default() -> Self {
- crate::config::default_syntax_loader()
- }
-}
-
// largely based on tree-sitter/cli/src/loader.rs
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case", deny_unknown_fields)]
@@ -164,9 +158,11 @@ pub enum FileType {
/// The extension of the file, either the `Path::extension` or the full
/// filename if the file does not have an extension.
Extension(String),
- /// The suffix of a file. This is compared to a given file's absolute
- /// path, so it can be used to detect files based on their directories.
- Suffix(String),
+ /// A Unix-style path glob. This is compared to the file's absolute path, so
+ /// it can be used to detect files based on their directories. If the glob
+ /// is not an absolute path and does not already start with a glob pattern,
+ /// a glob pattern will be prepended to it.
+ Glob(globset::Glob),
}
impl Serialize for FileType {
@@ -178,9 +174,9 @@ impl Serialize for FileType {
match self {
FileType::Extension(extension) => serializer.serialize_str(extension),
- FileType::Suffix(suffix) => {
+ FileType::Glob(glob) => {
let mut map = serializer.serialize_map(Some(1))?;
- map.serialize_entry("suffix", &suffix.replace(std::path::MAIN_SEPARATOR, "/"))?;
+ map.serialize_entry("glob", glob.glob())?;
map.end()
}
}
@@ -213,9 +209,20 @@ impl<'de> Deserialize<'de> for FileType {
M: serde::de::MapAccess<'de>,
{
match map.next_entry::<String, String>()? {
- Some((key, suffix)) if key == "suffix" => Ok(FileType::Suffix({
- suffix.replace('/', std::path::MAIN_SEPARATOR_STR)
- })),
+ Some((key, mut glob)) if key == "glob" => {
+ // If the glob isn't an absolute path or already starts
+ // with a glob pattern, add a leading glob so we
+ // properly match relative paths.
+ if !glob.starts_with('/') && !glob.starts_with("*/") {
+ glob.insert_str(0, "*/");
+ }
+
+ globset::Glob::new(glob.as_str())
+ .map(FileType::Glob)
+ .map_err(|err| {
+ serde::de::Error::custom(format!("invalid `glob` pattern: {}", err))
+ })
+ }
Some((key, _value)) => Err(serde::de::Error::custom(format!(
"unknown key in `file-types` list: {}",
key
@@ -752,6 +759,47 @@ pub struct SoftWrap {
pub wrap_at_text_width: Option<bool>,
}
+#[derive(Debug)]
+struct FileTypeGlob {
+ glob: globset::Glob,
+ language_id: usize,
+}
+
+impl FileTypeGlob {
+ fn new(glob: globset::Glob, language_id: usize) -> Self {
+ Self { glob, language_id }
+ }
+}
+
+#[derive(Debug)]
+struct FileTypeGlobMatcher {
+ matcher: globset::GlobSet,
+ file_types: Vec<FileTypeGlob>,
+}
+
+impl FileTypeGlobMatcher {
+ fn new(file_types: Vec<FileTypeGlob>) -> Result<Self, globset::Error> {
+ let mut builder = globset::GlobSetBuilder::new();
+ for file_type in &file_types {
+ builder.add(file_type.glob.clone());
+ }
+
+ Ok(Self {
+ matcher: builder.build()?,
+ file_types,
+ })
+ }
+
+ fn language_id_for_path(&self, path: &Path) -> Option<&usize> {
+ self.matcher
+ .matches(path)
+ .iter()
+ .filter_map(|idx| self.file_types.get(*idx))
+ .max_by_key(|file_type| file_type.glob.glob().len())
+ .map(|file_type| &file_type.language_id)
+ }
+}
+
// Expose loader as Lazy<> global since it's always static?
#[derive(Debug)]
@@ -759,7 +807,7 @@ pub struct Loader {
// highlight_names ?
language_configs: Vec<Arc<LanguageConfiguration>>,
language_config_ids_by_extension: HashMap<String, usize>, // Vec<usize>
- language_config_ids_by_suffix: HashMap<String, usize>,
+ language_config_ids_glob_matcher: FileTypeGlobMatcher,
language_config_ids_by_shebang: HashMap<String, usize>,
language_server_configs: HashMap<String, LanguageServerConfiguration>,
@@ -767,66 +815,57 @@ pub struct Loader {
scopes: ArcSwap<Vec<String>>,
}
+pub type LoaderError = globset::Error;
+
impl Loader {
- pub fn new(config: Configuration) -> Self {
- let mut loader = Self {
- language_configs: Vec::new(),
- language_server_configs: config.language_server,
- language_config_ids_by_extension: HashMap::new(),
- language_config_ids_by_suffix: HashMap::new(),
- language_config_ids_by_shebang: HashMap::new(),
- scopes: ArcSwap::from_pointee(Vec::new()),
- };
+ pub fn new(config: Configuration) -> Result<Self, LoaderError> {
+ let mut language_configs = Vec::new();
+ let mut language_config_ids_by_extension = HashMap::new();
+ let mut language_config_ids_by_shebang = HashMap::new();
+ let mut file_type_globs = Vec::new();
for config in config.language {
// get the next id
- let language_id = loader.language_configs.len();
+ let language_id = language_configs.len();
for file_type in &config.file_types {
// entry().or_insert(Vec::new).push(language_id);
match file_type {
- FileType::Extension(extension) => loader
- .language_config_ids_by_extension
- .insert(extension.clone(), language_id),
- FileType::Suffix(suffix) => loader
- .language_config_ids_by_suffix
- .insert(suffix.clone(), language_id),
+ FileType::Extension(extension) => {
+ language_config_ids_by_extension.insert(extension.clone(), language_id);
+ }
+ FileType::Glob(glob) => {
+ file_type_globs.push(FileTypeGlob::new(glob.to_owned(), language_id));
+ }
};
}
for shebang in &config.shebangs {
- loader
- .language_config_ids_by_shebang
- .insert(shebang.clone(), language_id);
+ language_config_ids_by_shebang.insert(shebang.clone(), language_id);
}
- loader.language_configs.push(Arc::new(config));
+ language_configs.push(Arc::new(config));
}
- loader
+ Ok(Self {
+ language_configs,
+ language_config_ids_by_extension,
+ language_config_ids_glob_matcher: FileTypeGlobMatcher::new(file_type_globs)?,
+ language_config_ids_by_shebang,
+ language_server_configs: config.language_server,
+ scopes: ArcSwap::from_pointee(Vec::new()),
+ })
}
pub fn language_config_for_file_name(&self, path: &Path) -> Option<Arc<LanguageConfiguration>> {
// Find all the language configurations that match this file name
// or a suffix of the file name.
- let configuration_id = path
- .file_name()
- .and_then(|n| n.to_str())
- .and_then(|file_name| self.language_config_ids_by_extension.get(file_name))
+ let configuration_id = self
+ .language_config_ids_glob_matcher
+ .language_id_for_path(path)
.or_else(|| {
path.extension()
.and_then(|extension| extension.to_str())
.and_then(|extension| self.language_config_ids_by_extension.get(extension))
- })
- .or_else(|| {
- self.language_config_ids_by_suffix
- .iter()
- .find_map(|(file_type, id)| {
- if path.to_str()?.ends_with(file_type) {
- Some(id)
- } else {
- None
- }
- })
});
configuration_id.and_then(|&id| self.language_configs.get(id).cloned())
@@ -2592,7 +2631,8 @@ mod test {
let loader = Loader::new(Configuration {
language: vec![],
language_server: HashMap::new(),
- });
+ })
+ .unwrap();
let language = get_language("rust").unwrap();
let query = Query::new(language, query_str).unwrap();
@@ -2654,7 +2694,8 @@ mod test {
let loader = Loader::new(Configuration {
language: vec![],
language_server: HashMap::new(),
- });
+ })
+ .unwrap();
let language = get_language("rust").unwrap();
let config = HighlightConfiguration::new(
@@ -2760,7 +2801,8 @@ mod test {
let loader = Loader::new(Configuration {
language: vec![],
language_server: HashMap::new(),
- });
+ })
+ .unwrap();
let language = get_language(language_name).unwrap();
let config = HighlightConfiguration::new(language, "", "", "").unwrap();
diff --git a/helix-core/tests/indent.rs b/helix-core/tests/indent.rs
index faf845c0..de1434f7 100644
--- a/helix-core/tests/indent.rs
+++ b/helix-core/tests/indent.rs
@@ -186,7 +186,7 @@ fn test_treesitter_indent(
lang_scope: &str,
ignored_lines: Vec<std::ops::Range<usize>>,
) {
- let loader = Loader::new(indent_tests_config());
+ let loader = Loader::new(indent_tests_config()).unwrap();
// set runtime path so we can find the queries
let mut runtime = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"));