From 17daf6ac0a1a7ef4a44078ef11cc150a8fa41ff0 Mon Sep 17 00:00:00 2001 From: Michael Davis Date: Fri, 21 Oct 2022 19:34:15 -0500 Subject: Change syntax for suffix file-types configurations (#4414) The change in d801a6693c3d475b3942f705d3ef48d7966bdf65 to search for suffixes in `file-types` is too permissive: files like the tutor or `*.txt` files are now mistakenly interpreted as R or perl, respectively. This change changes the syntax for specifying a file-types entry that matches by suffix: ```toml file-types = [{ suffix = ".git/config" }] ``` And changes the file-type detection to first search for any non-suffix patterns and then search for suffixes only with the file-types entries marked explicitly as suffixes.--- helix-core/src/syntax.rs | 102 +++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 90 insertions(+), 12 deletions(-) (limited to 'helix-core/src') diff --git a/helix-core/src/syntax.rs b/helix-core/src/syntax.rs index 21d19ce7..c17655a9 100644 --- a/helix-core/src/syntax.rs +++ b/helix-core/src/syntax.rs @@ -73,11 +73,11 @@ impl Default for Configuration { pub struct LanguageConfiguration { #[serde(rename = "name")] pub language_id: String, // c-sharp, rust - pub scope: String, // source.rust - pub file_types: Vec, // filename ends_with? + pub scope: String, // source.rust + pub file_types: Vec, // filename extension or ends_with? #[serde(default)] pub shebangs: Vec, // interpreter(s) associated with language - pub roots: Vec, // these indicate project roots <.git, Cargo.toml> + pub roots: Vec, // these indicate project roots <.git, Cargo.toml> pub comment_token: Option, pub max_line_length: Option, @@ -125,6 +125,78 @@ pub struct LanguageConfiguration { pub rulers: Option>, // if set, override editor's rulers } +#[derive(Debug, PartialEq, Eq, Hash)] +pub enum FileType { + /// The extension of the file, either the `Path::extension` or the full + /// filename if the file does not have an extension. + Extension(String), + /// The suffix of a file. This is compared to a given file's absolute + /// path, so it can be used to detect files based on their directories. + Suffix(String), +} + +impl Serialize for FileType { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + use serde::ser::SerializeMap; + + match self { + FileType::Extension(extension) => serializer.serialize_str(extension), + FileType::Suffix(suffix) => { + let mut map = serializer.serialize_map(Some(1))?; + map.serialize_entry("suffix", &suffix.replace(std::path::MAIN_SEPARATOR, "/"))?; + map.end() + } + } + } +} + +impl<'de> Deserialize<'de> for FileType { + fn deserialize(deserializer: D) -> Result + where + D: serde::de::Deserializer<'de>, + { + struct FileTypeVisitor; + + impl<'de> serde::de::Visitor<'de> for FileTypeVisitor { + type Value = FileType; + + fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { + formatter.write_str("string or table") + } + + fn visit_str(self, value: &str) -> Result + where + E: serde::de::Error, + { + Ok(FileType::Extension(value.to_string())) + } + + fn visit_map(self, mut map: M) -> Result + where + M: serde::de::MapAccess<'de>, + { + match map.next_entry::()? { + Some((key, suffix)) if key == "suffix" => Ok(FileType::Suffix( + suffix.replace('/', &std::path::MAIN_SEPARATOR.to_string()), + )), + Some((key, _value)) => Err(serde::de::Error::custom(format!( + "unknown key in `file-types` list: {}", + key + ))), + None => Err(serde::de::Error::custom( + "expected a `suffix` key in the `file-types` entry", + )), + } + } + } + + deserializer.deserialize_any(FileTypeVisitor) + } +} + #[derive(Debug, Serialize, Deserialize)] #[serde(rename_all = "kebab-case")] pub struct LanguageServerConfiguration { @@ -454,7 +526,8 @@ impl LanguageConfiguration { pub struct Loader { // highlight_names ? language_configs: Vec>, - language_config_ids_by_file_type: HashMap, // Vec + language_config_ids_by_extension: HashMap, // Vec + language_config_ids_by_suffix: HashMap, language_config_ids_by_shebang: HashMap, scopes: ArcSwap>, @@ -464,7 +537,8 @@ impl Loader { pub fn new(config: Configuration) -> Self { let mut loader = Self { language_configs: Vec::new(), - language_config_ids_by_file_type: HashMap::new(), + language_config_ids_by_extension: HashMap::new(), + language_config_ids_by_suffix: HashMap::new(), language_config_ids_by_shebang: HashMap::new(), scopes: ArcSwap::from_pointee(Vec::new()), }; @@ -475,10 +549,14 @@ impl Loader { for file_type in &config.file_types { // entry().or_insert(Vec::new).push(language_id); - let file_type = file_type.replace('/', &std::path::MAIN_SEPARATOR.to_string()); - loader - .language_config_ids_by_file_type - .insert(file_type, language_id); + match file_type { + FileType::Extension(extension) => loader + .language_config_ids_by_extension + .insert(extension.clone(), language_id), + FileType::Suffix(suffix) => loader + .language_config_ids_by_suffix + .insert(suffix.clone(), language_id), + }; } for shebang in &config.shebangs { loader @@ -498,14 +576,14 @@ impl Loader { let configuration_id = path .file_name() .and_then(|n| n.to_str()) - .and_then(|file_name| self.language_config_ids_by_file_type.get(file_name)) + .and_then(|file_name| self.language_config_ids_by_extension.get(file_name)) .or_else(|| { path.extension() .and_then(|extension| extension.to_str()) - .and_then(|extension| self.language_config_ids_by_file_type.get(extension)) + .and_then(|extension| self.language_config_ids_by_extension.get(extension)) }) .or_else(|| { - self.language_config_ids_by_file_type + self.language_config_ids_by_suffix .iter() .find_map(|(file_type, id)| { if path.to_str()?.ends_with(file_type) { -- cgit v1.2.3-70-g09d2