aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Michael Davis 2022-10-22 00:34:15 +0000
committer GitHub 2022-10-22 00:34:15 +0000
commit 17daf6ac0a1a7ef4a44078ef11cc150a8fa41ff0 (patch)
tree de02ccf9ca841688d73b3e7e5e4c4aca90cf0dcd
parent 131d8392bbc3301ac4e0a392d92d08b08757b720 (diff)
Change syntax for suffix file-types configurations (#4414)
The change in d801a6693c3d475b3942f705d3ef48d7966bdf65 to search for suffixes in `file-types` is too permissive: files like the tutor or `*.txt` files are now mistakenly interpreted as R or perl, respectively.

This commit changes the syntax for specifying a file-types entry that matches by suffix:

```toml
file-types = [{ suffix = ".git/config" }]
```

It also changes file-type detection to first search for any non-suffix patterns and then search for suffixes only among the file-types entries explicitly marked as suffixes.
-rw-r--r-- book/src/languages.md 28
-rw-r--r-- helix-core/src/syntax.rs 102
-rw-r--r-- languages.toml 5
3 files changed, 119 insertions, 16 deletions
diff --git a/book/src/languages.md b/book/src/languages.md
index 9b90a211..133e6447 100644
--- a/book/src/languages.md
+++ b/book/src/languages.md
@@ -50,7 +50,7 @@ These configuration keys are available:
| `name` | The name of the language |
| `scope` | A string like `source.js` that identifies the language. Currently, we strive to match the scope names used by popular TextMate grammars and by the Linguist library. Usually `source.<name>` or `text.<name>` in case of markup languages |
| `injection-regex` | regex pattern that will be tested against a language name in order to determine whether this language should be used for a potential [language injection][treesitter-language-injection] site. |
-| `file-types` | The filetypes of the language, for example `["yml", "yaml"]`. This attempts to match by exact file name (`.zshrc`), then by file extension (`toml`), then by path suffix (`.git/config`). |
+| `file-types` | The filetypes of the language, for example `["yml", "yaml"]`. See the file-type detection section below. |
| `shebangs` | The interpreters from the shebang line, for example `["sh", "bash"]` |
| `roots` | A set of marker files to look for when trying to find the workspace root. For example `Cargo.lock`, `yarn.lock` |
| `auto-format` | Whether to autoformat this language when saving |
@@ -63,6 +63,32 @@ These configuration keys are available:
| `formatter` | The formatter for the language, it will take precedence over the lsp when defined. The formatter must be able to take the original file as input from stdin and write the formatted file to stdout |
| `max-line-length` | Maximum line length. Used for the `:reflow` command |
+### File-type detection and the `file-types` key
+
+Helix determines which language configuration to use with the `file-types` key
+from the above section. `file-types` is a list of strings or tables, for
+example:
+
+```toml
+file-types = ["Makefile", "toml", { suffix = ".git/config" }]
+```
+
+When determining a language configuration to use, Helix searches the file-types
+with the following priorities:
+
+1. Exact match: if the filename of a file is an exact match of a string in a
+ `file-types` list, that language wins. In the example above, `"Makefile"`
+ will match against `Makefile` files.
+2. Extension: if there are no exact matches, any `file-types` string that
+ matches the file extension of a given file wins. In the example above, the
+ `"toml"` matches files like `Cargo.toml` or `languages.toml`.
+3. Suffix: if there are still no matches, any values in `suffix` tables
+ are checked against the full path of the given file. In the example above,
+ the `{ suffix = ".git/config" }` would match against any `config` files
+ in `.git` directories. Note: `/` is used as the directory separator but is
+ replaced at runtime with the appropriate path separator for the operating
+ system, so this rule would match against `.git\config` files on Windows.
+
### Language Server configuration
The `language-server` field takes the following keys:
diff --git a/helix-core/src/syntax.rs b/helix-core/src/syntax.rs
index 21d19ce7..c17655a9 100644
--- a/helix-core/src/syntax.rs
+++ b/helix-core/src/syntax.rs
@@ -73,11 +73,11 @@ impl Default for Configuration {
pub struct LanguageConfiguration {
#[serde(rename = "name")]
pub language_id: String, // c-sharp, rust
- pub scope: String, // source.rust
- pub file_types: Vec<String>, // filename ends_with? <Gemfile, rb, etc>
+ pub scope: String, // source.rust
+ pub file_types: Vec<FileType>, // filename extension or ends_with? <Gemfile, rb, etc>
#[serde(default)]
pub shebangs: Vec<String>, // interpreter(s) associated with language
- pub roots: Vec<String>, // these indicate project roots <.git, Cargo.toml>
+ pub roots: Vec<String>, // these indicate project roots <.git, Cargo.toml>
pub comment_token: Option<String>,
pub max_line_length: Option<usize>,
@@ -125,6 +125,78 @@ pub struct LanguageConfiguration {
pub rulers: Option<Vec<u16>>, // if set, override editor's rulers
}
+#[derive(Debug, PartialEq, Eq, Hash)]
+pub enum FileType {
+ /// The extension of the file, either the `Path::extension` or the full
+ /// filename if the file does not have an extension.
+ Extension(String),
+ /// The suffix of a file. This is compared to a given file's absolute
+ /// path, so it can be used to detect files based on their directories.
+ Suffix(String),
+}
+
+impl Serialize for FileType {
+ fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+ where
+ S: serde::Serializer,
+ {
+ use serde::ser::SerializeMap;
+
+ match self {
+ FileType::Extension(extension) => serializer.serialize_str(extension),
+ FileType::Suffix(suffix) => {
+ let mut map = serializer.serialize_map(Some(1))?;
+ map.serialize_entry("suffix", &suffix.replace(std::path::MAIN_SEPARATOR, "/"))?;
+ map.end()
+ }
+ }
+ }
+}
+
+impl<'de> Deserialize<'de> for FileType {
+ fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+ where
+ D: serde::de::Deserializer<'de>,
+ {
+ struct FileTypeVisitor;
+
+ impl<'de> serde::de::Visitor<'de> for FileTypeVisitor {
+ type Value = FileType;
+
+ fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
+ formatter.write_str("string or table")
+ }
+
+ fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
+ where
+ E: serde::de::Error,
+ {
+ Ok(FileType::Extension(value.to_string()))
+ }
+
+ fn visit_map<M>(self, mut map: M) -> Result<Self::Value, M::Error>
+ where
+ M: serde::de::MapAccess<'de>,
+ {
+ match map.next_entry::<String, String>()? {
+ Some((key, suffix)) if key == "suffix" => Ok(FileType::Suffix(
+ suffix.replace('/', &std::path::MAIN_SEPARATOR.to_string()),
+ )),
+ Some((key, _value)) => Err(serde::de::Error::custom(format!(
+ "unknown key in `file-types` list: {}",
+ key
+ ))),
+ None => Err(serde::de::Error::custom(
+ "expected a `suffix` key in the `file-types` entry",
+ )),
+ }
+ }
+ }
+
+ deserializer.deserialize_any(FileTypeVisitor)
+ }
+}
+
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub struct LanguageServerConfiguration {
@@ -454,7 +526,8 @@ impl LanguageConfiguration {
pub struct Loader {
// highlight_names ?
language_configs: Vec<Arc<LanguageConfiguration>>,
- language_config_ids_by_file_type: HashMap<String, usize>, // Vec<usize>
+ language_config_ids_by_extension: HashMap<String, usize>, // Vec<usize>
+ language_config_ids_by_suffix: HashMap<String, usize>,
language_config_ids_by_shebang: HashMap<String, usize>,
scopes: ArcSwap<Vec<String>>,
@@ -464,7 +537,8 @@ impl Loader {
pub fn new(config: Configuration) -> Self {
let mut loader = Self {
language_configs: Vec::new(),
- language_config_ids_by_file_type: HashMap::new(),
+ language_config_ids_by_extension: HashMap::new(),
+ language_config_ids_by_suffix: HashMap::new(),
language_config_ids_by_shebang: HashMap::new(),
scopes: ArcSwap::from_pointee(Vec::new()),
};
@@ -475,10 +549,14 @@ impl Loader {
for file_type in &config.file_types {
// entry().or_insert(Vec::new).push(language_id);
- let file_type = file_type.replace('/', &std::path::MAIN_SEPARATOR.to_string());
- loader
- .language_config_ids_by_file_type
- .insert(file_type, language_id);
+ match file_type {
+ FileType::Extension(extension) => loader
+ .language_config_ids_by_extension
+ .insert(extension.clone(), language_id),
+ FileType::Suffix(suffix) => loader
+ .language_config_ids_by_suffix
+ .insert(suffix.clone(), language_id),
+ };
}
for shebang in &config.shebangs {
loader
@@ -498,14 +576,14 @@ impl Loader {
let configuration_id = path
.file_name()
.and_then(|n| n.to_str())
- .and_then(|file_name| self.language_config_ids_by_file_type.get(file_name))
+ .and_then(|file_name| self.language_config_ids_by_extension.get(file_name))
.or_else(|| {
path.extension()
.and_then(|extension| extension.to_str())
- .and_then(|extension| self.language_config_ids_by_file_type.get(extension))
+ .and_then(|extension| self.language_config_ids_by_extension.get(extension))
})
.or_else(|| {
- self.language_config_ids_by_file_type
+ self.language_config_ids_by_suffix
.iter()
.find_map(|(file_type, id)| {
if path.to_str()?.ends_with(file_type) {
diff --git a/languages.toml b/languages.toml
index 5ad5c6e6..a639ccad 100644
--- a/languages.toml
+++ b/languages.toml
@@ -1053,8 +1053,7 @@ source = { git = "https://github.com/tree-sitter/tree-sitter-regex", rev = "e1cf
name = "git-config"
scope = "source.gitconfig"
roots = []
-# TODO: allow specifying file-types as a regex so we can read directory names (e.g. `.git/config`)
-file-types = [".gitmodules", ".gitconfig", ".git/config", ".config/git/config"]
+file-types = [".gitmodules", ".gitconfig", { suffix = ".git/config" }, { suffix = ".config/git/config" }]
injection-regex = "git-config"
comment-token = "#"
indent = { tab-width = 4, unit = "\t" }
@@ -1491,7 +1490,7 @@ source = { git = "https://github.com/bearcove/tree-sitter-meson", rev = "feea83b
[[language]]
name = "sshclientconfig"
scope = "source.sshclientconfig"
-file-types = [".ssh/config", "/etc/ssh/ssh_config"]
+file-types = [{ suffix = ".ssh/config" }, { suffix = "/etc/ssh/ssh_config" }]
roots = []
[[grammar]]