From 77dbbc73f9c9b6599bc39b18625285685fe2e4b1 Mon Sep 17 00:00:00 2001 From: ath3 Date: Mon, 8 Nov 2021 16:19:44 +0100 Subject: Detect filetype from shebang line (#1001) --- helix-core/src/indent.rs | 1 + helix-core/src/syntax.rs | 24 ++++++++++++++++++++++++ 2 files changed, 25 insertions(+) (limited to 'helix-core') diff --git a/helix-core/src/indent.rs b/helix-core/src/indent.rs index 20f034ea..b6f5081a 100644 --- a/helix-core/src/indent.rs +++ b/helix-core/src/indent.rs @@ -450,6 +450,7 @@ where language: vec![LanguageConfiguration { scope: "source.rust".to_string(), file_types: vec!["rs".to_string()], + shebangs: vec![], language_id: "Rust".to_string(), highlight_config: OnceCell::new(), config: None, diff --git a/helix-core/src/syntax.rs b/helix-core/src/syntax.rs index f3e3f238..84952248 100644 --- a/helix-core/src/syntax.rs +++ b/helix-core/src/syntax.rs @@ -14,6 +14,8 @@ use std::{ cell::RefCell, collections::{HashMap, HashSet}, fmt, + fs::File, + io::Read, path::Path, sync::Arc, }; @@ -52,6 +54,7 @@ pub struct LanguageConfiguration { pub language_id: String, pub scope: String, // source.rust pub file_types: Vec<String>, // filename ends_with? + pub shebangs: Vec<String>, // interpreter(s) associated with language pub roots: Vec<String>, // these indicate project roots <.git, Cargo.toml> pub comment_token: Option<String>, @@ -254,6 +257,7 @@ pub struct Loader { // highlight_names ? 
language_configs: Vec<Arc<LanguageConfiguration>>, language_config_ids_by_file_type: HashMap<String, usize>, // Vec<usize> + language_config_ids_by_shebang: HashMap<String, usize>, } impl Loader { @@ -261,6 +265,7 @@ impl Loader { let mut loader = Self { language_configs: Vec::new(), language_config_ids_by_file_type: HashMap::new(), + language_config_ids_by_shebang: HashMap::new(), }; for config in config.language { @@ -273,6 +278,11 @@ impl Loader { .language_config_ids_by_file_type .insert(file_type.clone(), language_id); } + for shebang in &config.shebangs { + loader + .language_config_ids_by_shebang + .insert(shebang.clone(), language_id); + } loader.language_configs.push(Arc::new(config)); } @@ -298,6 +308,20 @@ impl Loader { // TODO: content_regex handling conflict resolution } + pub fn language_config_for_shebang(&self, path: &Path) -> Option<Arc<LanguageConfiguration>> { + // Read the first 128 bytes of the file. If its a shebang line, try to find the language + let file = File::open(path).ok()?; + let mut buf = String::with_capacity(128); + file.take(128).read_to_string(&mut buf).ok()?; + static SHEBANG_REGEX: Lazy<Regex> = + Lazy::new(|| Regex::new(r"^#!\s*(?:\S*[/\\](?:env\s+)?)?([^\s\.\d]+)").unwrap()); + let configuration_id = SHEBANG_REGEX + .captures(&buf) + .and_then(|cap| self.language_config_ids_by_shebang.get(&cap[1])); + + configuration_id.and_then(|&id| self.language_configs.get(id).cloned()) + } + pub fn language_config_for_scope(&self, scope: &str) -> Option<Arc<LanguageConfiguration>> { self.language_configs .iter() -- cgit v1.2.3-70-g09d2