aboutsummaryrefslogtreecommitdiff
path: root/helix-core/src
diff options
context:
space:
mode:
author: Timothy DeHerrera, 2023-04-28 09:21:34 +0000
committer: GitHub, 2023-04-28 09:21:34 +0000
commit: 9c6c63a2be30252a6207f4aebb5e0f76f746b4c8 (patch)
tree: cb1882ee0b719e2bf5fc8444b494ba0948dea178 /helix-core/src
parent: 204d1eba4b1bb09d2e861986d6b6e8b868d16afe (diff)
inject language based on file extension & shebang (#3970)
* inject language based on file extension

  Nodes can now be captured with "injection.filename". If this capture contains a valid file extension known to Helix, then the content will be highlighted as that language.

* inject language by shebang

  Nodes can now be captured with "injection.shebang". If this capture contains a valid shebang line known to Helix, then the content will be highlighted as the language the shebang calls for.

* add documentation for language injection

* nix: fix highlights

  The `@` is now highlighted properly on either side of the function arg.

  Also, extending the phases with `buildPhase = prev.buildPhase + ''''` is now highlighted properly.

  Fix highlighting of `''$` style escapes (requires tree-sitter-nix bump)

  Fix `inherit` highlighting.

* simplify injection_for_match

  Split out injection pair logic into its own method to make the overall flow easier to follow. Also transform the top-level function into a method on a HighlightConfiguration.

* markdown: add shebang injection query
Diffstat (limited to 'helix-core/src')
-rw-r--r-- helix-core/src/syntax.rs 207
1 files changed, 130 insertions, 77 deletions
diff --git a/helix-core/src/syntax.rs b/helix-core/src/syntax.rs
index c34ea81a..6514b40f 100644
--- a/helix-core/src/syntax.rs
+++ b/helix-core/src/syntax.rs
@@ -662,9 +662,8 @@ impl Loader {
pub fn language_config_for_shebang(&self, source: &Rope) -> Option<Arc<LanguageConfiguration>> {
let line = Cow::from(source.line(0));
- static SHEBANG_REGEX: Lazy<Regex> = Lazy::new(|| {
- Regex::new(r"^#!\s*(?:\S*[/\\](?:env\s+(?:\-\S+\s+)*)?)?([^\s\.\d]+)").unwrap()
- });
+ static SHEBANG_REGEX: Lazy<Regex> =
+ Lazy::new(|| Regex::new(&["^", SHEBANG].concat()).unwrap());
let configuration_id = SHEBANG_REGEX
.captures(&line)
.and_then(|cap| self.language_config_ids_by_shebang.get(&cap[1]));
@@ -686,15 +685,14 @@ impl Loader {
.cloned()
}
- pub fn language_configuration_for_injection_string(
- &self,
- string: &str,
- ) -> Option<Arc<LanguageConfiguration>> {
+ /// Unlike language_config_for_language_id, which only returns Some for an exact id, this
+ /// function will perform a regex match on the given string to find the closest language match.
+ pub fn language_config_for_name(&self, name: &str) -> Option<Arc<LanguageConfiguration>> {
let mut best_match_length = 0;
let mut best_match_position = None;
for (i, configuration) in self.language_configs.iter().enumerate() {
if let Some(injection_regex) = &configuration.injection_regex {
- if let Some(mat) = injection_regex.find(string) {
+ if let Some(mat) = injection_regex.find(name) {
let length = mat.end() - mat.start();
if length > best_match_length {
best_match_position = Some(i);
@@ -704,11 +702,20 @@ impl Loader {
}
}
- if let Some(i) = best_match_position {
- let configuration = &self.language_configs[i];
- return Some(configuration.clone());
+ best_match_position.map(|i| self.language_configs[i].clone())
+ }
+
+ pub fn language_configuration_for_injection_string(
+ &self,
+ capture: &InjectionLanguageMarker,
+ ) -> Option<Arc<LanguageConfiguration>> {
+ match capture {
+ InjectionLanguageMarker::Name(string) => self.language_config_for_name(string),
+ InjectionLanguageMarker::Filename(file) => self.language_config_for_file_name(file),
+ InjectionLanguageMarker::Shebang(shebang) => {
+ self.language_config_for_language_id(shebang)
+ }
}
- None
}
pub fn language_configs(&self) -> impl Iterator<Item = &Arc<LanguageConfiguration>> {
@@ -800,7 +807,7 @@ impl Syntax {
queue.push_back(self.root);
let scopes = self.loader.scopes.load();
- let injection_callback = |language: &str| {
+ let injection_callback = |language: &InjectionLanguageMarker| {
self.loader
.language_configuration_for_injection_string(language)
.and_then(|language_config| language_config.highlight_config(&scopes))
@@ -961,12 +968,9 @@ impl Syntax {
);
let mut injections = Vec::new();
for mat in matches {
- let (language_name, content_node, included_children) = injection_for_match(
- &layer.config,
- &layer.config.injections_query,
- &mat,
- source_slice,
- );
+ let (injection_capture, content_node, included_children) = layer
+ .config
+ .injection_for_match(&layer.config.injections_query, &mat, source_slice);
// Explicitly remove this match so that none of its other captures will remain
// in the stream of captures.
@@ -974,9 +978,10 @@ impl Syntax {
// If a language is found with the given name, then add a new language layer
// to the highlighted document.
- if let (Some(language_name), Some(content_node)) = (language_name, content_node)
+ if let (Some(injection_capture), Some(content_node)) =
+ (injection_capture, content_node)
{
- if let Some(config) = (injection_callback)(&language_name) {
+ if let Some(config) = (injection_callback)(&injection_capture) {
let ranges =
intersect_ranges(&layer.ranges, &[content_node], included_children);
@@ -1001,14 +1006,11 @@ impl Syntax {
);
for mat in matches {
let entry = &mut injections_by_pattern_index[mat.pattern_index];
- let (language_name, content_node, included_children) = injection_for_match(
- &layer.config,
- combined_injections_query,
- &mat,
- source_slice,
- );
- if language_name.is_some() {
- entry.0 = language_name;
+ let (injection_capture, content_node, included_children) = layer
+ .config
+ .injection_for_match(combined_injections_query, &mat, source_slice);
+ if injection_capture.is_some() {
+ entry.0 = injection_capture;
}
if let Some(content_node) = content_node {
entry.1.push(content_node);
@@ -1395,6 +1397,8 @@ pub struct HighlightConfiguration {
non_local_variable_patterns: Vec<bool>,
injection_content_capture_index: Option<u32>,
injection_language_capture_index: Option<u32>,
+ injection_filename_capture_index: Option<u32>,
+ injection_shebang_capture_index: Option<u32>,
local_scope_capture_index: Option<u32>,
local_def_capture_index: Option<u32>,
local_def_value_capture_index: Option<u32>,
@@ -1538,6 +1542,8 @@ impl HighlightConfiguration {
// Store the numeric ids for all of the special captures.
let mut injection_content_capture_index = None;
let mut injection_language_capture_index = None;
+ let mut injection_filename_capture_index = None;
+ let mut injection_shebang_capture_index = None;
let mut local_def_capture_index = None;
let mut local_def_value_capture_index = None;
let mut local_ref_capture_index = None;
@@ -1558,6 +1564,8 @@ impl HighlightConfiguration {
match name.as_str() {
"injection.content" => injection_content_capture_index = i,
"injection.language" => injection_language_capture_index = i,
+ "injection.filename" => injection_filename_capture_index = i,
+ "injection.shebang" => injection_shebang_capture_index = i,
_ => {}
}
}
@@ -1573,6 +1581,8 @@ impl HighlightConfiguration {
non_local_variable_patterns,
injection_content_capture_index,
injection_language_capture_index,
+ injection_filename_capture_index,
+ injection_shebang_capture_index,
local_scope_capture_index,
local_def_capture_index,
local_def_value_capture_index,
@@ -1631,6 +1641,90 @@ impl HighlightConfiguration {
self.highlight_indices.store(Arc::new(indices));
}
+
+ fn injection_pair<'a>(
+ &self,
+ query_match: &QueryMatch<'a, 'a>,
+ source: RopeSlice<'a>,
+ ) -> (Option<InjectionLanguageMarker<'a>>, Option<Node<'a>>) {
+ let mut injection_capture = None;
+ let mut content_node = None;
+
+ for capture in query_match.captures {
+ let index = Some(capture.index);
+ if index == self.injection_language_capture_index {
+ let name = byte_range_to_str(capture.node.byte_range(), source);
+ injection_capture = Some(InjectionLanguageMarker::Name(name));
+ } else if index == self.injection_filename_capture_index {
+ let name = byte_range_to_str(capture.node.byte_range(), source);
+ let path = Path::new(name.as_ref()).to_path_buf();
+ injection_capture = Some(InjectionLanguageMarker::Filename(path.into()));
+ } else if index == self.injection_shebang_capture_index {
+ let node_slice = source.byte_slice(capture.node.byte_range());
+
+ // some languages allow space and newlines before the actual string content
+ // so a shebang could be on either the first or second line
+ let lines = if let Ok(end) = node_slice.try_line_to_byte(2) {
+ node_slice.byte_slice(..end)
+ } else {
+ node_slice
+ };
+
+ static SHEBANG_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(SHEBANG).unwrap());
+
+ injection_capture = SHEBANG_REGEX
+ .captures(&Cow::from(lines))
+ .map(|cap| InjectionLanguageMarker::Shebang(cap[1].to_owned()))
+ } else if index == self.injection_content_capture_index {
+ content_node = Some(capture.node);
+ }
+ }
+ (injection_capture, content_node)
+ }
+
+ fn injection_for_match<'a>(
+ &self,
+ query: &'a Query,
+ query_match: &QueryMatch<'a, 'a>,
+ source: RopeSlice<'a>,
+ ) -> (
+ Option<InjectionLanguageMarker<'a>>,
+ Option<Node<'a>>,
+ IncludedChildren,
+ ) {
+ let (mut injection_capture, content_node) = self.injection_pair(query_match, source);
+
+ let mut included_children = IncludedChildren::default();
+ for prop in query.property_settings(query_match.pattern_index) {
+ match prop.key.as_ref() {
+ // In addition to specifying the language name via the text of a
+ // captured node, it can also be hard-coded via a `#set!` predicate
+ // that sets the injection.language key.
+ "injection.language" if injection_capture.is_none() => {
+ injection_capture = prop
+ .value
+ .as_ref()
+ .map(|s| InjectionLanguageMarker::Name(s.as_ref().into()));
+ }
+
+ // By default, injections do not include the *children* of an
+ // `injection.content` node - only the ranges that belong to the
+ // node itself. This can be changed using a `#set!` predicate that
+ // sets the `injection.include-children` key.
+ "injection.include-children" => included_children = IncludedChildren::All,
+
+ // Some queries might only exclude named children but include unnamed
+ // children in their `injection.content` node. This can be enabled using
+ // a `#set!` predicate that sets the `injection.include-unnamed-children` key.
+ "injection.include-unnamed-children" => {
+ included_children = IncludedChildren::Unnamed
+ }
+ _ => {}
+ }
+ }
+
+ (injection_capture, content_node, included_children)
+ }
}
impl<'a> HighlightIterLayer<'a> {
@@ -2042,56 +2136,15 @@ impl<'a> Iterator for HighlightIter<'a> {
}
}
-fn injection_for_match<'a>(
- config: &HighlightConfiguration,
- query: &'a Query,
- query_match: &QueryMatch<'a, 'a>,
- source: RopeSlice<'a>,
-) -> (Option<Cow<'a, str>>, Option<Node<'a>>, IncludedChildren) {
- let content_capture_index = config.injection_content_capture_index;
- let language_capture_index = config.injection_language_capture_index;
-
- let mut language_name = None;
- let mut content_node = None;
- for capture in query_match.captures {
- let index = Some(capture.index);
- if index == language_capture_index {
- let name = byte_range_to_str(capture.node.byte_range(), source);
- language_name = Some(name);
- } else if index == content_capture_index {
- content_node = Some(capture.node);
- }
- }
-
- let mut included_children = IncludedChildren::default();
- for prop in query.property_settings(query_match.pattern_index) {
- match prop.key.as_ref() {
- // In addition to specifying the language name via the text of a
- // captured node, it can also be hard-coded via a `#set!` predicate
- // that sets the injection.language key.
- "injection.language" => {
- if language_name.is_none() {
- language_name = prop.value.as_ref().map(|s| s.as_ref().into())
- }
- }
-
- // By default, injections do not include the *children* of an
- // `injection.content` node - only the ranges that belong to the
- // node itself. This can be changed using a `#set!` predicate that
- // sets the `injection.include-children` key.
- "injection.include-children" => included_children = IncludedChildren::All,
-
- // Some queries might only exclude named children but include unnamed
- // children in their `injection.content` node. This can be enabled using
- // a `#set!` predicate that sets the `injection.include-unnamed-children` key.
- "injection.include-unnamed-children" => included_children = IncludedChildren::Unnamed,
- _ => {}
- }
- }
-
- (language_name, content_node, included_children)
+#[derive(Debug, Clone)]
+pub enum InjectionLanguageMarker<'a> {
+ Name(Cow<'a, str>),
+ Filename(Cow<'a, Path>),
+ Shebang(String),
}
+const SHEBANG: &str = r"#!\s*(?:\S*[/\\](?:env\s+(?:\-\S+\s+)*)?)?([^\s\.\d]+)";
+
pub struct Merge<I> {
iter: I,
spans: Box<dyn Iterator<Item = (usize, std::ops::Range<usize>)>>,