From a106be94f140918fa392bea660a87197b66390f0 Mon Sep 17 00:00:00 2001 From: Blaž Hrastnik Date: Sat, 12 Sep 2020 17:44:57 +0900 Subject: Refactor a little bit. --- helix-core/src/language_mode.rs | 993 -------------------------------------- helix-core/src/lib.rs | 2 +- helix-core/src/syntax.rs | 1014 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 1015 insertions(+), 994 deletions(-) delete mode 100644 helix-core/src/language_mode.rs create mode 100644 helix-core/src/syntax.rs (limited to 'helix-core') diff --git a/helix-core/src/language_mode.rs b/helix-core/src/language_mode.rs deleted file mode 100644 index 3f104be4..00000000 --- a/helix-core/src/language_mode.rs +++ /dev/null @@ -1,993 +0,0 @@ -pub struct LanguageMode { - parser: Parser, -} - -impl LanguageMode { - // buffer, grammar, config, grammars, sync_timeout? - pub fn new() -> Self { - unimplemented!() - // make a new root layer - // track markers of injections - // - // track scope_descriptor: a Vec of scopes for item in tree - // - // fetch grammar for parser based on language string - // update root layer - } - - // fn buffer_changed -> call layer.update(range, new_text) on root layer and then all marker layers - - // call this on transaction.apply() -> buffer_changed(changes) - // - // fn parse(language, old_tree, ranges) - // - // fn tree() -> Tree - // - // - - // Highlighting - // fn highlight_iter() -> iterates over all the scopes - // on_tokenize - // on_change_highlighting - - // Commenting - // comment_strings_for_pos - // is_commented - - // Indentation - // suggested_indent_for_line_at_buffer_row - // suggested_indent_for_buffer_row - // indent_level_for_line - - // TODO: Folding - - // Syntax APIs - // get_syntax_node_containing_range -> - // ... - // get_syntax_node_at_pos - // buffer_range_for_scope_at_pos -} - -pub struct LanguageLayer { - // mode -// grammar -// depth -// tree: Tree, -} - -impl LanguageLayer { - // fn highlight_iter() -> same as Mode but for this layer. 
Mode composits these - // fn buffer_changed - // fn update(range) - // fn update_injections() -} - -// -- refactored from tree-sitter-highlight to be able to retain state -// TODO: add seek() to iter - -use std::sync::atomic::{AtomicUsize, Ordering}; -use std::{iter, mem, ops, str, usize}; -use tree_sitter::{ - Language, Node, Parser, Point, Query, QueryCaptures, QueryCursor, QueryError, QueryMatch, - Range, Tree, -}; - -const CANCELLATION_CHECK_INTERVAL: usize = 100; -const BUFFER_HTML_RESERVE_CAPACITY: usize = 10 * 1024; -const BUFFER_LINES_RESERVE_CAPACITY: usize = 1000; - -/// Indicates which highlight should be applied to a region of source code. -#[derive(Copy, Clone, Debug, PartialEq, Eq)] -pub struct Highlight(pub usize); - -/// Represents the reason why syntax highlighting failed. -#[derive(Debug, PartialEq, Eq)] -pub enum Error { - Cancelled, - InvalidLanguage, - Unknown, -} - -/// Represents a single step in rendering a syntax-highlighted document. -#[derive(Copy, Clone, Debug)] -pub enum HighlightEvent { - Source { start: usize, end: usize }, - HighlightStart(Highlight), - HighlightEnd, -} - -/// Contains the data neeeded to higlight code written in a particular language. -/// -/// This struct is immutable and can be shared between threads. -pub struct HighlightConfiguration { - pub language: Language, - pub query: Query, - combined_injections_query: Option, - locals_pattern_index: usize, - highlights_pattern_index: usize, - highlight_indices: Vec>, - non_local_variable_patterns: Vec, - injection_content_capture_index: Option, - injection_language_capture_index: Option, - local_scope_capture_index: Option, - local_def_capture_index: Option, - local_def_value_capture_index: Option, - local_ref_capture_index: Option, -} - -/// Performs syntax highlighting, recognizing a given list of highlight names. -/// -/// For the best performance `Highlighter` values should be reused between -/// syntax highlighting calls. 
A separate highlighter is needed for each thread that -/// is performing highlighting. -pub struct Highlighter { - parser: Parser, - cursors: Vec, -} - -#[derive(Debug)] -struct LocalDef<'a> { - name: &'a str, - value_range: ops::Range, - highlight: Option, -} - -#[derive(Debug)] -struct LocalScope<'a> { - inherits: bool, - range: ops::Range, - local_defs: Vec>, -} - -struct HighlightIter<'a, F> -where - F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a, -{ - source: &'a [u8], - byte_offset: usize, - highlighter: &'a mut Highlighter, - injection_callback: F, - cancellation_flag: Option<&'a AtomicUsize>, - layers: Vec>, - iter_count: usize, - next_event: Option, - last_highlight_range: Option<(usize, usize, usize)>, -} - -struct HighlightIterLayer<'a> { - _tree: Tree, - cursor: QueryCursor, - captures: iter::Peekable>, - config: &'a HighlightConfiguration, - highlight_end_stack: Vec, - scope_stack: Vec>, - ranges: Vec, - depth: usize, -} - -impl Highlighter { - pub fn new() -> Self { - Highlighter { - parser: Parser::new(), - cursors: Vec::new(), - } - } - - pub fn parser(&mut self) -> &mut Parser { - &mut self.parser - } - - /// Iterate over the highlighted regions for a given slice of source code. 
- pub fn highlight<'a>( - &'a mut self, - config: &'a HighlightConfiguration, - source: &'a [u8], - cancellation_flag: Option<&'a AtomicUsize>, - mut injection_callback: impl FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a, - ) -> Result> + 'a, Error> { - let layers = HighlightIterLayer::new( - source, - self, - cancellation_flag, - &mut injection_callback, - config, - 0, - vec![Range { - start_byte: 0, - end_byte: usize::MAX, - start_point: Point::new(0, 0), - end_point: Point::new(usize::MAX, usize::MAX), - }], - )?; - assert_ne!(layers.len(), 0); - let mut result = HighlightIter { - source, - byte_offset: 0, - injection_callback, - cancellation_flag, - highlighter: self, - iter_count: 0, - layers: layers, - next_event: None, - last_highlight_range: None, - }; - result.sort_layers(); - Ok(result) - } -} - -impl HighlightConfiguration { - /// Creates a `HighlightConfiguration` for a given `Language` and set of highlighting - /// queries. - /// - /// # Parameters - /// - /// * `language` - The Tree-sitter `Language` that should be used for parsing. - /// * `highlights_query` - A string containing tree patterns for syntax highlighting. This - /// should be non-empty, otherwise no syntax highlights will be added. - /// * `injections_query` - A string containing tree patterns for injecting other languages - /// into the document. This can be empty if no injections are desired. - /// * `locals_query` - A string containing tree patterns for tracking local variable - /// definitions and references. This can be empty if local variable tracking is not needed. - /// - /// Returns a `HighlightConfiguration` that can then be used with the `highlight` method. - pub fn new( - language: Language, - highlights_query: &str, - injection_query: &str, - locals_query: &str, - ) -> Result { - // Concatenate the query strings, keeping track of the start offset of each section. 
- let mut query_source = String::new(); - query_source.push_str(injection_query); - let locals_query_offset = query_source.len(); - query_source.push_str(locals_query); - let highlights_query_offset = query_source.len(); - query_source.push_str(highlights_query); - - // Construct a single query by concatenating the three query strings, but record the - // range of pattern indices that belong to each individual string. - let mut query = Query::new(language, &query_source)?; - let mut locals_pattern_index = 0; - let mut highlights_pattern_index = 0; - for i in 0..(query.pattern_count()) { - let pattern_offset = query.start_byte_for_pattern(i); - if pattern_offset < highlights_query_offset { - if pattern_offset < highlights_query_offset { - highlights_pattern_index += 1; - } - if pattern_offset < locals_query_offset { - locals_pattern_index += 1; - } - } - } - - // Construct a separate query just for dealing with the 'combined injections'. - // Disable the combined injection patterns in the main query. - let mut combined_injections_query = Query::new(language, injection_query)?; - let mut has_combined_queries = false; - for pattern_index in 0..locals_pattern_index { - let settings = query.property_settings(pattern_index); - if settings.iter().any(|s| &*s.key == "injection.combined") { - has_combined_queries = true; - query.disable_pattern(pattern_index); - } else { - combined_injections_query.disable_pattern(pattern_index); - } - } - let combined_injections_query = if has_combined_queries { - Some(combined_injections_query) - } else { - None - }; - - // Find all of the highlighting patterns that are disabled for nodes that - // have been identified as local variables. - let non_local_variable_patterns = (0..query.pattern_count()) - .map(|i| { - query - .property_predicates(i) - .iter() - .any(|(prop, positive)| !*positive && prop.key.as_ref() == "local") - }) - .collect(); - - // Store the numeric ids for all of the special captures. 
- let mut injection_content_capture_index = None; - let mut injection_language_capture_index = None; - let mut local_def_capture_index = None; - let mut local_def_value_capture_index = None; - let mut local_ref_capture_index = None; - let mut local_scope_capture_index = None; - for (i, name) in query.capture_names().iter().enumerate() { - let i = Some(i as u32); - match name.as_str() { - "injection.content" => injection_content_capture_index = i, - "injection.language" => injection_language_capture_index = i, - "local.definition" => local_def_capture_index = i, - "local.definition-value" => local_def_value_capture_index = i, - "local.reference" => local_ref_capture_index = i, - "local.scope" => local_scope_capture_index = i, - _ => {} - } - } - - let highlight_indices = vec![None; query.capture_names().len()]; - Ok(HighlightConfiguration { - language, - query, - combined_injections_query, - locals_pattern_index, - highlights_pattern_index, - highlight_indices, - non_local_variable_patterns, - injection_content_capture_index, - injection_language_capture_index, - local_def_capture_index, - local_def_value_capture_index, - local_ref_capture_index, - local_scope_capture_index, - }) - } - - /// Get a slice containing all of the highlight names used in the configuration. - pub fn names(&self) -> &[String] { - self.query.capture_names() - } - - /// Set the list of recognized highlight names. - /// - /// Tree-sitter syntax-highlighting queries specify highlights in the form of dot-separated - /// highlight names like `punctuation.bracket` and `function.method.builtin`. Consumers of - /// these queries can choose to recognize highlights with different levels of specificity. - /// For example, the string `function.builtin` will match against `function.method.builtin` - /// and `function.builtin.constructor`, but will not match `function.method`. 
- /// - /// When highlighting, results are returned as `Highlight` values, which contain the index - /// of the matched highlight this list of highlight names. - pub fn configure(&mut self, recognized_names: &[String]) { - let mut capture_parts = Vec::new(); - self.highlight_indices.clear(); - self.highlight_indices - .extend(self.query.capture_names().iter().map(move |capture_name| { - capture_parts.clear(); - capture_parts.extend(capture_name.split('.')); - - let mut best_index = None; - let mut best_match_len = 0; - for (i, recognized_name) in recognized_names.iter().enumerate() { - let mut len = 0; - let mut matches = true; - for part in recognized_name.split('.') { - len += 1; - if !capture_parts.contains(&part) { - matches = false; - break; - } - } - if matches && len > best_match_len { - best_index = Some(i); - best_match_len = len; - } - } - best_index.map(Highlight) - })); - } -} - -impl<'a> HighlightIterLayer<'a> { - /// Create a new 'layer' of highlighting for this document. - /// - /// In the even that the new layer contains "combined injections" (injections where multiple - /// disjoint ranges are parsed as one syntax tree), these will be eagerly processed and - /// added to the returned vector. 
- fn new Option<&'a HighlightConfiguration> + 'a>( - source: &'a [u8], - highlighter: &mut Highlighter, - cancellation_flag: Option<&'a AtomicUsize>, - injection_callback: &mut F, - mut config: &'a HighlightConfiguration, - mut depth: usize, - mut ranges: Vec, - ) -> Result, Error> { - let mut result = Vec::with_capacity(1); - let mut queue = Vec::new(); - loop { - if highlighter.parser.set_included_ranges(&ranges).is_ok() { - highlighter - .parser - .set_language(config.language) - .map_err(|_| Error::InvalidLanguage)?; - - unsafe { highlighter.parser.set_cancellation_flag(cancellation_flag) }; - let tree = highlighter - .parser - .parse(source, None) - .ok_or(Error::Cancelled)?; - unsafe { highlighter.parser.set_cancellation_flag(None) }; - let mut cursor = highlighter.cursors.pop().unwrap_or(QueryCursor::new()); - - // Process combined injections. - if let Some(combined_injections_query) = &config.combined_injections_query { - let mut injections_by_pattern_index = - vec![(None, Vec::new(), false); combined_injections_query.pattern_count()]; - let matches = - cursor.matches(combined_injections_query, tree.root_node(), |n: Node| { - &source[n.byte_range()] - }); - for mat in matches { - let entry = &mut injections_by_pattern_index[mat.pattern_index]; - let (language_name, content_node, include_children) = - injection_for_match(config, combined_injections_query, &mat, source); - if language_name.is_some() { - entry.0 = language_name; - } - if let Some(content_node) = content_node { - entry.1.push(content_node); - } - entry.2 = include_children; - } - for (lang_name, content_nodes, includes_children) in injections_by_pattern_index - { - if let (Some(lang_name), false) = (lang_name, content_nodes.is_empty()) { - if let Some(next_config) = (injection_callback)(lang_name) { - let ranges = Self::intersect_ranges( - &ranges, - &content_nodes, - includes_children, - ); - if !ranges.is_empty() { - queue.push((next_config, depth + 1, ranges)); - } - } - } - } - } - - // The 
`captures` iterator borrows the `Tree` and the `QueryCursor`, which - // prevents them from being moved. But both of these values are really just - // pointers, so it's actually ok to move them. - let tree_ref = unsafe { mem::transmute::<_, &'static Tree>(&tree) }; - let cursor_ref = - unsafe { mem::transmute::<_, &'static mut QueryCursor>(&mut cursor) }; - let captures = cursor_ref - .captures(&config.query, tree_ref.root_node(), move |n: Node| { - &source[n.byte_range()] - }) - .peekable(); - - result.push(HighlightIterLayer { - highlight_end_stack: Vec::new(), - scope_stack: vec![LocalScope { - inherits: false, - range: 0..usize::MAX, - local_defs: Vec::new(), - }], - cursor, - depth, - _tree: tree, - captures, - config, - ranges, - }); - } - - if queue.is_empty() { - break; - } else { - let (next_config, next_depth, next_ranges) = queue.remove(0); - config = next_config; - depth = next_depth; - ranges = next_ranges; - } - } - - Ok(result) - } - - // Compute the ranges that should be included when parsing an injection. - // This takes into account three things: - // * `parent_ranges` - The ranges must all fall within the *current* layer's ranges. - // * `nodes` - Every injection takes place within a set of nodes. The injection ranges - // are the ranges of those nodes. - // * `includes_children` - For some injections, the content nodes' children should be - // excluded from the nested document, so that only the content nodes' *own* content - // is reparsed. For other injections, the content nodes' entire ranges should be - // reparsed, including the ranges of their children. 
- fn intersect_ranges( - parent_ranges: &[Range], - nodes: &[Node], - includes_children: bool, - ) -> Vec { - let mut cursor = nodes[0].walk(); - let mut result = Vec::new(); - let mut parent_range_iter = parent_ranges.iter(); - let mut parent_range = parent_range_iter - .next() - .expect("Layers should only be constructed with non-empty ranges vectors"); - for node in nodes.iter() { - let mut preceding_range = Range { - start_byte: 0, - start_point: Point::new(0, 0), - end_byte: node.start_byte(), - end_point: node.start_position(), - }; - let following_range = Range { - start_byte: node.end_byte(), - start_point: node.end_position(), - end_byte: usize::MAX, - end_point: Point::new(usize::MAX, usize::MAX), - }; - - for excluded_range in node - .children(&mut cursor) - .filter_map(|child| { - if includes_children { - None - } else { - Some(child.range()) - } - }) - .chain([following_range].iter().cloned()) - { - let mut range = Range { - start_byte: preceding_range.end_byte, - start_point: preceding_range.end_point, - end_byte: excluded_range.start_byte, - end_point: excluded_range.start_point, - }; - preceding_range = excluded_range; - - if range.end_byte < parent_range.start_byte { - continue; - } - - while parent_range.start_byte <= range.end_byte { - if parent_range.end_byte > range.start_byte { - if range.start_byte < parent_range.start_byte { - range.start_byte = parent_range.start_byte; - range.start_point = parent_range.start_point; - } - - if parent_range.end_byte < range.end_byte { - if range.start_byte < parent_range.end_byte { - result.push(Range { - start_byte: range.start_byte, - start_point: range.start_point, - end_byte: parent_range.end_byte, - end_point: parent_range.end_point, - }); - } - range.start_byte = parent_range.end_byte; - range.start_point = parent_range.end_point; - } else { - if range.start_byte < range.end_byte { - result.push(range); - } - break; - } - } - - if let Some(next_range) = parent_range_iter.next() { - parent_range = 
next_range; - } else { - return result; - } - } - } - } - result - } - - // First, sort scope boundaries by their byte offset in the document. At a - // given position, emit scope endings before scope beginnings. Finally, emit - // scope boundaries from deeper layers first. - fn sort_key(&mut self) -> Option<(usize, bool, isize)> { - let depth = -(self.depth as isize); - let next_start = self - .captures - .peek() - .map(|(m, i)| m.captures[*i].node.start_byte()); - let next_end = self.highlight_end_stack.last().cloned(); - match (next_start, next_end) { - (Some(start), Some(end)) => { - if start < end { - Some((start, true, depth)) - } else { - Some((end, false, depth)) - } - } - (Some(i), None) => Some((i, true, depth)), - (None, Some(j)) => Some((j, false, depth)), - _ => None, - } - } -} - -impl<'a, F> HighlightIter<'a, F> -where - F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a, -{ - fn emit_event( - &mut self, - offset: usize, - event: Option, - ) -> Option> { - let result; - if self.byte_offset < offset { - result = Some(Ok(HighlightEvent::Source { - start: self.byte_offset, - end: offset, - })); - self.byte_offset = offset; - self.next_event = event; - } else { - result = event.map(Ok); - } - self.sort_layers(); - result - } - - fn sort_layers(&mut self) { - while !self.layers.is_empty() { - if let Some(sort_key) = self.layers[0].sort_key() { - let mut i = 0; - while i + 1 < self.layers.len() { - if let Some(next_offset) = self.layers[i + 1].sort_key() { - if next_offset < sort_key { - i += 1; - continue; - } - } - break; - } - if i > 0 { - &self.layers[0..(i + 1)].rotate_left(1); - } - break; - } else { - let layer = self.layers.remove(0); - self.highlighter.cursors.push(layer.cursor); - } - } - } - - fn insert_layer(&mut self, mut layer: HighlightIterLayer<'a>) { - if let Some(sort_key) = layer.sort_key() { - let mut i = 1; - while i < self.layers.len() { - if let Some(sort_key_i) = self.layers[i].sort_key() { - if sort_key_i > sort_key { - 
self.layers.insert(i, layer); - return; - } - i += 1; - } else { - self.layers.remove(i); - } - } - self.layers.push(layer); - } - } -} - -impl<'a, F> Iterator for HighlightIter<'a, F> -where - F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a, -{ - type Item = Result; - - fn next(&mut self) -> Option { - 'main: loop { - // If we've already determined the next highlight boundary, just return it. - if let Some(e) = self.next_event.take() { - return Some(Ok(e)); - } - - // Periodically check for cancellation, returning `Cancelled` error if the - // cancellation flag was flipped. - if let Some(cancellation_flag) = self.cancellation_flag { - self.iter_count += 1; - if self.iter_count >= CANCELLATION_CHECK_INTERVAL { - self.iter_count = 0; - if cancellation_flag.load(Ordering::Relaxed) != 0 { - return Some(Err(Error::Cancelled)); - } - } - } - - // If none of the layers have any more highlight boundaries, terminate. - if self.layers.is_empty() { - return if self.byte_offset < self.source.len() { - let result = Some(Ok(HighlightEvent::Source { - start: self.byte_offset, - end: self.source.len(), - })); - self.byte_offset = self.source.len(); - result - } else { - None - }; - } - - // Get the next capture from whichever layer has the earliest highlight boundary. - let range; - let layer = &mut self.layers[0]; - if let Some((next_match, capture_index)) = layer.captures.peek() { - let next_capture = next_match.captures[*capture_index]; - range = next_capture.node.byte_range(); - - // If any previous highlight ends before this node starts, then before - // processing this capture, emit the source code up until the end of the - // previous highlight, and an end event for that highlight. 
- if let Some(end_byte) = layer.highlight_end_stack.last().cloned() { - if end_byte <= range.start { - layer.highlight_end_stack.pop(); - return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd)); - } - } - } - // If there are no more captures, then emit any remaining highlight end events. - // And if there are none of those, then just advance to the end of the document. - else if let Some(end_byte) = layer.highlight_end_stack.last().cloned() { - layer.highlight_end_stack.pop(); - return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd)); - } else { - return self.emit_event(self.source.len(), None); - }; - - let (mut match_, capture_index) = layer.captures.next().unwrap(); - let mut capture = match_.captures[capture_index]; - - // If this capture represents an injection, then process the injection. - if match_.pattern_index < layer.config.locals_pattern_index { - let (language_name, content_node, include_children) = - injection_for_match(&layer.config, &layer.config.query, &match_, &self.source); - - // Explicitly remove this match so that none of its other captures will remain - // in the stream of captures. - match_.remove(); - - // If a language is found with the given name, then add a new language layer - // to the highlighted document. 
- if let (Some(language_name), Some(content_node)) = (language_name, content_node) { - if let Some(config) = (self.injection_callback)(language_name) { - let ranges = HighlightIterLayer::intersect_ranges( - &self.layers[0].ranges, - &[content_node], - include_children, - ); - if !ranges.is_empty() { - match HighlightIterLayer::new( - self.source, - self.highlighter, - self.cancellation_flag, - &mut self.injection_callback, - config, - self.layers[0].depth + 1, - ranges, - ) { - Ok(layers) => { - for layer in layers { - self.insert_layer(layer); - } - } - Err(e) => return Some(Err(e)), - } - } - } - } - - self.sort_layers(); - continue 'main; - } - - // Remove from the local scope stack any local scopes that have already ended. - while range.start > layer.scope_stack.last().unwrap().range.end { - layer.scope_stack.pop(); - } - - // If this capture is for tracking local variables, then process the - // local variable info. - let mut reference_highlight = None; - let mut definition_highlight = None; - while match_.pattern_index < layer.config.highlights_pattern_index { - // If the node represents a local scope, push a new local scope onto - // the scope stack. - if Some(capture.index) == layer.config.local_scope_capture_index { - definition_highlight = None; - let mut scope = LocalScope { - inherits: true, - range: range.clone(), - local_defs: Vec::new(), - }; - for prop in layer.config.query.property_settings(match_.pattern_index) { - match prop.key.as_ref() { - "local.scope-inherits" => { - scope.inherits = - prop.value.as_ref().map_or(true, |r| r.as_ref() == "true"); - } - _ => {} - } - } - layer.scope_stack.push(scope); - } - // If the node represents a definition, add a new definition to the - // local scope at the top of the scope stack. 
- else if Some(capture.index) == layer.config.local_def_capture_index { - reference_highlight = None; - definition_highlight = None; - let scope = layer.scope_stack.last_mut().unwrap(); - - let mut value_range = 0..0; - for capture in match_.captures { - if Some(capture.index) == layer.config.local_def_value_capture_index { - value_range = capture.node.byte_range(); - } - } - - if let Ok(name) = str::from_utf8(&self.source[range.clone()]) { - scope.local_defs.push(LocalDef { - name, - value_range, - highlight: None, - }); - definition_highlight = - scope.local_defs.last_mut().map(|s| &mut s.highlight); - } - } - // If the node represents a reference, then try to find the corresponding - // definition in the scope stack. - else if Some(capture.index) == layer.config.local_ref_capture_index { - if definition_highlight.is_none() { - definition_highlight = None; - if let Ok(name) = str::from_utf8(&self.source[range.clone()]) { - for scope in layer.scope_stack.iter().rev() { - if let Some(highlight) = - scope.local_defs.iter().rev().find_map(|def| { - if def.name == name && range.start >= def.value_range.end { - Some(def.highlight) - } else { - None - } - }) - { - reference_highlight = highlight; - break; - } - if !scope.inherits { - break; - } - } - } - } - } - - // Continue processing any additional matches for the same node. - if let Some((next_match, next_capture_index)) = layer.captures.peek() { - let next_capture = next_match.captures[*next_capture_index]; - if next_capture.node == capture.node { - capture = next_capture; - match_ = layer.captures.next().unwrap().0; - continue; - } - } - - self.sort_layers(); - continue 'main; - } - - // Otherwise, this capture must represent a highlight. - // If this exact range has already been highlighted by an earlier pattern, or by - // a different layer, then skip over this one. 
- if let Some((last_start, last_end, last_depth)) = self.last_highlight_range { - if range.start == last_start && range.end == last_end && layer.depth < last_depth { - self.sort_layers(); - continue 'main; - } - } - - // If the current node was found to be a local variable, then skip over any - // highlighting patterns that are disabled for local variables. - if definition_highlight.is_some() || reference_highlight.is_some() { - while layer.config.non_local_variable_patterns[match_.pattern_index] { - if let Some((next_match, next_capture_index)) = layer.captures.peek() { - let next_capture = next_match.captures[*next_capture_index]; - if next_capture.node == capture.node { - capture = next_capture; - match_ = layer.captures.next().unwrap().0; - continue; - } - } - - self.sort_layers(); - continue 'main; - } - } - - // Once a highlighting pattern is found for the current node, skip over - // any later highlighting patterns that also match this node. Captures - // for a given node are ordered by pattern index, so these subsequent - // captures are guaranteed to be for highlighting, not injections or - // local variables. - while let Some((next_match, next_capture_index)) = layer.captures.peek() { - let next_capture = next_match.captures[*next_capture_index]; - if next_capture.node == capture.node { - layer.captures.next(); - } else { - break; - } - } - - let current_highlight = layer.config.highlight_indices[capture.index as usize]; - - // If this node represents a local definition, then store the current - // highlight value on the local scope entry representing this node. - if let Some(definition_highlight) = definition_highlight { - *definition_highlight = current_highlight; - } - - // Emit a scope start event and push the node's end position to the stack. 
- if let Some(highlight) = reference_highlight.or(current_highlight) { - self.last_highlight_range = Some((range.start, range.end, layer.depth)); - layer.highlight_end_stack.push(range.end); - return self - .emit_event(range.start, Some(HighlightEvent::HighlightStart(highlight))); - } - - self.sort_layers(); - } - } -} - -fn injection_for_match<'a>( - config: &HighlightConfiguration, - query: &'a Query, - query_match: &QueryMatch<'a>, - source: &'a [u8], -) -> (Option<&'a str>, Option>, bool) { - let content_capture_index = config.injection_content_capture_index; - let language_capture_index = config.injection_language_capture_index; - - let mut language_name = None; - let mut content_node = None; - for capture in query_match.captures { - let index = Some(capture.index); - if index == language_capture_index { - language_name = capture.node.utf8_text(source).ok(); - } else if index == content_capture_index { - content_node = Some(capture.node); - } - } - - let mut include_children = false; - for prop in query.property_settings(query_match.pattern_index) { - match prop.key.as_ref() { - // In addition to specifying the language name via the text of a - // captured node, it can also be hard-coded via a `#set!` predicate - // that sets the injection.language key. - "injection.language" => { - if language_name.is_none() { - language_name = prop.value.as_ref().map(|s| s.as_ref()) - } - } - - // By default, injections do not include the *children* of an - // `injection.content` node - only the ranges that belong to the - // node itself. This can be changed using a `#set!` predicate that - // sets the `injection.include-children` key. 
- "injection.include-children" => include_children = true, - _ => {} - } - } - - (language_name, content_node, include_children) -} - -fn shrink_and_clear(vec: &mut Vec, capacity: usize) { - if vec.len() > capacity { - vec.truncate(capacity); - vec.shrink_to_fit(); - } - vec.clear(); -} diff --git a/helix-core/src/lib.rs b/helix-core/src/lib.rs index 6ab40694..f593dcfb 100644 --- a/helix-core/src/lib.rs +++ b/helix-core/src/lib.rs @@ -1,7 +1,7 @@ #![allow(unused)] pub mod commands; pub mod graphemes; -pub mod language_mode; +pub mod syntax; mod selection; pub mod state; mod transaction; diff --git a/helix-core/src/syntax.rs b/helix-core/src/syntax.rs new file mode 100644 index 00000000..8a4ad519 --- /dev/null +++ b/helix-core/src/syntax.rs @@ -0,0 +1,1014 @@ +pub struct LanguageMode { + parser: Parser, +} + +impl LanguageMode { + // buffer, grammar, config, grammars, sync_timeout? + pub fn new() -> Self { + unimplemented!() + // make a new root layer + // track markers of injections + // + // track scope_descriptor: a Vec of scopes for item in tree + // + // fetch grammar for parser based on language string + // update root layer + } + + // fn buffer_changed -> call layer.update(range, new_text) on root layer and then all marker layers + + // call this on transaction.apply() -> buffer_changed(changes) + // + // fn parse(language, old_tree, ranges) + // + // fn tree() -> Tree + // + // + + // Highlighting + // fn highlight_iter() -> iterates over all the scopes + // on_tokenize + // on_change_highlighting + + // Commenting + // comment_strings_for_pos + // is_commented + + // Indentation + // suggested_indent_for_line_at_buffer_row + // suggested_indent_for_buffer_row + // indent_level_for_line + + // TODO: Folding + + // Syntax APIs + // get_syntax_node_containing_range -> + // ... 
+ // get_syntax_node_at_pos + // buffer_range_for_scope_at_pos +} + +pub struct LanguageLayer { + // mode +// grammar +// depth +// tree: Tree, +} + +impl LanguageLayer { + // fn highlight_iter() -> same as Mode but for this layer. Mode composits these + // fn buffer_changed + // fn update(range) + // fn update_injections() +} + +// -- refactored from tree-sitter-highlight to be able to retain state +// TODO: add seek() to iter + +// problem: any time a layer is updated it must update it's injections on the parent (potentially +// removing some from use) +// can't modify to vec and exist in it at the same time since that would violate borrows +// maybe we can do with an arena +// maybe just caching on the top layer and nevermind the injections for now? +// +// Grammar { +// layers: Vec> to prevent memory moves when vec is modified +// } +// injections tracked by marker: +// if marker areas match it's fine and update +// if not found add new layer +// if length 0 then area got removed, clean up the layer +// +// layer update: +// if range.len = 0 then remove the layer +// for change in changes { tree.edit(change) } +// tree = parser.parse(.., tree, ..) +// calculate affected range and update injections +// injection update: +// look for existing injections +// if present, range = (first injection start, last injection end) +// +// For now cheat and just throw out non-root layers if they exist. This should still improve +// parsing in majority of cases. + +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::{iter, mem, ops, str, usize}; +use tree_sitter::{ + Language, Node, Parser, Point, Query, QueryCaptures, QueryCursor, QueryError, QueryMatch, + Range, Tree, +}; + +const CANCELLATION_CHECK_INTERVAL: usize = 100; + +/// Indicates which highlight should be applied to a region of source code. +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub struct Highlight(pub usize); + +/// Represents the reason why syntax highlighting failed. 
+#[derive(Debug, PartialEq, Eq)] +pub enum Error { + Cancelled, + InvalidLanguage, + Unknown, +} + +/// Represents a single step in rendering a syntax-highlighted document. +#[derive(Copy, Clone, Debug)] +pub enum HighlightEvent { + Source { start: usize, end: usize }, + HighlightStart(Highlight), + HighlightEnd, +} + +/// Contains the data needed to highlight code written in a particular language. +/// +/// This struct is immutable and can be shared between threads. +pub struct HighlightConfiguration { + pub language: Language, + pub query: Query, + combined_injections_query: Option<Query>, + locals_pattern_index: usize, + highlights_pattern_index: usize, + highlight_indices: Vec<Option<Highlight>>, + non_local_variable_patterns: Vec<bool>, + injection_content_capture_index: Option<u32>, + injection_language_capture_index: Option<u32>, + local_scope_capture_index: Option<u32>, + local_def_capture_index: Option<u32>, + local_def_value_capture_index: Option<u32>, + local_ref_capture_index: Option<u32>, +} + +/// Performs syntax highlighting, recognizing a given list of highlight names. +/// +/// For the best performance `Highlighter` values should be reused between +/// syntax highlighting calls. A separate highlighter is needed for each thread that +/// is performing highlighting. 
+pub struct Highlighter { + parser: Parser, + cursors: Vec, +} + +#[derive(Debug)] +struct LocalDef<'a> { + name: &'a str, + value_range: ops::Range, + highlight: Option, +} + +#[derive(Debug)] +struct LocalScope<'a> { + inherits: bool, + range: ops::Range, + local_defs: Vec>, +} + +struct HighlightIter<'a, F> +where + F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a, +{ + source: &'a [u8], + byte_offset: usize, + highlighter: &'a mut Highlighter, + injection_callback: F, + cancellation_flag: Option<&'a AtomicUsize>, + layers: Vec>, + iter_count: usize, + next_event: Option, + last_highlight_range: Option<(usize, usize, usize)>, +} + +struct HighlightIterLayer<'a> { + _tree: Tree, + cursor: QueryCursor, + captures: iter::Peekable>, + config: &'a HighlightConfiguration, + highlight_end_stack: Vec, + scope_stack: Vec>, + ranges: Vec, + depth: usize, +} + +impl Highlighter { + pub fn new() -> Self { + Highlighter { + parser: Parser::new(), + cursors: Vec::new(), + } + } + + pub fn parser(&mut self) -> &mut Parser { + &mut self.parser + } + + /// Iterate over the highlighted regions for a given slice of source code. 
+ pub fn highlight<'a>( + &'a mut self, + config: &'a HighlightConfiguration, + source: &'a [u8], + cancellation_flag: Option<&'a AtomicUsize>, + mut injection_callback: impl FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a, + ) -> Result> + 'a, Error> { + let layers = HighlightIterLayer::new( + source, + self, + cancellation_flag, + &mut injection_callback, + config, + 0, + vec![Range { + start_byte: 0, + end_byte: usize::MAX, + start_point: Point::new(0, 0), + end_point: Point::new(usize::MAX, usize::MAX), + }], + )?; + assert_ne!(layers.len(), 0); + let mut result = HighlightIter { + source, + byte_offset: 0, + injection_callback, + cancellation_flag, + highlighter: self, + iter_count: 0, + layers, + next_event: None, + last_highlight_range: None, + }; + result.sort_layers(); + Ok(result) + } +} + +impl HighlightConfiguration { + /// Creates a `HighlightConfiguration` for a given `Language` and set of highlighting + /// queries. + /// + /// # Parameters + /// + /// * `language` - The Tree-sitter `Language` that should be used for parsing. + /// * `highlights_query` - A string containing tree patterns for syntax highlighting. This + /// should be non-empty, otherwise no syntax highlights will be added. + /// * `injections_query` - A string containing tree patterns for injecting other languages + /// into the document. This can be empty if no injections are desired. + /// * `locals_query` - A string containing tree patterns for tracking local variable + /// definitions and references. This can be empty if local variable tracking is not needed. + /// + /// Returns a `HighlightConfiguration` that can then be used with the `highlight` method. + pub fn new( + language: Language, + highlights_query: &str, + injection_query: &str, + locals_query: &str, + ) -> Result { + // Concatenate the query strings, keeping track of the start offset of each section. 
+ let mut query_source = String::new(); + query_source.push_str(injection_query); + let locals_query_offset = query_source.len(); + query_source.push_str(locals_query); + let highlights_query_offset = query_source.len(); + query_source.push_str(highlights_query); + + // Construct a single query by concatenating the three query strings, but record the + // range of pattern indices that belong to each individual string. + let mut query = Query::new(language, &query_source)?; + let mut locals_pattern_index = 0; + let mut highlights_pattern_index = 0; + for i in 0..(query.pattern_count()) { + let pattern_offset = query.start_byte_for_pattern(i); + if pattern_offset < highlights_query_offset { + if pattern_offset < highlights_query_offset { + highlights_pattern_index += 1; + } + if pattern_offset < locals_query_offset { + locals_pattern_index += 1; + } + } + } + + // Construct a separate query just for dealing with the 'combined injections'. + // Disable the combined injection patterns in the main query. + let mut combined_injections_query = Query::new(language, injection_query)?; + let mut has_combined_queries = false; + for pattern_index in 0..locals_pattern_index { + let settings = query.property_settings(pattern_index); + if settings.iter().any(|s| &*s.key == "injection.combined") { + has_combined_queries = true; + query.disable_pattern(pattern_index); + } else { + combined_injections_query.disable_pattern(pattern_index); + } + } + let combined_injections_query = if has_combined_queries { + Some(combined_injections_query) + } else { + None + }; + + // Find all of the highlighting patterns that are disabled for nodes that + // have been identified as local variables. + let non_local_variable_patterns = (0..query.pattern_count()) + .map(|i| { + query + .property_predicates(i) + .iter() + .any(|(prop, positive)| !*positive && prop.key.as_ref() == "local") + }) + .collect(); + + // Store the numeric ids for all of the special captures. 
+ let mut injection_content_capture_index = None; + let mut injection_language_capture_index = None; + let mut local_def_capture_index = None; + let mut local_def_value_capture_index = None; + let mut local_ref_capture_index = None; + let mut local_scope_capture_index = None; + for (i, name) in query.capture_names().iter().enumerate() { + let i = Some(i as u32); + match name.as_str() { + "injection.content" => injection_content_capture_index = i, + "injection.language" => injection_language_capture_index = i, + "local.definition" => local_def_capture_index = i, + "local.definition-value" => local_def_value_capture_index = i, + "local.reference" => local_ref_capture_index = i, + "local.scope" => local_scope_capture_index = i, + _ => {} + } + } + + let highlight_indices = vec![None; query.capture_names().len()]; + Ok(HighlightConfiguration { + language, + query, + combined_injections_query, + locals_pattern_index, + highlights_pattern_index, + highlight_indices, + non_local_variable_patterns, + injection_content_capture_index, + injection_language_capture_index, + local_def_capture_index, + local_def_value_capture_index, + local_ref_capture_index, + local_scope_capture_index, + }) + } + + /// Get a slice containing all of the highlight names used in the configuration. + pub fn names(&self) -> &[String] { + self.query.capture_names() + } + + /// Set the list of recognized highlight names. + /// + /// Tree-sitter syntax-highlighting queries specify highlights in the form of dot-separated + /// highlight names like `punctuation.bracket` and `function.method.builtin`. Consumers of + /// these queries can choose to recognize highlights with different levels of specificity. + /// For example, the string `function.builtin` will match against `function.method.builtin` + /// and `function.builtin.constructor`, but will not match `function.method`. 
+ /// + /// When highlighting, results are returned as `Highlight` values, which contain the index + /// of the matched highlight in this list of highlight names. + pub fn configure(&mut self, recognized_names: &[String]) { + let mut capture_parts = Vec::new(); + self.highlight_indices.clear(); + self.highlight_indices + .extend(self.query.capture_names().iter().map(move |capture_name| { + capture_parts.clear(); + capture_parts.extend(capture_name.split('.')); + + let mut best_index = None; + let mut best_match_len = 0; + for (i, recognized_name) in recognized_names.iter().enumerate() { + let mut len = 0; + let mut matches = true; + for part in recognized_name.split('.') { + len += 1; + if !capture_parts.contains(&part) { + matches = false; + break; + } + } + if matches && len > best_match_len { + best_index = Some(i); + best_match_len = len; + } + } + best_index.map(Highlight) + })); + } +} + +impl<'a> HighlightIterLayer<'a> { + /// Create a new 'layer' of highlighting for this document. + /// + /// In the event that the new layer contains "combined injections" (injections where multiple + /// disjoint ranges are parsed as one syntax tree), these will be eagerly processed and + /// added to the returned vector. 
+ fn new Option<&'a HighlightConfiguration> + 'a>( + source: &'a [u8], + highlighter: &mut Highlighter, + cancellation_flag: Option<&'a AtomicUsize>, + injection_callback: &mut F, + mut config: &'a HighlightConfiguration, + mut depth: usize, + mut ranges: Vec, + ) -> Result, Error> { + let mut result = Vec::with_capacity(1); + let mut queue = Vec::new(); + loop { + if highlighter.parser.set_included_ranges(&ranges).is_ok() { + highlighter + .parser + .set_language(config.language) + .map_err(|_| Error::InvalidLanguage)?; + + unsafe { highlighter.parser.set_cancellation_flag(cancellation_flag) }; + let tree = highlighter + .parser + .parse(source, None) + .ok_or(Error::Cancelled)?; + unsafe { highlighter.parser.set_cancellation_flag(None) }; + let mut cursor = highlighter.cursors.pop().unwrap_or_else(QueryCursor::new); + + // Process combined injections. + if let Some(combined_injections_query) = &config.combined_injections_query { + let mut injections_by_pattern_index = + vec![(None, Vec::new(), false); combined_injections_query.pattern_count()]; + let matches = + cursor.matches(combined_injections_query, tree.root_node(), |n: Node| { + &source[n.byte_range()] + }); + for mat in matches { + let entry = &mut injections_by_pattern_index[mat.pattern_index]; + let (language_name, content_node, include_children) = + injection_for_match(config, combined_injections_query, &mat, source); + if language_name.is_some() { + entry.0 = language_name; + } + if let Some(content_node) = content_node { + entry.1.push(content_node); + } + entry.2 = include_children; + } + for (lang_name, content_nodes, includes_children) in injections_by_pattern_index + { + if let (Some(lang_name), false) = (lang_name, content_nodes.is_empty()) { + if let Some(next_config) = (injection_callback)(lang_name) { + let ranges = Self::intersect_ranges( + &ranges, + &content_nodes, + includes_children, + ); + if !ranges.is_empty() { + queue.push((next_config, depth + 1, ranges)); + } + } + } + } + } + + // 
The `captures` iterator borrows the `Tree` and the `QueryCursor`, which + // prevents them from being moved. But both of these values are really just + // pointers, so it's actually ok to move them. + let tree_ref = unsafe { mem::transmute::<_, &'static Tree>(&tree) }; + let cursor_ref = + unsafe { mem::transmute::<_, &'static mut QueryCursor>(&mut cursor) }; + let captures = cursor_ref + .captures(&config.query, tree_ref.root_node(), move |n: Node| { + &source[n.byte_range()] + }) + .peekable(); + + result.push(HighlightIterLayer { + highlight_end_stack: Vec::new(), + scope_stack: vec![LocalScope { + inherits: false, + range: 0..usize::MAX, + local_defs: Vec::new(), + }], + cursor, + depth, + _tree: tree, + captures, + config, + ranges, + }); + } + + if queue.is_empty() { + break; + } else { + let (next_config, next_depth, next_ranges) = queue.remove(0); + config = next_config; + depth = next_depth; + ranges = next_ranges; + } + } + + Ok(result) + } + + // Compute the ranges that should be included when parsing an injection. + // This takes into account three things: + // * `parent_ranges` - The ranges must all fall within the *current* layer's ranges. + // * `nodes` - Every injection takes place within a set of nodes. The injection ranges + // are the ranges of those nodes. + // * `includes_children` - For some injections, the content nodes' children should be + // excluded from the nested document, so that only the content nodes' *own* content + // is reparsed. For other injections, the content nodes' entire ranges should be + // reparsed, including the ranges of their children. 
+ fn intersect_ranges( + parent_ranges: &[Range], + nodes: &[Node], + includes_children: bool, + ) -> Vec { + let mut cursor = nodes[0].walk(); + let mut result = Vec::new(); + let mut parent_range_iter = parent_ranges.iter(); + let mut parent_range = parent_range_iter + .next() + .expect("Layers should only be constructed with non-empty ranges vectors"); + for node in nodes.iter() { + let mut preceding_range = Range { + start_byte: 0, + start_point: Point::new(0, 0), + end_byte: node.start_byte(), + end_point: node.start_position(), + }; + let following_range = Range { + start_byte: node.end_byte(), + start_point: node.end_position(), + end_byte: usize::MAX, + end_point: Point::new(usize::MAX, usize::MAX), + }; + + for excluded_range in node + .children(&mut cursor) + .filter_map(|child| { + if includes_children { + None + } else { + Some(child.range()) + } + }) + .chain([following_range].iter().cloned()) + { + let mut range = Range { + start_byte: preceding_range.end_byte, + start_point: preceding_range.end_point, + end_byte: excluded_range.start_byte, + end_point: excluded_range.start_point, + }; + preceding_range = excluded_range; + + if range.end_byte < parent_range.start_byte { + continue; + } + + while parent_range.start_byte <= range.end_byte { + if parent_range.end_byte > range.start_byte { + if range.start_byte < parent_range.start_byte { + range.start_byte = parent_range.start_byte; + range.start_point = parent_range.start_point; + } + + if parent_range.end_byte < range.end_byte { + if range.start_byte < parent_range.end_byte { + result.push(Range { + start_byte: range.start_byte, + start_point: range.start_point, + end_byte: parent_range.end_byte, + end_point: parent_range.end_point, + }); + } + range.start_byte = parent_range.end_byte; + range.start_point = parent_range.end_point; + } else { + if range.start_byte < range.end_byte { + result.push(range); + } + break; + } + } + + if let Some(next_range) = parent_range_iter.next() { + parent_range = 
next_range; + } else { + return result; + } + } + } + } + result + } + + // First, sort scope boundaries by their byte offset in the document. At a + // given position, emit scope endings before scope beginnings. Finally, emit + // scope boundaries from deeper layers first. + fn sort_key(&mut self) -> Option<(usize, bool, isize)> { + let depth = -(self.depth as isize); + let next_start = self + .captures + .peek() + .map(|(m, i)| m.captures[*i].node.start_byte()); + let next_end = self.highlight_end_stack.last().cloned(); + match (next_start, next_end) { + (Some(start), Some(end)) => { + if start < end { + Some((start, true, depth)) + } else { + Some((end, false, depth)) + } + } + (Some(i), None) => Some((i, true, depth)), + (None, Some(j)) => Some((j, false, depth)), + _ => None, + } + } +} + +impl<'a, F> HighlightIter<'a, F> +where + F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a, +{ + fn emit_event( + &mut self, + offset: usize, + event: Option, + ) -> Option> { + let result; + if self.byte_offset < offset { + result = Some(Ok(HighlightEvent::Source { + start: self.byte_offset, + end: offset, + })); + self.byte_offset = offset; + self.next_event = event; + } else { + result = event.map(Ok); + } + self.sort_layers(); + result + } + + fn sort_layers(&mut self) { + while !self.layers.is_empty() { + if let Some(sort_key) = self.layers[0].sort_key() { + let mut i = 0; + while i + 1 < self.layers.len() { + if let Some(next_offset) = self.layers[i + 1].sort_key() { + if next_offset < sort_key { + i += 1; + continue; + } + } + break; + } + if i > 0 { + self.layers[0..(i + 1)].rotate_left(1); + } + break; + } else { + let layer = self.layers.remove(0); + self.highlighter.cursors.push(layer.cursor); + } + } + } + + fn insert_layer(&mut self, mut layer: HighlightIterLayer<'a>) { + if let Some(sort_key) = layer.sort_key() { + let mut i = 1; + while i < self.layers.len() { + if let Some(sort_key_i) = self.layers[i].sort_key() { + if sort_key_i > sort_key { + 
self.layers.insert(i, layer); + return; + } + i += 1; + } else { + self.layers.remove(i); + } + } + self.layers.push(layer); + } + } +} + +impl<'a, F> Iterator for HighlightIter<'a, F> +where + F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a, +{ + type Item = Result; + + fn next(&mut self) -> Option { + 'main: loop { + // If we've already determined the next highlight boundary, just return it. + if let Some(e) = self.next_event.take() { + return Some(Ok(e)); + } + + // Periodically check for cancellation, returning `Cancelled` error if the + // cancellation flag was flipped. + if let Some(cancellation_flag) = self.cancellation_flag { + self.iter_count += 1; + if self.iter_count >= CANCELLATION_CHECK_INTERVAL { + self.iter_count = 0; + if cancellation_flag.load(Ordering::Relaxed) != 0 { + return Some(Err(Error::Cancelled)); + } + } + } + + // If none of the layers have any more highlight boundaries, terminate. + if self.layers.is_empty() { + return if self.byte_offset < self.source.len() { + let result = Some(Ok(HighlightEvent::Source { + start: self.byte_offset, + end: self.source.len(), + })); + self.byte_offset = self.source.len(); + result + } else { + None + }; + } + + // Get the next capture from whichever layer has the earliest highlight boundary. + let range; + let layer = &mut self.layers[0]; + if let Some((next_match, capture_index)) = layer.captures.peek() { + let next_capture = next_match.captures[*capture_index]; + range = next_capture.node.byte_range(); + + // If any previous highlight ends before this node starts, then before + // processing this capture, emit the source code up until the end of the + // previous highlight, and an end event for that highlight. 
+ if let Some(end_byte) = layer.highlight_end_stack.last().cloned() { + if end_byte <= range.start { + layer.highlight_end_stack.pop(); + return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd)); + } + } + } + // If there are no more captures, then emit any remaining highlight end events. + // And if there are none of those, then just advance to the end of the document. + else if let Some(end_byte) = layer.highlight_end_stack.last().cloned() { + layer.highlight_end_stack.pop(); + return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd)); + } else { + return self.emit_event(self.source.len(), None); + }; + + let (mut match_, capture_index) = layer.captures.next().unwrap(); + let mut capture = match_.captures[capture_index]; + + // If this capture represents an injection, then process the injection. + if match_.pattern_index < layer.config.locals_pattern_index { + let (language_name, content_node, include_children) = + injection_for_match(&layer.config, &layer.config.query, &match_, &self.source); + + // Explicitly remove this match so that none of its other captures will remain + // in the stream of captures. + match_.remove(); + + // If a language is found with the given name, then add a new language layer + // to the highlighted document. 
+ if let (Some(language_name), Some(content_node)) = (language_name, content_node) { + if let Some(config) = (self.injection_callback)(language_name) { + let ranges = HighlightIterLayer::intersect_ranges( + &self.layers[0].ranges, + &[content_node], + include_children, + ); + if !ranges.is_empty() { + match HighlightIterLayer::new( + self.source, + self.highlighter, + self.cancellation_flag, + &mut self.injection_callback, + config, + self.layers[0].depth + 1, + ranges, + ) { + Ok(layers) => { + for layer in layers { + self.insert_layer(layer); + } + } + Err(e) => return Some(Err(e)), + } + } + } + } + + self.sort_layers(); + continue 'main; + } + + // Remove from the local scope stack any local scopes that have already ended. + while range.start > layer.scope_stack.last().unwrap().range.end { + layer.scope_stack.pop(); + } + + // If this capture is for tracking local variables, then process the + // local variable info. + let mut reference_highlight = None; + let mut definition_highlight = None; + while match_.pattern_index < layer.config.highlights_pattern_index { + // If the node represents a local scope, push a new local scope onto + // the scope stack. + if Some(capture.index) == layer.config.local_scope_capture_index { + definition_highlight = None; + let mut scope = LocalScope { + inherits: true, + range: range.clone(), + local_defs: Vec::new(), + }; + for prop in layer.config.query.property_settings(match_.pattern_index) { + if let "local.scope-inherits" = prop.key.as_ref() { + scope.inherits = + prop.value.as_ref().map_or(true, |r| r.as_ref() == "true"); + } + } + layer.scope_stack.push(scope); + } + // If the node represents a definition, add a new definition to the + // local scope at the top of the scope stack. 
+ else if Some(capture.index) == layer.config.local_def_capture_index { + reference_highlight = None; + definition_highlight = None; + let scope = layer.scope_stack.last_mut().unwrap(); + + let mut value_range = 0..0; + for capture in match_.captures { + if Some(capture.index) == layer.config.local_def_value_capture_index { + value_range = capture.node.byte_range(); + } + } + + if let Ok(name) = str::from_utf8(&self.source[range.clone()]) { + scope.local_defs.push(LocalDef { + name, + value_range, + highlight: None, + }); + definition_highlight = + scope.local_defs.last_mut().map(|s| &mut s.highlight); + } + } + // If the node represents a reference, then try to find the corresponding + // definition in the scope stack. + else if Some(capture.index) == layer.config.local_ref_capture_index { + if definition_highlight.is_none() { + definition_highlight = None; + if let Ok(name) = str::from_utf8(&self.source[range.clone()]) { + for scope in layer.scope_stack.iter().rev() { + if let Some(highlight) = + scope.local_defs.iter().rev().find_map(|def| { + if def.name == name && range.start >= def.value_range.end { + Some(def.highlight) + } else { + None + } + }) + { + reference_highlight = highlight; + break; + } + if !scope.inherits { + break; + } + } + } + } + } + + // Continue processing any additional matches for the same node. + if let Some((next_match, next_capture_index)) = layer.captures.peek() { + let next_capture = next_match.captures[*next_capture_index]; + if next_capture.node == capture.node { + capture = next_capture; + match_ = layer.captures.next().unwrap().0; + continue; + } + } + + self.sort_layers(); + continue 'main; + } + + // Otherwise, this capture must represent a highlight. + // If this exact range has already been highlighted by an earlier pattern, or by + // a different layer, then skip over this one. 
+ if let Some((last_start, last_end, last_depth)) = self.last_highlight_range { + if range.start == last_start && range.end == last_end && layer.depth < last_depth { + self.sort_layers(); + continue 'main; + } + } + + // If the current node was found to be a local variable, then skip over any + // highlighting patterns that are disabled for local variables. + if definition_highlight.is_some() || reference_highlight.is_some() { + while layer.config.non_local_variable_patterns[match_.pattern_index] { + if let Some((next_match, next_capture_index)) = layer.captures.peek() { + let next_capture = next_match.captures[*next_capture_index]; + if next_capture.node == capture.node { + capture = next_capture; + match_ = layer.captures.next().unwrap().0; + continue; + } + } + + self.sort_layers(); + continue 'main; + } + } + + // Once a highlighting pattern is found for the current node, skip over + // any later highlighting patterns that also match this node. Captures + // for a given node are ordered by pattern index, so these subsequent + // captures are guaranteed to be for highlighting, not injections or + // local variables. + while let Some((next_match, next_capture_index)) = layer.captures.peek() { + let next_capture = next_match.captures[*next_capture_index]; + if next_capture.node == capture.node { + layer.captures.next(); + } else { + break; + } + } + + let current_highlight = layer.config.highlight_indices[capture.index as usize]; + + // If this node represents a local definition, then store the current + // highlight value on the local scope entry representing this node. + if let Some(definition_highlight) = definition_highlight { + *definition_highlight = current_highlight; + } + + // Emit a scope start event and push the node's end position to the stack. 
+ if let Some(highlight) = reference_highlight.or(current_highlight) { + self.last_highlight_range = Some((range.start, range.end, layer.depth)); + layer.highlight_end_stack.push(range.end); + return self + .emit_event(range.start, Some(HighlightEvent::HighlightStart(highlight))); + } + + self.sort_layers(); + } + } +} + +fn injection_for_match<'a>( + config: &HighlightConfiguration, + query: &'a Query, + query_match: &QueryMatch<'a>, + source: &'a [u8], +) -> (Option<&'a str>, Option>, bool) { + let content_capture_index = config.injection_content_capture_index; + let language_capture_index = config.injection_language_capture_index; + + let mut language_name = None; + let mut content_node = None; + for capture in query_match.captures { + let index = Some(capture.index); + if index == language_capture_index { + language_name = capture.node.utf8_text(source).ok(); + } else if index == content_capture_index { + content_node = Some(capture.node); + } + } + + let mut include_children = false; + for prop in query.property_settings(query_match.pattern_index) { + match prop.key.as_ref() { + // In addition to specifying the language name via the text of a + // captured node, it can also be hard-coded via a `#set!` predicate + // that sets the injection.language key. + "injection.language" => { + if language_name.is_none() { + language_name = prop.value.as_ref().map(|s| s.as_ref()) + } + } + + // By default, injections do not include the *children* of an + // `injection.content` node - only the ranges that belong to the + // node itself. This can be changed using a `#set!` predicate that + // sets the `injection.include-children` key. + "injection.include-children" => include_children = true, + _ => {} + } + } + + (language_name, content_node, include_children) +} + +fn shrink_and_clear(vec: &mut Vec, capacity: usize) { + if vec.len() > capacity { + vec.truncate(capacity); + vec.shrink_to_fit(); + } + vec.clear(); +} -- cgit v1.2.3-70-g09d2