From 6728e4449038e9481b72251441182d508c165a9c Mon Sep 17 00:00:00 2001 From: Blaž Hrastnik Date: Sun, 7 Nov 2021 00:21:03 +0900 Subject: syntax: Split parsing and highlighting --- helix-core/src/indent.rs | 2 +- helix-core/src/syntax.rs | 1126 +++++++++++++++++++++------------------------- 2 files changed, 511 insertions(+), 617 deletions(-) (limited to 'helix-core/src') diff --git a/helix-core/src/indent.rs b/helix-core/src/indent.rs index 1fc2b8a5..ac2a1208 100644 --- a/helix-core/src/indent.rs +++ b/helix-core/src/indent.rs @@ -454,7 +454,7 @@ where let language_config = loader.language_config_for_scope("source.rust").unwrap(); let highlight_config = language_config.highlight_config(&[]).unwrap(); - let syntax = Syntax::new(&doc, highlight_config.clone()); + let syntax = Syntax::new(&doc, highlight_config.clone(), std::sync::Arc::new(loader)); let text = doc.slice(..); let tab_width = 4; diff --git a/helix-core/src/syntax.rs b/helix-core/src/syntax.rs index c7a3e1cc..858b9bdf 100644 --- a/helix-core/src/syntax.rs +++ b/helix-core/src/syntax.rs @@ -9,6 +9,7 @@ use crate::{ pub use helix_syntax::get_language; use arc_swap::ArcSwap; +use slotmap::{DefaultKey as LayerId, HopSlotMap}; use std::{ borrow::Cow, @@ -388,9 +389,9 @@ thread_local! { #[derive(Debug)] pub struct Syntax { - config: Arc, - - root_layer: LanguageLayer, + layers: HopSlotMap, + root: LayerId, + loader: Arc, } fn byte_range_to_str(range: std::ops::Range, source: RopeSlice) -> Cow { @@ -400,38 +401,36 @@ fn byte_range_to_str(range: std::ops::Range, source: RopeSlice) -> Cow, - ) -> Self { - let root_layer = LanguageLayer { tree: None }; + pub fn new(source: &Rope, config: Arc, loader: Arc) -> Self { + let root_layer = LanguageLayer { + tree: None, + config, + depth: 0, + ranges: vec![Range { + start_byte: 0, + end_byte: usize::MAX, + start_point: Point::new(0, 0), + end_point: Point::new(usize::MAX, usize::MAX), + }], + }; // track markers of injections // track scope_descriptor: a Vec of scopes for item in tree + let mut layers = HopSlotMap::default(); + let root = layers.insert(root_layer); + let mut syntax = Self { // grammar, - config, - root_layer, + root, + layers, + loader, }; - // update root layer - PARSER.with(|ts_parser| { - // TODO: handle the returned `Result` properly. - let _ = syntax.root_layer.parse( - &mut ts_parser.borrow_mut(), - &syntax.config, - source, - 0, - vec![Range { - start_byte: 0, - end_byte: usize::MAX, - start_point: Point::new(0, 0), - end_point: Point::new(usize::MAX, usize::MAX), - }], - ); - }); + syntax + .update(source, source, &ChangeSet::new(&source)) + .unwrap(); + syntax } @@ -441,30 +440,197 @@ impl Syntax { source: &Rope, changeset: &ChangeSet, ) -> Result<(), Error> { + use std::collections::VecDeque; + let mut queue = VecDeque::new(); + // let source = source.slice(..); + let injection_callback = |language: &str| { + self.loader + .language_configuration_for_injection_string(language) + .and_then(|language_config| { + // TODO: get these theme.scopes from somewhere, probably make them settable on Loader + let scopes = &[ + "attribute", + "constant", + "function.builtin", + "function", + "keyword", + "operator", + "property", + "punctuation", + "punctuation.bracket", + "punctuation.delimiter", + "string", + "string.special", + "tag", + "type", + "type.builtin", + "variable", + "variable.builtin", + "variable.parameter", + ]; + language_config.highlight_config( + &scopes + .iter() + .map(|scope| scope.to_string()) + .collect::>(), + ) + }) + }; + + queue.push_back(self.root); + + // HAXX: for now, clear all layers except root so they get re-parsed + self.layers.retain(|id, _| id == self.root); + + // Workaround for Syntax::new() with empty changeset + if !changeset.is_empty() { + // TODO: do this in a recursive way + // Notify the tree about all the changes + let edits = generate_edits(old_source.slice(..), changeset); + let tree = self.layers[self.root].tree.as_mut().unwrap(); + for edit in edits.iter().rev() { + // apply the edits in reverse. If we applied them in order then edit 1 would disrupt + // the positioning of edit 2 + tree.edit(edit); + } + } + PARSER.with(|ts_parser| { - self.root_layer.update( - &mut ts_parser.borrow_mut(), - &self.config, - old_source, - source, - changeset, - ) - }) + let ts_parser = &mut ts_parser.borrow_mut(); + let mut cursor = ts_parser.cursors.pop().unwrap_or_else(QueryCursor::new); + // TODO: might need to set cursor range + + while let Some(layer_id) = queue.pop_front() { + // Re-parse the tree. + self.layers[layer_id].parse(ts_parser, source)?; + + let source = source.slice(..); + let layer = &self.layers[layer_id]; + + // Process injections. + let matches = cursor.matches( + &layer.config.injections_query, + layer.tree().root_node(), + RopeProvider(source), + ); + let mut injections = Vec::new(); + for mat in matches { + let (language_name, content_node, include_children) = injection_for_match( + &layer.config, + &layer.config.injections_query, + &mat, + source, + ); + + // Explicitly remove this match so that none of its other captures will remain + // in the stream of captures. + mat.remove(); // TODO: is this still necessary? + + // If a language is found with the given name, then add a new language layer + // to the highlighted document. + if let (Some(language_name), Some(content_node)) = (language_name, content_node) + { + if let Some(config) = (injection_callback)(&language_name) { + let ranges = + intersect_ranges(&layer.ranges, &[content_node], include_children); + + if !ranges.is_empty() { + log::info!("{} {:?}", language_name, ranges); + injections.push((config, ranges)); + } + } + } + } - // TODO: deal with injections and update them too + // Process combined injections. + if let Some(combined_injections_query) = &layer.config.combined_injections_query { + let mut injections_by_pattern_index = + vec![(None, Vec::new(), false); combined_injections_query.pattern_count()]; + let matches = cursor.matches( + combined_injections_query, + layer.tree().root_node(), + RopeProvider(source), + ); + for mat in matches { + let entry = &mut injections_by_pattern_index[mat.pattern_index]; + let (language_name, content_node, include_children) = injection_for_match( + &layer.config, + combined_injections_query, + &mat, + source, + ); + if language_name.is_some() { + entry.0 = language_name; + } + if let Some(content_node) = content_node { + entry.1.push(content_node); + } + entry.2 = include_children; + } + for (lang_name, content_nodes, includes_children) in injections_by_pattern_index + { + if let (Some(lang_name), false) = (lang_name, content_nodes.is_empty()) { + if let Some(config) = (injection_callback)(&lang_name) { + let ranges = intersect_ranges( + &layer.ranges, + &content_nodes, + includes_children, + ); + if !ranges.is_empty() { + injections.push((config, ranges)); + } + } + } + } + } + + let depth = layer.depth + 1; + // TODO: can't inline this since matches borrows self.layers + for (config, ranges) in injections { + let layer_id = self.layers.insert(LanguageLayer { + tree: None, + config, + depth, + ranges, + }); + queue.push_back(layer_id); + } + } + + // Return the cursor back in the pool. + ts_parser.cursors.push(cursor); + + Ok(()) // so we can use the try operator + })?; + + Ok(()) } // fn buffer_changed -> call layer.update(range, new_text) on root layer and then all marker layers - // call this on transaction.apply() -> buffer_changed(changes) - // - // fn parse(language, old_tree, ranges) - // + pub fn tree(&self) -> &Tree { - self.root_layer.tree() + self.layers[self.root].tree() } + + // root: Tree + // injections: Vec<(Tree, Range marker)> + + // handle updates that go over a part of the layer by truncating them to start/end appropriately + + // injections tracked by marker: + // if marker areas match it's fine and update + // if not found add new layer + // if length 0 then area got removed, clean up the layer // - // + // layer update: + // if range.len = 0 then remove the layer + // for change in changes { tree.edit(change) } + // tree = parser.parse(.., tree, ..) + // calculate affected range and update injections + // injection update: + // look for existing injections + // if present, range = (first injection start, last injection end) // Highlighting @@ -474,61 +640,76 @@ impl Syntax { source: RopeSlice<'a>, range: Option>, cancellation_flag: Option<&'a AtomicUsize>, - injection_callback: impl FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a, ) -> impl Iterator> + 'a { - // The `captures` iterator borrows the `Tree` and the `QueryCursor`, which - // prevents them from being moved. But both of these values are really just - // pointers, so it's actually ok to move them. - - // reuse a cursor from the pool if possible - let mut cursor = PARSER.with(|ts_parser| { - let highlighter = &mut ts_parser.borrow_mut(); - highlighter.cursors.pop().unwrap_or_else(QueryCursor::new) + let mut layers = self + .layers + .iter() + .map(|(_, layer)| { + // Reuse a cursor from the pool if available. + let mut cursor = PARSER.with(|ts_parser| { + let highlighter = &mut ts_parser.borrow_mut(); + highlighter.cursors.pop().unwrap_or_else(QueryCursor::new) + }); + + // The `captures` iterator borrows the `Tree` and the `QueryCursor`, which + // prevents them from being moved. But both of these values are really just + // pointers, so it's actually ok to move them. + let cursor_ref = + unsafe { mem::transmute::<_, &'static mut QueryCursor>(&mut cursor) }; + + // if reusing cursors & no range this resets to whole range + // TODO: handle intersect (range & layer.range) + // cursor_ref.set_byte_range(range.clone().unwrap_or(0..usize::MAX)); + cursor_ref.set_byte_range(0..usize::MAX); + + let captures = cursor_ref + .captures( + &layer.config.query, + layer.tree().root_node(), + RopeProvider(source), + ) + .peekable(); + + HighlightIterLayer { + highlight_end_stack: Vec::new(), + scope_stack: vec![LocalScope { + inherits: false, + range: 0..usize::MAX, + local_defs: Vec::new(), + }], + cursor, + _tree: None, + captures, + config: layer.config.as_ref(), // TODO: just reuse + depth: layer.depth, // TODO: just reuse + ranges: layer.ranges.clone(), + } + }) + .collect::>(); + + log::info!("--"); + + // HAXX: arrange layers by byte range, with deeper layers positioned first + layers.sort_by_key(|layer| { + ( + layer.ranges.first().cloned(), + std::cmp::Reverse(layer.depth), + ) }); - let tree_ref = self.tree(); - let cursor_ref = unsafe { mem::transmute::<_, &'static mut QueryCursor>(&mut cursor) }; - let query_ref = &self.config.query; - let config_ref = self.config.as_ref(); - - // if reusing cursors & no range this resets to whole range - cursor_ref.set_byte_range(range.clone().unwrap_or(0..usize::MAX)); - - let captures = cursor_ref - .captures(query_ref, tree_ref.root_node(), RopeProvider(source)) - .peekable(); - - // manually craft the root layer based on the existing tree - let layer = HighlightIterLayer { - highlight_end_stack: Vec::new(), - scope_stack: vec![LocalScope { - inherits: false, - range: 0..usize::MAX, - local_defs: Vec::new(), - }], - cursor, - depth: 0, - _tree: None, - captures, - config: config_ref, - ranges: vec![Range { - start_byte: 0, - end_byte: usize::MAX, - start_point: Point::new(0, 0), - end_point: Point::new(usize::MAX, usize::MAX), - }], - }; let mut result = HighlightIter { source, byte_offset: range.map_or(0, |r| r.start), // TODO: simplify - injection_callback, cancellation_flag, iter_count: 0, - layers: vec![layer], + layers, next_event: None, last_highlight_range: None, }; result.sort_layers(); + for layer in &result.layers { + log::info!("> {:?} {:?}", layer.depth, layer.ranges); // <- for some reason layers are reversed here + } result } // on_tokenize @@ -556,234 +737,155 @@ impl Syntax { pub struct LanguageLayer { // mode // grammar - // depth + pub config: Arc, pub(crate) tree: Option, + pub ranges: Vec, + pub depth: usize, } impl LanguageLayer { - // pub fn new() -> Self { - // Self { tree: None } - // } - pub fn tree(&self) -> &Tree { // TODO: no unwrap self.tree.as_ref().unwrap() } - fn parse( - &mut self, - ts_parser: &mut TsParser, - config: &HighlightConfiguration, - source: &Rope, - _depth: usize, - ranges: Vec, - ) -> Result<(), Error> { - if ts_parser.parser.set_included_ranges(&ranges).is_ok() { - ts_parser - .parser - .set_language(config.language) - .map_err(|_| Error::InvalidLanguage)?; - - // unsafe { syntax.parser.set_cancellation_flag(cancellation_flag) }; - let tree = ts_parser - .parser - .parse_with( - &mut |byte, _| { - if byte <= source.len_bytes() { - let (chunk, start_byte, _, _) = source.chunk_at_byte(byte); - chunk[byte - start_byte..].as_bytes() - } else { - // out of range - &[] - } - }, - self.tree.as_ref(), - ) - .ok_or(Error::Cancelled)?; - - self.tree = Some(tree) - } + fn parse(&mut self, ts_parser: &mut TsParser, source: &Rope) -> Result<(), Error> { + ts_parser.parser.set_included_ranges(&self.ranges).unwrap(); + + ts_parser + .parser + .set_language(self.config.language) + .map_err(|_| Error::InvalidLanguage)?; + + // unsafe { syntax.parser.set_cancellation_flag(cancellation_flag) }; + let tree = ts_parser + .parser + .parse_with( + &mut |byte, _| { + if byte <= source.len_bytes() { + let (chunk, start_byte, _, _) = source.chunk_at_byte(byte); + chunk[byte - start_byte..].as_bytes() + } else { + // out of range + &[] + } + }, + self.tree.as_ref(), + ) + .ok_or(Error::Cancelled)?; + // unsafe { ts_parser.parser.set_cancellation_flag(None) }; + self.tree = Some(tree); Ok(()) } +} - pub(crate) fn generate_edits( - old_text: RopeSlice, - changeset: &ChangeSet, - ) -> Vec { - use Operation::*; - let mut old_pos = 0; +pub(crate) fn generate_edits( + old_text: RopeSlice, + changeset: &ChangeSet, +) -> Vec { + use Operation::*; + let mut old_pos = 0; - let mut edits = Vec::new(); + let mut edits = Vec::new(); - let mut iter = changeset.changes.iter().peekable(); + let mut iter = changeset.changes.iter().peekable(); - // TODO; this is a lot easier with Change instead of Operation. + // TODO; this is a lot easier with Change instead of Operation. - fn point_at_pos(text: RopeSlice, pos: usize) -> (usize, Point) { - let byte = text.char_to_byte(pos); // <- attempted to index past end - let line = text.char_to_line(pos); - let line_start_byte = text.line_to_byte(line); - let col = byte - line_start_byte; + fn point_at_pos(text: RopeSlice, pos: usize) -> (usize, Point) { + let byte = text.char_to_byte(pos); // <- attempted to index past end + let line = text.char_to_line(pos); + let line_start_byte = text.line_to_byte(line); + let col = byte - line_start_byte; - (byte, Point::new(line, col)) - } + (byte, Point::new(line, col)) + } - fn traverse(point: Point, text: &Tendril) -> Point { - let Point { - mut row, - mut column, - } = point; - - // TODO: there should be a better way here. - let mut chars = text.chars().peekable(); - while let Some(ch) = chars.next() { - if char_is_line_ending(ch) && !(ch == '\r' && chars.peek() == Some(&'\n')) { - row += 1; - column = 0; - } else { - column += 1; - } + fn traverse(point: Point, text: &Tendril) -> Point { + let Point { + mut row, + mut column, + } = point; + + // TODO: there should be a better way here. + let mut chars = text.chars().peekable(); + while let Some(ch) = chars.next() { + if char_is_line_ending(ch) && !(ch == '\r' && chars.peek() == Some(&'\n')) { + row += 1; + column = 0; + } else { + column += 1; } - Point { row, column } } + Point { row, column } + } - while let Some(change) = iter.next() { - let len = match change { - Delete(i) | Retain(i) => *i, - Insert(_) => 0, - }; - let mut old_end = old_pos + len; + while let Some(change) = iter.next() { + let len = match change { + Delete(i) | Retain(i) => *i, + Insert(_) => 0, + }; + let mut old_end = old_pos + len; + + match change { + Retain(_) => {} + Delete(_) => { + let (start_byte, start_position) = point_at_pos(old_text, old_pos); + let (old_end_byte, old_end_position) = point_at_pos(old_text, old_end); + + // TODO: Position also needs to be byte based... + // let byte = char_to_byte(old_pos) + // let line = char_to_line(old_pos) + // let line_start_byte = line_to_byte() + // Position::new(line, line_start_byte - byte) + + // deletion + edits.push(tree_sitter::InputEdit { + start_byte, // old_pos to byte + old_end_byte, // old_end to byte + new_end_byte: start_byte, // old_pos to byte + start_position, // old pos to coords + old_end_position, // old_end to coords + new_end_position: start_position, // old pos to coords + }); + } + Insert(s) => { + let (start_byte, start_position) = point_at_pos(old_text, old_pos); - match change { - Retain(_) => {} - Delete(_) => { - let (start_byte, start_position) = point_at_pos(old_text, old_pos); + // a subsequent delete means a replace, consume it + if let Some(Delete(len)) = iter.peek() { + old_end = old_pos + len; let (old_end_byte, old_end_position) = point_at_pos(old_text, old_end); - // TODO: Position also needs to be byte based... - // let byte = char_to_byte(old_pos) - // let line = char_to_line(old_pos) - // let line_start_byte = line_to_byte() - // Position::new(line, line_start_byte - byte) + iter.next(); - // deletion + // replacement edits.push(tree_sitter::InputEdit { - start_byte, // old_pos to byte - old_end_byte, // old_end to byte - new_end_byte: start_byte, // old_pos to byte - start_position, // old pos to coords - old_end_position, // old_end to coords - new_end_position: start_position, // old pos to coords + start_byte, // old_pos to byte + old_end_byte, // old_end to byte + new_end_byte: start_byte + s.len(), // old_pos to byte + s.len() + start_position, // old pos to coords + old_end_position, // old_end to coords + new_end_position: traverse(start_position, s), // old pos + chars, newlines matter too (iter over) + }); + } else { + // insert + edits.push(tree_sitter::InputEdit { + start_byte, // old_pos to byte + old_end_byte: start_byte, // same + new_end_byte: start_byte + s.len(), // old_pos + s.len() + start_position, // old pos to coords + old_end_position: start_position, // same + new_end_position: traverse(start_position, s), // old pos + chars, newlines matter too (iter over) }); - } - Insert(s) => { - let (start_byte, start_position) = point_at_pos(old_text, old_pos); - - // a subsequent delete means a replace, consume it - if let Some(Delete(len)) = iter.peek() { - old_end = old_pos + len; - let (old_end_byte, old_end_position) = point_at_pos(old_text, old_end); - - iter.next(); - - // replacement - edits.push(tree_sitter::InputEdit { - start_byte, // old_pos to byte - old_end_byte, // old_end to byte - new_end_byte: start_byte + s.len(), // old_pos to byte + s.len() - start_position, // old pos to coords - old_end_position, // old_end to coords - new_end_position: traverse(start_position, s), // old pos + chars, newlines matter too (iter over) - }); - } else { - // insert - edits.push(tree_sitter::InputEdit { - start_byte, // old_pos to byte - old_end_byte: start_byte, // same - new_end_byte: start_byte + s.len(), // old_pos + s.len() - start_position, // old pos to coords - old_end_position: start_position, // same - new_end_position: traverse(start_position, s), // old pos + chars, newlines matter too (iter over) - }); - } } } - old_pos = old_end; - } - edits - } - - fn update( - &mut self, - ts_parser: &mut TsParser, - config: &HighlightConfiguration, - old_source: &Rope, - source: &Rope, - changeset: &ChangeSet, - ) -> Result<(), Error> { - if changeset.is_empty() { - return Ok(()); } - - let edits = Self::generate_edits(old_source.slice(..), changeset); - - // Notify the tree about all the changes - for edit in edits.iter().rev() { - // apply the edits in reverse. If we applied them in order then edit 1 would disrupt - // the positioning of edit 2 - self.tree.as_mut().unwrap().edit(edit); - } - - self.parse( - ts_parser, - config, - source, - 0, - // TODO: what to do about this range on update - vec![Range { - start_byte: 0, - end_byte: usize::MAX, - start_point: Point::new(0, 0), - end_point: Point::new(usize::MAX, usize::MAX), - }], - ) + old_pos = old_end; } - - // fn highlight_iter() -> same as Mode but for this layer. Mode composits these - // fn buffer_changed - // fn update(range) - // fn update_injections() + edits } -// -- refactored from tree-sitter-highlight to be able to retain state -// TODO: add seek() to iter - -// problem: any time a layer is updated it must update it's injections on the parent (potentially -// removing some from use) -// can't modify to vec and exist in it at the same time since that would violate borrows -// maybe we can do with an arena -// maybe just caching on the top layer and nevermind the injections for now? -// -// Grammar { -// layers: Vec> to prevent memory moves when vec is modified -// } -// injections tracked by marker: -// if marker areas match it's fine and update -// if not found add new layer -// if length 0 then area got removed, clean up the layer -// -// layer update: -// if range.len = 0 then remove the layer -// for change in changes { tree.edit(change) } -// tree = parser.parse(.., tree, ..) -// calculate affected range and update injections -// injection update: -// look for existing injections -// if present, range = (first injection start, last injection end) -// -// For now cheat and just throw out non-root layers if they exist. This should still improve -// parsing in majority of cases. - use std::sync::atomic::{AtomicUsize, Ordering}; use std::{iter, mem, ops, str, usize}; use tree_sitter::{ @@ -820,8 +922,8 @@ pub enum HighlightEvent { pub struct HighlightConfiguration { pub language: Grammar, pub query: Query, + injections_query: Query, combined_injections_query: Option, - locals_pattern_index: usize, highlights_pattern_index: usize, highlight_indices: ArcSwap>>, non_local_variable_patterns: Vec, @@ -848,13 +950,9 @@ struct LocalScope<'a> { } #[derive(Debug)] -struct HighlightIter<'a, F> -where - F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a, -{ +struct HighlightIter<'a> { source: RopeSlice<'a>, byte_offset: usize, - injection_callback: F, cancellation_flag: Option<&'a AtomicUsize>, layers: Vec>, iter_count: usize, @@ -894,8 +992,8 @@ struct HighlightIterLayer<'a> { config: &'a HighlightConfiguration, highlight_end_stack: Vec, scope_stack: Vec>, - ranges: Vec, depth: usize, + ranges: Vec, // TEMP } impl<'a> fmt::Debug for HighlightIterLayer<'a> { @@ -927,38 +1025,32 @@ impl HighlightConfiguration { ) -> Result { // Concatenate the query strings, keeping track of the start offset of each section. let mut query_source = String::new(); - query_source.push_str(injection_query); - let locals_query_offset = query_source.len(); query_source.push_str(locals_query); let highlights_query_offset = query_source.len(); query_source.push_str(highlights_query); // Construct a single query by concatenating the three query strings, but record the // range of pattern indices that belong to each individual string. - let mut query = Query::new(language, &query_source)?; - let mut locals_pattern_index = 0; + let query = Query::new(language, &query_source)?; let mut highlights_pattern_index = 0; for i in 0..(query.pattern_count()) { let pattern_offset = query.start_byte_for_pattern(i); if pattern_offset < highlights_query_offset { - if pattern_offset < highlights_query_offset { - highlights_pattern_index += 1; - } - if pattern_offset < locals_query_offset { - locals_pattern_index += 1; - } + highlights_pattern_index += 1; } } + let mut injections_query = Query::new(language, injection_query)?; + // Construct a separate query just for dealing with the 'combined injections'. // Disable the combined injection patterns in the main query. let mut combined_injections_query = Query::new(language, injection_query)?; let mut has_combined_queries = false; - for pattern_index in 0..locals_pattern_index { - let settings = query.property_settings(pattern_index); + for pattern_index in 0..injections_query.pattern_count() { + let settings = injections_query.property_settings(pattern_index); if settings.iter().any(|s| &*s.key == "injection.combined") { has_combined_queries = true; - query.disable_pattern(pattern_index); + injections_query.disable_pattern(pattern_index); } else { combined_injections_query.disable_pattern(pattern_index); } @@ -990,8 +1082,6 @@ impl HighlightConfiguration { for (i, name) in query.capture_names().iter().enumerate() { let i = Some(i as u32); match name.as_str() { - "injection.content" => injection_content_capture_index = i, - "injection.language" => injection_language_capture_index = i, "local.definition" => local_def_capture_index = i, "local.definition-value" => local_def_value_capture_index = i, "local.reference" => local_ref_capture_index = i, @@ -1000,12 +1090,21 @@ impl HighlightConfiguration { } } + for (i, name) in injections_query.capture_names().iter().enumerate() { + let i = Some(i as u32); + match name.as_str() { + "injection.content" => injection_content_capture_index = i, + "injection.language" => injection_language_capture_index = i, + _ => {} + } + } + let highlight_indices = ArcSwap::from_pointee(vec![None; query.capture_names().len()]); Ok(Self { language, query, + injections_query, combined_injections_query, - locals_pattern_index, highlights_pattern_index, highlight_indices, non_local_variable_patterns, @@ -1070,238 +1169,6 @@ impl HighlightConfiguration { } impl<'a> HighlightIterLayer<'a> { - /// Create a new 'layer' of highlighting for this document. - /// - /// In the even that the new layer contains "combined injections" (injections where multiple - /// disjoint ranges are parsed as one syntax tree), these will be eagerly processed and - /// added to the returned vector. - fn new Option<&'a HighlightConfiguration> + 'a>( - source: RopeSlice<'a>, - cancellation_flag: Option<&'a AtomicUsize>, - injection_callback: &mut F, - mut config: &'a HighlightConfiguration, - mut depth: usize, - mut ranges: Vec, - ) -> Result, Error> { - let mut result = Vec::with_capacity(1); - let mut queue = Vec::new(); - loop { - // --> Tree parsing part - - PARSER.with(|ts_parser| { - let highlighter = &mut ts_parser.borrow_mut(); - - if highlighter.parser.set_included_ranges(&ranges).is_ok() { - highlighter - .parser - .set_language(config.language) - .map_err(|_| Error::InvalidLanguage)?; - - unsafe { highlighter.parser.set_cancellation_flag(cancellation_flag) }; - let tree = highlighter - .parser - .parse_with( - &mut |byte, _| { - if byte <= source.len_bytes() { - let (chunk, start_byte, _, _) = source.chunk_at_byte(byte); - chunk[byte - start_byte..].as_bytes() - } else { - // out of range - &[] - } - }, - None, - ) - .ok_or(Error::Cancelled)?; - unsafe { highlighter.parser.set_cancellation_flag(None) }; - let mut cursor = highlighter.cursors.pop().unwrap_or_else(QueryCursor::new); - - // Process combined injections. - if let Some(combined_injections_query) = &config.combined_injections_query { - let mut injections_by_pattern_index = vec![ - (None, Vec::new(), false); - combined_injections_query - .pattern_count() - ]; - let matches = cursor.matches( - combined_injections_query, - tree.root_node(), - RopeProvider(source), - ); - for mat in matches { - let entry = &mut injections_by_pattern_index[mat.pattern_index]; - let (language_name, content_node, include_children) = - injection_for_match( - config, - combined_injections_query, - &mat, - source, - ); - if language_name.is_some() { - entry.0 = language_name; - } - if let Some(content_node) = content_node { - entry.1.push(content_node); - } - entry.2 = include_children; - } - for (lang_name, content_nodes, includes_children) in - injections_by_pattern_index - { - if let (Some(lang_name), false) = (lang_name, content_nodes.is_empty()) - { - if let Some(next_config) = (injection_callback)(&lang_name) { - let ranges = Self::intersect_ranges( - &ranges, - &content_nodes, - includes_children, - ); - if !ranges.is_empty() { - queue.push((next_config, depth + 1, ranges)); - } - } - } - } - } - - // --> Highlighting query part - - // The `captures` iterator borrows the `Tree` and the `QueryCursor`, which - // prevents them from being moved. But both of these values are really just - // pointers, so it's actually ok to move them. - let tree_ref = unsafe { mem::transmute::<_, &'static Tree>(&tree) }; - let cursor_ref = - unsafe { mem::transmute::<_, &'static mut QueryCursor>(&mut cursor) }; - let captures = cursor_ref - .captures(&config.query, tree_ref.root_node(), RopeProvider(source)) - .peekable(); - - result.push(HighlightIterLayer { - highlight_end_stack: Vec::new(), - scope_stack: vec![LocalScope { - inherits: false, - range: 0..usize::MAX, - local_defs: Vec::new(), - }], - cursor, - depth, - _tree: Some(tree), - captures, - config, - ranges, - }); - } - - Ok(()) // so we can use the try operator - })?; - - if queue.is_empty() { - break; - } - - let (next_config, next_depth, next_ranges) = queue.remove(0); - config = next_config; - depth = next_depth; - ranges = next_ranges; - } - - Ok(result) - } - - // Compute the ranges that should be included when parsing an injection. - // This takes into account three things: - // * `parent_ranges` - The ranges must all fall within the *current* layer's ranges. - // * `nodes` - Every injection takes place within a set of nodes. The injection ranges - // are the ranges of those nodes. - // * `includes_children` - For some injections, the content nodes' children should be - // excluded from the nested document, so that only the content nodes' *own* content - // is reparsed. For other injections, the content nodes' entire ranges should be - // reparsed, including the ranges of their children. - fn intersect_ranges( - parent_ranges: &[Range], - nodes: &[Node], - includes_children: bool, - ) -> Vec { - let mut cursor = nodes[0].walk(); - let mut result = Vec::new(); - let mut parent_range_iter = parent_ranges.iter(); - let mut parent_range = parent_range_iter - .next() - .expect("Layers should only be constructed with non-empty ranges vectors"); - for node in nodes.iter() { - let mut preceding_range = Range { - start_byte: 0, - start_point: Point::new(0, 0), - end_byte: node.start_byte(), - end_point: node.start_position(), - }; - let following_range = Range { - start_byte: node.end_byte(), - start_point: node.end_position(), - end_byte: usize::MAX, - end_point: Point::new(usize::MAX, usize::MAX), - }; - - for excluded_range in node - .children(&mut cursor) - .filter_map(|child| { - if includes_children { - None - } else { - Some(child.range()) - } - }) - .chain([following_range].iter().cloned()) - { - let mut range = Range { - start_byte: preceding_range.end_byte, - start_point: preceding_range.end_point, - end_byte: excluded_range.start_byte, - end_point: excluded_range.start_point, - }; - preceding_range = excluded_range; - - if range.end_byte < parent_range.start_byte { - continue; - } - - while parent_range.start_byte <= range.end_byte { - if parent_range.end_byte > range.start_byte { - if range.start_byte < parent_range.start_byte { - range.start_byte = parent_range.start_byte; - range.start_point = parent_range.start_point; - } - - if parent_range.end_byte < range.end_byte { - if range.start_byte < parent_range.end_byte { - result.push(Range { - start_byte: range.start_byte, - start_point: range.start_point, - end_byte: parent_range.end_byte, - end_point: parent_range.end_point, - }); - } - range.start_byte = parent_range.end_byte; - range.start_point = parent_range.end_point; - } else { - if range.start_byte < range.end_byte { - result.push(range); - } - break; - } - } - - if let Some(next_range) = parent_range_iter.next() { - parent_range = next_range; - } else { - return result; - } - } - } - } - result - } - // First, sort scope boundaries by their byte offset in the document. At a // given position, emit scope endings before scope beginnings. Finally, emit // scope boundaries from deeper layers first. @@ -1327,10 +1194,101 @@ impl<'a> HighlightIterLayer<'a> { } } -impl<'a, F> HighlightIter<'a, F> -where - F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a, -{ +// Compute the ranges that should be included when parsing an injection. +// This takes into account three things: +// * `parent_ranges` - The ranges must all fall within the *current* layer's ranges. +// * `nodes` - Every injection takes place within a set of nodes. The injection ranges +// are the ranges of those nodes. +// * `includes_children` - For some injections, the content nodes' children should be +// excluded from the nested document, so that only the content nodes' *own* content +// is reparsed. For other injections, the content nodes' entire ranges should be +// reparsed, including the ranges of their children. +fn intersect_ranges( + parent_ranges: &[Range], + nodes: &[Node], + includes_children: bool, +) -> Vec { + let mut cursor = nodes[0].walk(); + let mut result = Vec::new(); + let mut parent_range_iter = parent_ranges.iter(); + let mut parent_range = parent_range_iter + .next() + .expect("Layers should only be constructed with non-empty ranges vectors"); + for node in nodes.iter() { + let mut preceding_range = Range { + start_byte: 0, + start_point: Point::new(0, 0), + end_byte: node.start_byte(), + end_point: node.start_position(), + }; + let following_range = Range { + start_byte: node.end_byte(), + start_point: node.end_position(), + end_byte: usize::MAX, + end_point: Point::new(usize::MAX, usize::MAX), + }; + + for excluded_range in node + .children(&mut cursor) + .filter_map(|child| { + if includes_children { + None + } else { + Some(child.range()) + } + }) + .chain([following_range].iter().cloned()) + { + let mut range = Range { + start_byte: preceding_range.end_byte, + start_point: preceding_range.end_point, + end_byte: excluded_range.start_byte, + end_point: excluded_range.start_point, + }; + preceding_range = excluded_range; + + if range.end_byte < parent_range.start_byte { + continue; + } + + while parent_range.start_byte <= range.end_byte { + if parent_range.end_byte > range.start_byte { + if range.start_byte < parent_range.start_byte { + range.start_byte = parent_range.start_byte; + range.start_point = parent_range.start_point; + } + + if parent_range.end_byte < range.end_byte { + if range.start_byte < parent_range.end_byte { + result.push(Range { + start_byte: range.start_byte, + start_point: range.start_point, + end_byte: parent_range.end_byte, + end_point: parent_range.end_point, + }); + } + range.start_byte = parent_range.end_byte; + range.start_point = parent_range.end_point; + } else { + if range.start_byte < range.end_byte { + result.push(range); + } + break; + } + } + + if let Some(next_range) = parent_range_iter.next() { + parent_range = next_range; + } else { + return result; + } + } + } + } + result +} + +impl<'a> HighlightIter<'a> { fn emit_event( &mut self, offset: usize, @@ -1361,6 +1319,12 @@ where i += 1; continue; } + } else { + let layer = self.layers.remove(i + 1); + PARSER.with(|ts_parser| { + let highlighter = &mut ts_parser.borrow_mut(); + highlighter.cursors.push(layer.cursor); + }); } break; } @@ -1377,30 +1341,9 @@ where } } } - - fn insert_layer(&mut self, mut layer: HighlightIterLayer<'a>) { - if let Some(sort_key) = layer.sort_key() { - let mut i = 1; - while i < self.layers.len() { - if let Some(sort_key_i) = self.layers[i].sort_key() { - if sort_key_i > sort_key { - self.layers.insert(i, layer); - return; - } - i += 1; - } else { - self.layers.remove(i); - } - } - self.layers.push(layer); - } - } } -impl<'a, F> Iterator for HighlightIter<'a, F> -where - F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a, -{ +impl<'a> Iterator for HighlightIter<'a> { type Item = Result; fn next(&mut self) -> Option { @@ -1460,55 +1403,12 @@ where layer.highlight_end_stack.pop(); return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd)); } else { - // return self.emit_event(self.source.len(), None); - return None; + return self.emit_event(self.source.len_bytes(), None); }; let (mut match_, capture_index) = layer.captures.next().unwrap(); let mut capture = match_.captures[capture_index]; - // If this capture represents an injection, then process the injection. - if match_.pattern_index < layer.config.locals_pattern_index { - let (language_name, content_node, include_children) = - injection_for_match(layer.config, &layer.config.query, &match_, self.source); - - // Explicitly remove this match so that none of its other captures will remain - // in the stream of captures. - match_.remove(); - - // If a language is found with the given name, then add a new language layer - // to the highlighted document. - if let (Some(language_name), Some(content_node)) = (language_name, content_node) { - if let Some(config) = (self.injection_callback)(&language_name) { - let ranges = HighlightIterLayer::intersect_ranges( - &self.layers[0].ranges, - &[content_node], - include_children, - ); - if !ranges.is_empty() { - match HighlightIterLayer::new( - self.source, - self.cancellation_flag, - &mut self.injection_callback, - config, - self.layers[0].depth + 1, - ranges, - ) { - Ok(layers) => { - for layer in layers { - self.insert_layer(layer); - } - } - Err(e) => return Some(Err(e)), - } - } - } - } - - self.sort_layers(); - continue 'main; - } - // Remove from the local scope stack any local scopes that have already ended. while range.start > layer.scope_stack.last().unwrap().range.end { layer.scope_stack.pop(); @@ -1703,14 +1603,6 @@ fn injection_for_match<'a>( (language_name, content_node, include_children) } -// fn shrink_and_clear(vec: &mut Vec, capacity: usize) { -// if vec.len() > capacity { -// vec.truncate(capacity); -// vec.shrink_to_fit(); -// } -// vec.clear(); -// } - pub struct Merge { iter: I, spans: Box)>>, @@ -1877,6 +1769,8 @@ mod test { .map(String::from) .collect(); + let loader = Loader::new(Configuration { language: vec![] }); + let language = get_language(&crate::RUNTIME_DIR, "Rust").unwrap(); let config = HighlightConfiguration::new( language, @@ -1899,7 +1793,7 @@ mod test { fn main() {} ", ); - let syntax = Syntax::new(&source, Arc::new(config)); + let syntax = Syntax::new(&source, Arc::new(config), Arc::new(loader)); let tree = syntax.tree(); let root = tree.root_node(); assert_eq!(root.kind(), "source_file"); @@ -1926,7 +1820,7 @@ mod test { &doc, vec![(6, 11, Some("test".into())), (12, 17, None)].into_iter(), ); - let edits = LanguageLayer::generate_edits(doc.slice(..), transaction.changes()); + let edits = generate_edits(doc.slice(..), transaction.changes()); // transaction.apply(&mut state); assert_eq!( @@ -1955,7 +1849,7 @@ mod test { let mut doc = Rope::from("fn test() {}"); let transaction = Transaction::change(&doc, vec![(8, 8, Some("a: u32".into()))].into_iter()); - let edits = LanguageLayer::generate_edits(doc.slice(..), transaction.changes()); + let edits = generate_edits(doc.slice(..), transaction.changes()); transaction.apply(&mut doc); assert_eq!(doc, "fn test(a: u32) {}"); -- cgit v1.2.3-70-g09d2