aboutsummaryrefslogtreecommitdiff
path: root/helix-core
diff options
context:
space:
mode:
authorBlaž Hrastnik2021-11-06 15:21:03 +0000
committerBlaž Hrastnik2022-01-23 07:00:24 +0000
commit6728e4449038e9481b72251441182d508c165a9c (patch)
tree2967675e350be4b6daf815a6662a644b76239882 /helix-core
parent83bde1004d596740a7bb63ec7fe2271734492f59 (diff)
syntax: Split parsing and highlighting
Diffstat (limited to 'helix-core')
-rw-r--r--helix-core/Cargo.toml1
-rw-r--r--helix-core/src/indent.rs2
-rw-r--r--helix-core/src/syntax.rs1126
3 files changed, 512 insertions, 617 deletions
diff --git a/helix-core/Cargo.toml b/helix-core/Cargo.toml
index 6d694fb3..9056b1f6 100644
--- a/helix-core/Cargo.toml
+++ b/helix-core/Cargo.toml
@@ -22,6 +22,7 @@ unicode-segmentation = "1.8"
unicode-width = "0.1"
unicode-general-category = "0.4"
# slab = "0.4.2"
+slotmap = "1.0"
tree-sitter = "0.20"
once_cell = "1.9"
arc-swap = "1"
diff --git a/helix-core/src/indent.rs b/helix-core/src/indent.rs
index 1fc2b8a5..ac2a1208 100644
--- a/helix-core/src/indent.rs
+++ b/helix-core/src/indent.rs
@@ -454,7 +454,7 @@ where
let language_config = loader.language_config_for_scope("source.rust").unwrap();
let highlight_config = language_config.highlight_config(&[]).unwrap();
- let syntax = Syntax::new(&doc, highlight_config.clone());
+ let syntax = Syntax::new(&doc, highlight_config.clone(), std::sync::Arc::new(loader));
let text = doc.slice(..);
let tab_width = 4;
diff --git a/helix-core/src/syntax.rs b/helix-core/src/syntax.rs
index c7a3e1cc..858b9bdf 100644
--- a/helix-core/src/syntax.rs
+++ b/helix-core/src/syntax.rs
@@ -9,6 +9,7 @@ use crate::{
pub use helix_syntax::get_language;
use arc_swap::ArcSwap;
+use slotmap::{DefaultKey as LayerId, HopSlotMap};
use std::{
borrow::Cow,
@@ -388,9 +389,9 @@ thread_local! {
#[derive(Debug)]
pub struct Syntax {
- config: Arc<HighlightConfiguration>,
-
- root_layer: LanguageLayer,
+ layers: HopSlotMap<LayerId, LanguageLayer>,
+ root: LayerId,
+ loader: Arc<Loader>,
}
fn byte_range_to_str(range: std::ops::Range<usize>, source: RopeSlice) -> Cow<str> {
@@ -400,38 +401,36 @@ fn byte_range_to_str(range: std::ops::Range<usize>, source: RopeSlice) -> Cow<st
}
impl Syntax {
- // buffer, grammar, config, grammars, sync_timeout?
- pub fn new(
- /*language: Lang,*/ source: &Rope,
- config: Arc<HighlightConfiguration>,
- ) -> Self {
- let root_layer = LanguageLayer { tree: None };
+ pub fn new(source: &Rope, config: Arc<HighlightConfiguration>, loader: Arc<Loader>) -> Self {
+ let root_layer = LanguageLayer {
+ tree: None,
+ config,
+ depth: 0,
+ ranges: vec![Range {
+ start_byte: 0,
+ end_byte: usize::MAX,
+ start_point: Point::new(0, 0),
+ end_point: Point::new(usize::MAX, usize::MAX),
+ }],
+ };
// track markers of injections
// track scope_descriptor: a Vec of scopes for item in tree
+ let mut layers = HopSlotMap::default();
+ let root = layers.insert(root_layer);
+
let mut syntax = Self {
// grammar,
- config,
- root_layer,
+ root,
+ layers,
+ loader,
};
- // update root layer
- PARSER.with(|ts_parser| {
- // TODO: handle the returned `Result` properly.
- let _ = syntax.root_layer.parse(
- &mut ts_parser.borrow_mut(),
- &syntax.config,
- source,
- 0,
- vec![Range {
- start_byte: 0,
- end_byte: usize::MAX,
- start_point: Point::new(0, 0),
- end_point: Point::new(usize::MAX, usize::MAX),
- }],
- );
- });
+ syntax
+ .update(source, source, &ChangeSet::new(&source))
+ .unwrap();
+
syntax
}
@@ -441,30 +440,197 @@ impl Syntax {
source: &Rope,
changeset: &ChangeSet,
) -> Result<(), Error> {
+ use std::collections::VecDeque;
+ let mut queue = VecDeque::new();
+ // let source = source.slice(..);
+ let injection_callback = |language: &str| {
+ self.loader
+ .language_configuration_for_injection_string(language)
+ .and_then(|language_config| {
+ // TODO: get these theme.scopes from somewhere, probably make them settable on Loader
+ let scopes = &[
+ "attribute",
+ "constant",
+ "function.builtin",
+ "function",
+ "keyword",
+ "operator",
+ "property",
+ "punctuation",
+ "punctuation.bracket",
+ "punctuation.delimiter",
+ "string",
+ "string.special",
+ "tag",
+ "type",
+ "type.builtin",
+ "variable",
+ "variable.builtin",
+ "variable.parameter",
+ ];
+ language_config.highlight_config(
+ &scopes
+ .iter()
+ .map(|scope| scope.to_string())
+ .collect::<Vec<_>>(),
+ )
+ })
+ };
+
+ queue.push_back(self.root);
+
+ // HAXX: for now, clear all layers except root so they get re-parsed
+ self.layers.retain(|id, _| id == self.root);
+
+ // Workaround for Syntax::new() with empty changeset
+ if !changeset.is_empty() {
+ // TODO: do this in a recursive way
+ // Notify the tree about all the changes
+ let edits = generate_edits(old_source.slice(..), changeset);
+ let tree = self.layers[self.root].tree.as_mut().unwrap();
+ for edit in edits.iter().rev() {
+ // apply the edits in reverse. If we applied them in order then edit 1 would disrupt
+ // the positioning of edit 2
+ tree.edit(edit);
+ }
+ }
+
PARSER.with(|ts_parser| {
- self.root_layer.update(
- &mut ts_parser.borrow_mut(),
- &self.config,
- old_source,
- source,
- changeset,
- )
- })
+ let ts_parser = &mut ts_parser.borrow_mut();
+ let mut cursor = ts_parser.cursors.pop().unwrap_or_else(QueryCursor::new);
+ // TODO: might need to set cursor range
+
+ while let Some(layer_id) = queue.pop_front() {
+ // Re-parse the tree.
+ self.layers[layer_id].parse(ts_parser, source)?;
+
+ let source = source.slice(..);
+ let layer = &self.layers[layer_id];
+
+ // Process injections.
+ let matches = cursor.matches(
+ &layer.config.injections_query,
+ layer.tree().root_node(),
+ RopeProvider(source),
+ );
+ let mut injections = Vec::new();
+ for mat in matches {
+ let (language_name, content_node, include_children) = injection_for_match(
+ &layer.config,
+ &layer.config.injections_query,
+ &mat,
+ source,
+ );
+
+ // Explicitly remove this match so that none of its other captures will remain
+ // in the stream of captures.
+ mat.remove(); // TODO: is this still necessary?
+
+ // If a language is found with the given name, then add a new language layer
+ // to the highlighted document.
+ if let (Some(language_name), Some(content_node)) = (language_name, content_node)
+ {
+ if let Some(config) = (injection_callback)(&language_name) {
+ let ranges =
+ intersect_ranges(&layer.ranges, &[content_node], include_children);
+
+ if !ranges.is_empty() {
+ log::info!("{} {:?}", language_name, ranges);
+ injections.push((config, ranges));
+ }
+ }
+ }
+ }
- // TODO: deal with injections and update them too
+ // Process combined injections.
+ if let Some(combined_injections_query) = &layer.config.combined_injections_query {
+ let mut injections_by_pattern_index =
+ vec![(None, Vec::new(), false); combined_injections_query.pattern_count()];
+ let matches = cursor.matches(
+ combined_injections_query,
+ layer.tree().root_node(),
+ RopeProvider(source),
+ );
+ for mat in matches {
+ let entry = &mut injections_by_pattern_index[mat.pattern_index];
+ let (language_name, content_node, include_children) = injection_for_match(
+ &layer.config,
+ combined_injections_query,
+ &mat,
+ source,
+ );
+ if language_name.is_some() {
+ entry.0 = language_name;
+ }
+ if let Some(content_node) = content_node {
+ entry.1.push(content_node);
+ }
+ entry.2 = include_children;
+ }
+ for (lang_name, content_nodes, includes_children) in injections_by_pattern_index
+ {
+ if let (Some(lang_name), false) = (lang_name, content_nodes.is_empty()) {
+ if let Some(config) = (injection_callback)(&lang_name) {
+ let ranges = intersect_ranges(
+ &layer.ranges,
+ &content_nodes,
+ includes_children,
+ );
+ if !ranges.is_empty() {
+ injections.push((config, ranges));
+ }
+ }
+ }
+ }
+ }
+
+ let depth = layer.depth + 1;
+ // TODO: can't inline this since matches borrows self.layers
+ for (config, ranges) in injections {
+ let layer_id = self.layers.insert(LanguageLayer {
+ tree: None,
+ config,
+ depth,
+ ranges,
+ });
+ queue.push_back(layer_id);
+ }
+ }
+
+ // Return the cursor back in the pool.
+ ts_parser.cursors.push(cursor);
+
+ Ok(()) // so we can use the try operator
+ })?;
+
+ Ok(())
}
// fn buffer_changed -> call layer.update(range, new_text) on root layer and then all marker layers
-
// call this on transaction.apply() -> buffer_changed(changes)
- //
- // fn parse(language, old_tree, ranges)
- //
+
pub fn tree(&self) -> &Tree {
- self.root_layer.tree()
+ self.layers[self.root].tree()
}
+
+ // root: Tree
+ // injections: Vec<(Tree, Range marker)>
+
+ // handle updates that go over a part of the layer by truncating them to start/end appropriately
+
+ // injections tracked by marker:
+ // if marker areas match it's fine and update
+ // if not found add new layer
+ // if length 0 then area got removed, clean up the layer
//
- // <!--update_for_injection(grammar)-->
+ // layer update:
+ // if range.len = 0 then remove the layer
+ // for change in changes { tree.edit(change) }
+ // tree = parser.parse(.., tree, ..)
+ // calculate affected range and update injections
+ // injection update:
+ // look for existing injections
+ // if present, range = (first injection start, last injection end)
// Highlighting
@@ -474,61 +640,76 @@ impl Syntax {
source: RopeSlice<'a>,
range: Option<std::ops::Range<usize>>,
cancellation_flag: Option<&'a AtomicUsize>,
- injection_callback: impl FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a,
) -> impl Iterator<Item = Result<HighlightEvent, Error>> + 'a {
- // The `captures` iterator borrows the `Tree` and the `QueryCursor`, which
- // prevents them from being moved. But both of these values are really just
- // pointers, so it's actually ok to move them.
-
- // reuse a cursor from the pool if possible
- let mut cursor = PARSER.with(|ts_parser| {
- let highlighter = &mut ts_parser.borrow_mut();
- highlighter.cursors.pop().unwrap_or_else(QueryCursor::new)
+ let mut layers = self
+ .layers
+ .iter()
+ .map(|(_, layer)| {
+ // Reuse a cursor from the pool if available.
+ let mut cursor = PARSER.with(|ts_parser| {
+ let highlighter = &mut ts_parser.borrow_mut();
+ highlighter.cursors.pop().unwrap_or_else(QueryCursor::new)
+ });
+
+ // The `captures` iterator borrows the `Tree` and the `QueryCursor`, which
+ // prevents them from being moved. But both of these values are really just
+ // pointers, so it's actually ok to move them.
+ let cursor_ref =
+ unsafe { mem::transmute::<_, &'static mut QueryCursor>(&mut cursor) };
+
+ // if reusing cursors & no range this resets to whole range
+ // TODO: handle intersect (range & layer.range)
+ // cursor_ref.set_byte_range(range.clone().unwrap_or(0..usize::MAX));
+ cursor_ref.set_byte_range(0..usize::MAX);
+
+ let captures = cursor_ref
+ .captures(
+ &layer.config.query,
+ layer.tree().root_node(),
+ RopeProvider(source),
+ )
+ .peekable();
+
+ HighlightIterLayer {
+ highlight_end_stack: Vec::new(),
+ scope_stack: vec![LocalScope {
+ inherits: false,
+ range: 0..usize::MAX,
+ local_defs: Vec::new(),
+ }],
+ cursor,
+ _tree: None,
+ captures,
+ config: layer.config.as_ref(), // TODO: just reuse
+ depth: layer.depth, // TODO: just reuse
+ ranges: layer.ranges.clone(),
+ }
+ })
+ .collect::<Vec<_>>();
+
+ log::info!("--");
+
+ // HAXX: arrange layers by byte range, with deeper layers positioned first
+ layers.sort_by_key(|layer| {
+ (
+ layer.ranges.first().cloned(),
+ std::cmp::Reverse(layer.depth),
+ )
});
- let tree_ref = self.tree();
- let cursor_ref = unsafe { mem::transmute::<_, &'static mut QueryCursor>(&mut cursor) };
- let query_ref = &self.config.query;
- let config_ref = self.config.as_ref();
-
- // if reusing cursors & no range this resets to whole range
- cursor_ref.set_byte_range(range.clone().unwrap_or(0..usize::MAX));
-
- let captures = cursor_ref
- .captures(query_ref, tree_ref.root_node(), RopeProvider(source))
- .peekable();
-
- // manually craft the root layer based on the existing tree
- let layer = HighlightIterLayer {
- highlight_end_stack: Vec::new(),
- scope_stack: vec![LocalScope {
- inherits: false,
- range: 0..usize::MAX,
- local_defs: Vec::new(),
- }],
- cursor,
- depth: 0,
- _tree: None,
- captures,
- config: config_ref,
- ranges: vec![Range {
- start_byte: 0,
- end_byte: usize::MAX,
- start_point: Point::new(0, 0),
- end_point: Point::new(usize::MAX, usize::MAX),
- }],
- };
let mut result = HighlightIter {
source,
byte_offset: range.map_or(0, |r| r.start), // TODO: simplify
- injection_callback,
cancellation_flag,
iter_count: 0,
- layers: vec![layer],
+ layers,
next_event: None,
last_highlight_range: None,
};
result.sort_layers();
+ for layer in &result.layers {
+ log::info!("> {:?} {:?}", layer.depth, layer.ranges); // <- for some reason layers are reversed here
+ }
result
}
// on_tokenize
@@ -556,234 +737,155 @@ impl Syntax {
pub struct LanguageLayer {
// mode
// grammar
- // depth
+ pub config: Arc<HighlightConfiguration>,
pub(crate) tree: Option<Tree>,
+ pub ranges: Vec<Range>,
+ pub depth: usize,
}
impl LanguageLayer {
- // pub fn new() -> Self {
- // Self { tree: None }
- // }
-
pub fn tree(&self) -> &Tree {
// TODO: no unwrap
self.tree.as_ref().unwrap()
}
- fn parse(
- &mut self,
- ts_parser: &mut TsParser,
- config: &HighlightConfiguration,
- source: &Rope,
- _depth: usize,
- ranges: Vec<Range>,
- ) -> Result<(), Error> {
- if ts_parser.parser.set_included_ranges(&ranges).is_ok() {
- ts_parser
- .parser
- .set_language(config.language)
- .map_err(|_| Error::InvalidLanguage)?;
-
- // unsafe { syntax.parser.set_cancellation_flag(cancellation_flag) };
- let tree = ts_parser
- .parser
- .parse_with(
- &mut |byte, _| {
- if byte <= source.len_bytes() {
- let (chunk, start_byte, _, _) = source.chunk_at_byte(byte);
- chunk[byte - start_byte..].as_bytes()
- } else {
- // out of range
- &[]
- }
- },
- self.tree.as_ref(),
- )
- .ok_or(Error::Cancelled)?;
-
- self.tree = Some(tree)
- }
+ fn parse(&mut self, ts_parser: &mut TsParser, source: &Rope) -> Result<(), Error> {
+ ts_parser.parser.set_included_ranges(&self.ranges).unwrap();
+
+ ts_parser
+ .parser
+ .set_language(self.config.language)
+ .map_err(|_| Error::InvalidLanguage)?;
+
+ // unsafe { syntax.parser.set_cancellation_flag(cancellation_flag) };
+ let tree = ts_parser
+ .parser
+ .parse_with(
+ &mut |byte, _| {
+ if byte <= source.len_bytes() {
+ let (chunk, start_byte, _, _) = source.chunk_at_byte(byte);
+ chunk[byte - start_byte..].as_bytes()
+ } else {
+ // out of range
+ &[]
+ }
+ },
+ self.tree.as_ref(),
+ )
+ .ok_or(Error::Cancelled)?;
+ // unsafe { ts_parser.parser.set_cancellation_flag(None) };
+ self.tree = Some(tree);
Ok(())
}
+}
- pub(crate) fn generate_edits(
- old_text: RopeSlice,
- changeset: &ChangeSet,
- ) -> Vec<tree_sitter::InputEdit> {
- use Operation::*;
- let mut old_pos = 0;
+pub(crate) fn generate_edits(
+ old_text: RopeSlice,
+ changeset: &ChangeSet,
+) -> Vec<tree_sitter::InputEdit> {
+ use Operation::*;
+ let mut old_pos = 0;
- let mut edits = Vec::new();
+ let mut edits = Vec::new();
- let mut iter = changeset.changes.iter().peekable();
+ let mut iter = changeset.changes.iter().peekable();
- // TODO; this is a lot easier with Change instead of Operation.
+ // TODO; this is a lot easier with Change instead of Operation.
- fn point_at_pos(text: RopeSlice, pos: usize) -> (usize, Point) {
- let byte = text.char_to_byte(pos); // <- attempted to index past end
- let line = text.char_to_line(pos);
- let line_start_byte = text.line_to_byte(line);
- let col = byte - line_start_byte;
+ fn point_at_pos(text: RopeSlice, pos: usize) -> (usize, Point) {
+ let byte = text.char_to_byte(pos); // <- attempted to index past end
+ let line = text.char_to_line(pos);
+ let line_start_byte = text.line_to_byte(line);
+ let col = byte - line_start_byte;
- (byte, Point::new(line, col))
- }
+ (byte, Point::new(line, col))
+ }
- fn traverse(point: Point, text: &Tendril) -> Point {
- let Point {
- mut row,
- mut column,
- } = point;
-
- // TODO: there should be a better way here.
- let mut chars = text.chars().peekable();
- while let Some(ch) = chars.next() {
- if char_is_line_ending(ch) && !(ch == '\r' && chars.peek() == Some(&'\n')) {
- row += 1;
- column = 0;
- } else {
- column += 1;
- }
+ fn traverse(point: Point, text: &Tendril) -> Point {
+ let Point {
+ mut row,
+ mut column,
+ } = point;
+
+ // TODO: there should be a better way here.
+ let mut chars = text.chars().peekable();
+ while let Some(ch) = chars.next() {
+ if char_is_line_ending(ch) && !(ch == '\r' && chars.peek() == Some(&'\n')) {
+ row += 1;
+ column = 0;
+ } else {
+ column += 1;
}
- Point { row, column }
}
+ Point { row, column }
+ }
- while let Some(change) = iter.next() {
- let len = match change {
- Delete(i) | Retain(i) => *i,
- Insert(_) => 0,
- };
- let mut old_end = old_pos + len;
+ while let Some(change) = iter.next() {
+ let len = match change {
+ Delete(i) | Retain(i) => *i,
+ Insert(_) => 0,
+ };
+ let mut old_end = old_pos + len;
+
+ match change {
+ Retain(_) => {}
+ Delete(_) => {
+ let (start_byte, start_position) = point_at_pos(old_text, old_pos);
+ let (old_end_byte, old_end_position) = point_at_pos(old_text, old_end);
+
+ // TODO: Position also needs to be byte based...
+ // let byte = char_to_byte(old_pos)
+ // let line = char_to_line(old_pos)
+ // let line_start_byte = line_to_byte()
+ // Position::new(line, line_start_byte - byte)
+
+ // deletion
+ edits.push(tree_sitter::InputEdit {
+ start_byte, // old_pos to byte
+ old_end_byte, // old_end to byte
+ new_end_byte: start_byte, // old_pos to byte
+ start_position, // old pos to coords
+ old_end_position, // old_end to coords
+ new_end_position: start_position, // old pos to coords
+ });
+ }
+ Insert(s) => {
+ let (start_byte, start_position) = point_at_pos(old_text, old_pos);
- match change {
- Retain(_) => {}
- Delete(_) => {
- let (start_byte, start_position) = point_at_pos(old_text, old_pos);
+ // a subsequent delete means a replace, consume it
+ if let Some(Delete(len)) = iter.peek() {
+ old_end = old_pos + len;
let (old_end_byte, old_end_position) = point_at_pos(old_text, old_end);
- // TODO: Position also needs to be byte based...
- // let byte = char_to_byte(old_pos)
- // let line = char_to_line(old_pos)
- // let line_start_byte = line_to_byte()
- // Position::new(line, line_start_byte - byte)
+ iter.next();
- // deletion
+ // replacement
edits.push(tree_sitter::InputEdit {
- start_byte, // old_pos to byte
- old_end_byte, // old_end to byte
- new_end_byte: start_byte, // old_pos to byte
- start_position, // old pos to coords
- old_end_position, // old_end to coords
- new_end_position: start_position, // old pos to coords
+ start_byte, // old_pos to byte
+ old_end_byte, // old_end to byte
+ new_end_byte: start_byte + s.len(), // old_pos to byte + s.len()
+ start_position, // old pos to coords
+ old_end_position, // old_end to coords
+ new_end_position: traverse(start_position, s), // old pos + chars, newlines matter too (iter over)
+ });
+ } else {
+ // insert
+ edits.push(tree_sitter::InputEdit {
+ start_byte, // old_pos to byte
+ old_end_byte: start_byte, // same
+ new_end_byte: start_byte + s.len(), // old_pos + s.len()
+ start_position, // old pos to coords
+ old_end_position: start_position, // same
+ new_end_position: traverse(start_position, s), // old pos + chars, newlines matter too (iter over)
});
- }
- Insert(s) => {
- let (start_byte, start_position) = point_at_pos(old_text, old_pos);
-
- // a subsequent delete means a replace, consume it
- if let Some(Delete(len)) = iter.peek() {
- old_end = old_pos + len;
- let (old_end_byte, old_end_position) = point_at_pos(old_text, old_end);
-
- iter.next();
-
- // replacement
- edits.push(tree_sitter::InputEdit {
- start_byte, // old_pos to byte
- old_end_byte, // old_end to byte
- new_end_byte: start_byte + s.len(), // old_pos to byte + s.len()
- start_position, // old pos to coords
- old_end_position, // old_end to coords
- new_end_position: traverse(start_position, s), // old pos + chars, newlines matter too (iter over)
- });
- } else {
- // insert
- edits.push(tree_sitter::InputEdit {
- start_byte, // old_pos to byte
- old_end_byte: start_byte, // same
- new_end_byte: start_byte + s.len(), // old_pos + s.len()
- start_position, // old pos to coords
- old_end_position: start_position, // same
- new_end_position: traverse(start_position, s), // old pos + chars, newlines matter too (iter over)
- });
- }
}
}
- old_pos = old_end;
- }
- edits
- }
-
- fn update(
- &mut self,
- ts_parser: &mut TsParser,
- config: &HighlightConfiguration,
- old_source: &Rope,
- source: &Rope,
- changeset: &ChangeSet,
- ) -> Result<(), Error> {
- if changeset.is_empty() {
- return Ok(());
}
-
- let edits = Self::generate_edits(old_source.slice(..), changeset);
-
- // Notify the tree about all the changes
- for edit in edits.iter().rev() {
- // apply the edits in reverse. If we applied them in order then edit 1 would disrupt
- // the positioning of edit 2
- self.tree.as_mut().unwrap().edit(edit);
- }
-
- self.parse(
- ts_parser,
- config,
- source,
- 0,
- // TODO: what to do about this range on update
- vec![Range {
- start_byte: 0,
- end_byte: usize::MAX,
- start_point: Point::new(0, 0),
- end_point: Point::new(usize::MAX, usize::MAX),
- }],
- )
+ old_pos = old_end;
}
-
- // fn highlight_iter() -> same as Mode but for this layer. Mode composits these
- // fn buffer_changed
- // fn update(range)
- // fn update_injections()
+ edits
}
-// -- refactored from tree-sitter-highlight to be able to retain state
-// TODO: add seek() to iter
-
-// problem: any time a layer is updated it must update it's injections on the parent (potentially
-// removing some from use)
-// can't modify to vec and exist in it at the same time since that would violate borrows
-// maybe we can do with an arena
-// maybe just caching on the top layer and nevermind the injections for now?
-//
-// Grammar {
-// layers: Vec<Box<Layer>> to prevent memory moves when vec is modified
-// }
-// injections tracked by marker:
-// if marker areas match it's fine and update
-// if not found add new layer
-// if length 0 then area got removed, clean up the layer
-//
-// layer update:
-// if range.len = 0 then remove the layer
-// for change in changes { tree.edit(change) }
-// tree = parser.parse(.., tree, ..)
-// calculate affected range and update injections
-// injection update:
-// look for existing injections
-// if present, range = (first injection start, last injection end)
-//
-// For now cheat and just throw out non-root layers if they exist. This should still improve
-// parsing in majority of cases.
-
use std::sync::atomic::{AtomicUsize, Ordering};
use std::{iter, mem, ops, str, usize};
use tree_sitter::{
@@ -820,8 +922,8 @@ pub enum HighlightEvent {
pub struct HighlightConfiguration {
pub language: Grammar,
pub query: Query,
+ injections_query: Query,
combined_injections_query: Option<Query>,
- locals_pattern_index: usize,
highlights_pattern_index: usize,
highlight_indices: ArcSwap<Vec<Option<Highlight>>>,
non_local_variable_patterns: Vec<bool>,
@@ -848,13 +950,9 @@ struct LocalScope<'a> {
}
#[derive(Debug)]
-struct HighlightIter<'a, F>
-where
- F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a,
-{
+struct HighlightIter<'a> {
source: RopeSlice<'a>,
byte_offset: usize,
- injection_callback: F,
cancellation_flag: Option<&'a AtomicUsize>,
layers: Vec<HighlightIterLayer<'a>>,
iter_count: usize,
@@ -894,8 +992,8 @@ struct HighlightIterLayer<'a> {
config: &'a HighlightConfiguration,
highlight_end_stack: Vec<usize>,
scope_stack: Vec<LocalScope<'a>>,
- ranges: Vec<Range>,
depth: usize,
+ ranges: Vec<Range>, // TEMP
}
impl<'a> fmt::Debug for HighlightIterLayer<'a> {
@@ -927,38 +1025,32 @@ impl HighlightConfiguration {
) -> Result<Self, QueryError> {
// Concatenate the query strings, keeping track of the start offset of each section.
let mut query_source = String::new();
- query_source.push_str(injection_query);
- let locals_query_offset = query_source.len();
query_source.push_str(locals_query);
let highlights_query_offset = query_source.len();
query_source.push_str(highlights_query);
// Construct a single query by concatenating the three query strings, but record the
// range of pattern indices that belong to each individual string.
- let mut query = Query::new(language, &query_source)?;
- let mut locals_pattern_index = 0;
+ let query = Query::new(language, &query_source)?;
let mut highlights_pattern_index = 0;
for i in 0..(query.pattern_count()) {
let pattern_offset = query.start_byte_for_pattern(i);
if pattern_offset < highlights_query_offset {
- if pattern_offset < highlights_query_offset {
- highlights_pattern_index += 1;
- }
- if pattern_offset < locals_query_offset {
- locals_pattern_index += 1;
- }
+ highlights_pattern_index += 1;
}
}
+ let mut injections_query = Query::new(language, injection_query)?;
+
// Construct a separate query just for dealing with the 'combined injections'.
// Disable the combined injection patterns in the main query.
let mut combined_injections_query = Query::new(language, injection_query)?;
let mut has_combined_queries = false;
- for pattern_index in 0..locals_pattern_index {
- let settings = query.property_settings(pattern_index);
+ for pattern_index in 0..injections_query.pattern_count() {
+ let settings = injections_query.property_settings(pattern_index);
if settings.iter().any(|s| &*s.key == "injection.combined") {
has_combined_queries = true;
- query.disable_pattern(pattern_index);
+ injections_query.disable_pattern(pattern_index);
} else {
combined_injections_query.disable_pattern(pattern_index);
}
@@ -990,8 +1082,6 @@ impl HighlightConfiguration {
for (i, name) in query.capture_names().iter().enumerate() {
let i = Some(i as u32);
match name.as_str() {
- "injection.content" => injection_content_capture_index = i,
- "injection.language" => injection_language_capture_index = i,
"local.definition" => local_def_capture_index = i,
"local.definition-value" => local_def_value_capture_index = i,
"local.reference" => local_ref_capture_index = i,
@@ -1000,12 +1090,21 @@ impl HighlightConfiguration {
}
}
+ for (i, name) in injections_query.capture_names().iter().enumerate() {
+ let i = Some(i as u32);
+ match name.as_str() {
+ "injection.content" => injection_content_capture_index = i,
+ "injection.language" => injection_language_capture_index = i,
+ _ => {}
+ }
+ }
+
let highlight_indices = ArcSwap::from_pointee(vec![None; query.capture_names().len()]);
Ok(Self {
language,
query,
+ injections_query,
combined_injections_query,
- locals_pattern_index,
highlights_pattern_index,
highlight_indices,
non_local_variable_patterns,
@@ -1070,238 +1169,6 @@ impl HighlightConfiguration {
}
impl<'a> HighlightIterLayer<'a> {
- /// Create a new 'layer' of highlighting for this document.
- ///
- /// In the even that the new layer contains "combined injections" (injections where multiple
- /// disjoint ranges are parsed as one syntax tree), these will be eagerly processed and
- /// added to the returned vector.
- fn new<F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a>(
- source: RopeSlice<'a>,
- cancellation_flag: Option<&'a AtomicUsize>,
- injection_callback: &mut F,
- mut config: &'a HighlightConfiguration,
- mut depth: usize,
- mut ranges: Vec<Range>,
- ) -> Result<Vec<Self>, Error> {
- let mut result = Vec::with_capacity(1);
- let mut queue = Vec::new();
- loop {
- // --> Tree parsing part
-
- PARSER.with(|ts_parser| {
- let highlighter = &mut ts_parser.borrow_mut();
-
- if highlighter.parser.set_included_ranges(&ranges).is_ok() {
- highlighter
- .parser
- .set_language(config.language)
- .map_err(|_| Error::InvalidLanguage)?;
-
- unsafe { highlighter.parser.set_cancellation_flag(cancellation_flag) };
- let tree = highlighter
- .parser
- .parse_with(
- &mut |byte, _| {
- if byte <= source.len_bytes() {
- let (chunk, start_byte, _, _) = source.chunk_at_byte(byte);
- chunk[byte - start_byte..].as_bytes()
- } else {
- // out of range
- &[]
- }
- },
- None,
- )
- .ok_or(Error::Cancelled)?;
- unsafe { highlighter.parser.set_cancellation_flag(None) };
- let mut cursor = highlighter.cursors.pop().unwrap_or_else(QueryCursor::new);
-
- // Process combined injections.
- if let Some(combined_injections_query) = &config.combined_injections_query {
- let mut injections_by_pattern_index = vec![
- (None, Vec::new(), false);
- combined_injections_query
- .pattern_count()
- ];
- let matches = cursor.matches(
- combined_injections_query,
- tree.root_node(),
- RopeProvider(source),
- );
- for mat in matches {
- let entry = &mut injections_by_pattern_index[mat.pattern_index];
- let (language_name, content_node, include_children) =
- injection_for_match(
- config,
- combined_injections_query,
- &mat,
- source,
- );
- if language_name.is_some() {
- entry.0 = language_name;
- }
- if let Some(content_node) = content_node {
- entry.1.push(content_node);
- }
- entry.2 = include_children;
- }
- for (lang_name, content_nodes, includes_children) in
- injections_by_pattern_index
- {
- if let (Some(lang_name), false) = (lang_name, content_nodes.is_empty())
- {
- if let Some(next_config) = (injection_callback)(&lang_name) {
- let ranges = Self::intersect_ranges(
- &ranges,
- &content_nodes,
- includes_children,
- );
- if !ranges.is_empty() {
- queue.push((next_config, depth + 1, ranges));
- }
- }
- }
- }
- }
-
- // --> Highlighting query part
-
- // The `captures` iterator borrows the `Tree` and the `QueryCursor`, which
- // prevents them from being moved. But both of these values are really just
- // pointers, so it's actually ok to move them.
- let tree_ref = unsafe { mem::transmute::<_, &'static Tree>(&tree) };
- let cursor_ref =
- unsafe { mem::transmute::<_, &'static mut QueryCursor>(&mut cursor) };
- let captures = cursor_ref
- .captures(&config.query, tree_ref.root_node(), RopeProvider(source))
- .peekable();
-
- result.push(HighlightIterLayer {
- highlight_end_stack: Vec::new(),
- scope_stack: vec![LocalScope {
- inherits: false,
- range: 0..usize::MAX,
- local_defs: Vec::new(),
- }],
- cursor,
- depth,
- _tree: Some(tree),
- captures,
- config,
- ranges,
- });
- }
-
- Ok(()) // so we can use the try operator
- })?;
-
- if queue.is_empty() {
- break;
- }
-
- let (next_config, next_depth, next_ranges) = queue.remove(0);
- config = next_config;
- depth = next_depth;
- ranges = next_ranges;
- }
-
- Ok(result)
- }
-
- // Compute the ranges that should be included when parsing an injection.
- // This takes into account three things:
- // * `parent_ranges` - The ranges must all fall within the *current* layer's ranges.
- // * `nodes` - Every injection takes place within a set of nodes. The injection ranges
- // are the ranges of those nodes.
- // * `includes_children` - For some injections, the content nodes' children should be
- // excluded from the nested document, so that only the content nodes' *own* content
- // is reparsed. For other injections, the content nodes' entire ranges should be
- // reparsed, including the ranges of their children.
- fn intersect_ranges(
- parent_ranges: &[Range],
- nodes: &[Node],
- includes_children: bool,
- ) -> Vec<Range> {
- let mut cursor = nodes[0].walk();
- let mut result = Vec::new();
- let mut parent_range_iter = parent_ranges.iter();
- let mut parent_range = parent_range_iter
- .next()
- .expect("Layers should only be constructed with non-empty ranges vectors");
- for node in nodes.iter() {
- let mut preceding_range = Range {
- start_byte: 0,
- start_point: Point::new(0, 0),
- end_byte: node.start_byte(),
- end_point: node.start_position(),
- };
- let following_range = Range {
- start_byte: node.end_byte(),
- start_point: node.end_position(),
- end_byte: usize::MAX,
- end_point: Point::new(usize::MAX, usize::MAX),
- };
-
- for excluded_range in node
- .children(&mut cursor)
- .filter_map(|child| {
- if includes_children {
- None
- } else {
- Some(child.range())
- }
- })
- .chain([following_range].iter().cloned())
- {
- let mut range = Range {
- start_byte: preceding_range.end_byte,
- start_point: preceding_range.end_point,
- end_byte: excluded_range.start_byte,
- end_point: excluded_range.start_point,
- };
- preceding_range = excluded_range;
-
- if range.end_byte < parent_range.start_byte {
- continue;
- }
-
- while parent_range.start_byte <= range.end_byte {
- if parent_range.end_byte > range.start_byte {
- if range.start_byte < parent_range.start_byte {
- range.start_byte = parent_range.start_byte;
- range.start_point = parent_range.start_point;
- }
-
- if parent_range.end_byte < range.end_byte {
- if range.start_byte < parent_range.end_byte {
- result.push(Range {
- start_byte: range.start_byte,
- start_point: range.start_point,
- end_byte: parent_range.end_byte,
- end_point: parent_range.end_point,
- });
- }
- range.start_byte = parent_range.end_byte;
- range.start_point = parent_range.end_point;
- } else {
- if range.start_byte < range.end_byte {
- result.push(range);
- }
- break;
- }
- }
-
- if let Some(next_range) = parent_range_iter.next() {
- parent_range = next_range;
- } else {
- return result;
- }
- }
- }
- }
- result
- }
-
// First, sort scope boundaries by their byte offset in the document. At a
// given position, emit scope endings before scope beginnings. Finally, emit
// scope boundaries from deeper layers first.
@@ -1327,10 +1194,101 @@ impl<'a> HighlightIterLayer<'a> {
}
}
-impl<'a, F> HighlightIter<'a, F>
-where
- F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a,
-{
+// Compute the ranges that should be included when parsing an injection.
+// This takes into account three things:
+// * `parent_ranges` - The ranges must all fall within the *current* layer's ranges.
+// * `nodes` - Every injection takes place within a set of nodes. The injection ranges
+// are the ranges of those nodes.
+// * `includes_children` - For some injections, the content nodes' children should be
+// excluded from the nested document, so that only the content nodes' *own* content
+// is reparsed. For other injections, the content nodes' entire ranges should be
+// reparsed, including the ranges of their children.
+fn intersect_ranges(
+ parent_ranges: &[Range],
+ nodes: &[Node],
+ includes_children: bool,
+) -> Vec<Range> {
+ let mut cursor = nodes[0].walk();
+ let mut result = Vec::new();
+ let mut parent_range_iter = parent_ranges.iter();
+ let mut parent_range = parent_range_iter
+ .next()
+ .expect("Layers should only be constructed with non-empty ranges vectors");
+ for node in nodes.iter() {
+ let mut preceding_range = Range {
+ start_byte: 0,
+ start_point: Point::new(0, 0),
+ end_byte: node.start_byte(),
+ end_point: node.start_position(),
+ };
+ let following_range = Range {
+ start_byte: node.end_byte(),
+ start_point: node.end_position(),
+ end_byte: usize::MAX,
+ end_point: Point::new(usize::MAX, usize::MAX),
+ };
+
+ for excluded_range in node
+ .children(&mut cursor)
+ .filter_map(|child| {
+ if includes_children {
+ None
+ } else {
+ Some(child.range())
+ }
+ })
+ .chain([following_range].iter().cloned())
+ {
+ let mut range = Range {
+ start_byte: preceding_range.end_byte,
+ start_point: preceding_range.end_point,
+ end_byte: excluded_range.start_byte,
+ end_point: excluded_range.start_point,
+ };
+ preceding_range = excluded_range;
+
+ if range.end_byte < parent_range.start_byte {
+ continue;
+ }
+
+ while parent_range.start_byte <= range.end_byte {
+ if parent_range.end_byte > range.start_byte {
+ if range.start_byte < parent_range.start_byte {
+ range.start_byte = parent_range.start_byte;
+ range.start_point = parent_range.start_point;
+ }
+
+ if parent_range.end_byte < range.end_byte {
+ if range.start_byte < parent_range.end_byte {
+ result.push(Range {
+ start_byte: range.start_byte,
+ start_point: range.start_point,
+ end_byte: parent_range.end_byte,
+ end_point: parent_range.end_point,
+ });
+ }
+ range.start_byte = parent_range.end_byte;
+ range.start_point = parent_range.end_point;
+ } else {
+ if range.start_byte < range.end_byte {
+ result.push(range);
+ }
+ break;
+ }
+ }
+
+ if let Some(next_range) = parent_range_iter.next() {
+ parent_range = next_range;
+ } else {
+ return result;
+ }
+ }
+ }
+ }
+ result
+}
+
+impl<'a> HighlightIter<'a> {
fn emit_event(
&mut self,
offset: usize,
@@ -1361,6 +1319,12 @@ where
i += 1;
continue;
}
+ } else {
+ let layer = self.layers.remove(i + 1);
+ PARSER.with(|ts_parser| {
+ let highlighter = &mut ts_parser.borrow_mut();
+ highlighter.cursors.push(layer.cursor);
+ });
}
break;
}
@@ -1377,30 +1341,9 @@ where
}
}
}
-
- fn insert_layer(&mut self, mut layer: HighlightIterLayer<'a>) {
- if let Some(sort_key) = layer.sort_key() {
- let mut i = 1;
- while i < self.layers.len() {
- if let Some(sort_key_i) = self.layers[i].sort_key() {
- if sort_key_i > sort_key {
- self.layers.insert(i, layer);
- return;
- }
- i += 1;
- } else {
- self.layers.remove(i);
- }
- }
- self.layers.push(layer);
- }
- }
}
-impl<'a, F> Iterator for HighlightIter<'a, F>
-where
- F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a,
-{
+impl<'a> Iterator for HighlightIter<'a> {
type Item = Result<HighlightEvent, Error>;
fn next(&mut self) -> Option<Self::Item> {
@@ -1460,55 +1403,12 @@ where
layer.highlight_end_stack.pop();
return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd));
} else {
- // return self.emit_event(self.source.len(), None);
- return None;
+ return self.emit_event(self.source.len_bytes(), None);
};
let (mut match_, capture_index) = layer.captures.next().unwrap();
let mut capture = match_.captures[capture_index];
- // If this capture represents an injection, then process the injection.
- if match_.pattern_index < layer.config.locals_pattern_index {
- let (language_name, content_node, include_children) =
- injection_for_match(layer.config, &layer.config.query, &match_, self.source);
-
- // Explicitly remove this match so that none of its other captures will remain
- // in the stream of captures.
- match_.remove();
-
- // If a language is found with the given name, then add a new language layer
- // to the highlighted document.
- if let (Some(language_name), Some(content_node)) = (language_name, content_node) {
- if let Some(config) = (self.injection_callback)(&language_name) {
- let ranges = HighlightIterLayer::intersect_ranges(
- &self.layers[0].ranges,
- &[content_node],
- include_children,
- );
- if !ranges.is_empty() {
- match HighlightIterLayer::new(
- self.source,
- self.cancellation_flag,
- &mut self.injection_callback,
- config,
- self.layers[0].depth + 1,
- ranges,
- ) {
- Ok(layers) => {
- for layer in layers {
- self.insert_layer(layer);
- }
- }
- Err(e) => return Some(Err(e)),
- }
- }
- }
- }
-
- self.sort_layers();
- continue 'main;
- }
-
// Remove from the local scope stack any local scopes that have already ended.
while range.start > layer.scope_stack.last().unwrap().range.end {
layer.scope_stack.pop();
@@ -1703,14 +1603,6 @@ fn injection_for_match<'a>(
(language_name, content_node, include_children)
}
-// fn shrink_and_clear<T>(vec: &mut Vec<T>, capacity: usize) {
-// if vec.len() > capacity {
-// vec.truncate(capacity);
-// vec.shrink_to_fit();
-// }
-// vec.clear();
-// }
-
pub struct Merge<I> {
iter: I,
spans: Box<dyn Iterator<Item = (usize, std::ops::Range<usize>)>>,
@@ -1877,6 +1769,8 @@ mod test {
.map(String::from)
.collect();
+ let loader = Loader::new(Configuration { language: vec![] });
+
let language = get_language(&crate::RUNTIME_DIR, "Rust").unwrap();
let config = HighlightConfiguration::new(
language,
@@ -1899,7 +1793,7 @@ mod test {
fn main() {}
",
);
- let syntax = Syntax::new(&source, Arc::new(config));
+ let syntax = Syntax::new(&source, Arc::new(config), Arc::new(loader));
let tree = syntax.tree();
let root = tree.root_node();
assert_eq!(root.kind(), "source_file");
@@ -1926,7 +1820,7 @@ mod test {
&doc,
vec![(6, 11, Some("test".into())), (12, 17, None)].into_iter(),
);
- let edits = LanguageLayer::generate_edits(doc.slice(..), transaction.changes());
+ let edits = generate_edits(doc.slice(..), transaction.changes());
// transaction.apply(&mut state);
assert_eq!(
@@ -1955,7 +1849,7 @@ mod test {
let mut doc = Rope::from("fn test() {}");
let transaction =
Transaction::change(&doc, vec![(8, 8, Some("a: u32".into()))].into_iter());
- let edits = LanguageLayer::generate_edits(doc.slice(..), transaction.changes());
+ let edits = generate_edits(doc.slice(..), transaction.changes());
transaction.apply(&mut doc);
assert_eq!(doc, "fn test(a: u32) {}");