summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichael Davis2024-01-10 20:58:44 +0000
committerBlaž Hrastnik2024-03-23 06:32:34 +0000
commitb1222f06640c02feb1b87b988d6bca53fdddb9c0 (patch)
treeeec9a682fa9faa020bf3bfe0647af4626a1a9edd
parent6dd46bfe1c87a4ba4d3ae2feef1270a90ef5f67b (diff)
Add a TreeCursor type that travels over injection layers
This uses the layer parentage information from the parent commit to traverse the layers. It's a similar API to `tree_sitter:TreeCursor` but internally it does not use a `tree_sitter::TreeCursor` currently because that interface is behaving very unexpectedly. Using the `next_sibling`/`prev_sibling`/`parent` API on `tree_sitter::Node` reflects the previous code's behavior so this should result in no surprising changes.
-rw-r--r--helix-core/src/syntax.rs16
-rw-r--r--helix-core/src/syntax/tree_cursor.rs160
2 files changed, 173 insertions, 3 deletions
diff --git a/helix-core/src/syntax.rs b/helix-core/src/syntax.rs
index 6414f3ce..78abc0b0 100644
--- a/helix-core/src/syntax.rs
+++ b/helix-core/src/syntax.rs
@@ -1,3 +1,5 @@
+mod tree_cursor;
+
use crate::{
auto_pairs::AutoPairs,
chars::char_is_line_ending,
@@ -32,6 +34,8 @@ use serde::{ser::SerializeSeq, Deserialize, Serialize};
use helix_loader::grammar::{get_language, load_runtime_file};
+pub use tree_cursor::TreeCursor;
+
fn deserialize_regex<'de, D>(deserializer: D) -> Result<Option<Regex>, D::Error>
where
D: serde::Deserializer<'de>,
@@ -1495,6 +1499,12 @@ impl Syntax {
.descendant_for_byte_range(start, end)
}
+ pub fn walk(&self) -> TreeCursor<'_> {
+ // data structure to find the smallest range that contains a point
+ // when some of the ranges in the structure can overlap.
+ TreeCursor::new(&self.layers, self.root)
+ }
+
// Commenting
// comment_strings_for_pos
// is_commented
@@ -1723,7 +1733,7 @@ use std::sync::atomic::{AtomicUsize, Ordering};
use std::{iter, mem, ops, str, usize};
use tree_sitter::{
Language as Grammar, Node, Parser, Point, Query, QueryCaptures, QueryCursor, QueryError,
- QueryMatch, Range, TextProvider, Tree, TreeCursor,
+ QueryMatch, Range, TextProvider, Tree,
};
const CANCELLATION_CHECK_INTERVAL: usize = 100;
@@ -2657,7 +2667,7 @@ pub fn pretty_print_tree<W: fmt::Write>(fmt: &mut W, node: Node) -> fmt::Result
fn pretty_print_tree_impl<W: fmt::Write>(
fmt: &mut W,
- cursor: &mut TreeCursor,
+ cursor: &mut tree_sitter::TreeCursor,
depth: usize,
) -> fmt::Result {
let node = cursor.node();
@@ -2967,7 +2977,7 @@ mod test {
// rule but `name` and `body` belong to an unnamed helper `_method_rest`.
// This can cause a bug with a pretty-printing implementation that
// uses `Node::field_name_for_child` to determine field names but is
- // fixed when using `TreeCursor::field_name`.
+ // fixed when using `tree_sitter::TreeCursor::field_name`.
let source = "def self.method_name
true
end";
diff --git a/helix-core/src/syntax/tree_cursor.rs b/helix-core/src/syntax/tree_cursor.rs
new file mode 100644
index 00000000..d9d140c9
--- /dev/null
+++ b/helix-core/src/syntax/tree_cursor.rs
@@ -0,0 +1,160 @@
+use std::{cmp::Reverse, ops::Range};
+
+use super::{LanguageLayer, LayerId};
+
+use slotmap::HopSlotMap;
+use tree_sitter::Node;
+
+/// The byte range of an injection layer.
+///
+/// Injection ranges may overlap, but all overlapping parts are subsets of their parent ranges.
+/// This allows us to sort the ranges ahead of time in order to efficiently find a range that
+/// contains a point with maximum depth.
+#[derive(Debug)]
+struct InjectionRange {
+ start: usize,
+ end: usize,
+ layer_id: LayerId,
+ depth: u32,
+}
+
+pub struct TreeCursor<'a> {
+ layers: &'a HopSlotMap<LayerId, LanguageLayer>,
+ root: LayerId,
+ current: LayerId,
+ injection_ranges: Vec<InjectionRange>,
+ // TODO: Ideally this would be a `tree_sitter::TreeCursor<'a>` but
+ // that returns very surprising results in testing.
+ cursor: Node<'a>,
+}
+
+impl<'a> TreeCursor<'a> {
+ pub(super) fn new(layers: &'a HopSlotMap<LayerId, LanguageLayer>, root: LayerId) -> Self {
+ let mut injection_ranges = Vec::new();
+
+ for (layer_id, layer) in layers.iter() {
+ // Skip the root layer
+ if layer.parent.is_none() {
+ continue;
+ }
+ for byte_range in layer.ranges.iter() {
+ let range = InjectionRange {
+ start: byte_range.start_byte,
+ end: byte_range.end_byte,
+ layer_id,
+ depth: layer.depth,
+ };
+ injection_ranges.push(range);
+ }
+ }
+
+ injection_ranges.sort_unstable_by_key(|range| (range.end, Reverse(range.depth)));
+
+ let cursor = layers[root].tree().root_node();
+
+ Self {
+ layers,
+ root,
+ current: root,
+ injection_ranges,
+ cursor,
+ }
+ }
+
+ pub fn node(&self) -> Node<'a> {
+ self.cursor
+ }
+
+ pub fn goto_parent(&mut self) -> bool {
+ if let Some(parent) = self.node().parent() {
+ self.cursor = parent;
+ return true;
+ }
+
+ // If we are already on the root layer, we cannot ascend.
+ if self.current == self.root {
+ return false;
+ }
+
+ // Ascend to the parent layer.
+ let range = self.node().byte_range();
+ let parent_id = self.layers[self.current]
+ .parent
+ .expect("non-root layers have a parent");
+ self.current = parent_id;
+ let root = self.layers[self.current].tree().root_node();
+ self.cursor = root
+ .descendant_for_byte_range(range.start, range.end)
+ .unwrap_or(root);
+
+ true
+ }
+
+ /// Finds the injection layer that has exactly the same range as the given `range`.
+ fn layer_id_of_byte_range(&self, search_range: Range<usize>) -> Option<LayerId> {
+ let start_idx = self
+ .injection_ranges
+ .partition_point(|range| range.end < search_range.end);
+
+ self.injection_ranges[start_idx..]
+ .iter()
+ .take_while(|range| range.end == search_range.end)
+ .find_map(|range| (range.start == search_range.start).then_some(range.layer_id))
+ }
+
+ pub fn goto_first_child(&mut self) -> bool {
+ // Check if the current node's range is an exact injection layer range.
+ if let Some(layer_id) = self
+ .layer_id_of_byte_range(self.node().byte_range())
+ .filter(|&layer_id| layer_id != self.current)
+ {
+ // Switch to the child layer.
+ self.current = layer_id;
+ self.cursor = self.layers[self.current].tree().root_node();
+ true
+ } else if let Some(child) = self.cursor.child(0) {
+ // Otherwise descend in the current tree.
+ self.cursor = child;
+ true
+ } else {
+ false
+ }
+ }
+
+ pub fn goto_next_sibling(&mut self) -> bool {
+ if let Some(sibling) = self.cursor.next_sibling() {
+ self.cursor = sibling;
+ true
+ } else {
+ false
+ }
+ }
+
+ pub fn goto_prev_sibling(&mut self) -> bool {
+ if let Some(sibling) = self.cursor.prev_sibling() {
+ self.cursor = sibling;
+ true
+ } else {
+ false
+ }
+ }
+
+ /// Finds the injection layer that contains the given start-end range.
+ fn layer_id_containing_byte_range(&self, start: usize, end: usize) -> LayerId {
+ let start_idx = self
+ .injection_ranges
+ .partition_point(|range| range.end < end);
+
+ self.injection_ranges[start_idx..]
+ .iter()
+ .take_while(|range| range.start < end)
+ .find_map(|range| (range.start <= start).then_some(range.layer_id))
+ .unwrap_or(self.root)
+ }
+
+ pub fn reset_to_byte_range(&mut self, start: usize, end: usize) {
+ self.current = self.layer_id_containing_byte_range(start, end);
+ let root = self.layers[self.current].tree().root_node();
+ self.cursor = root.descendant_for_byte_range(start, end).unwrap_or(root);
+ }
+}