diff options
author | Michael Davis | 2022-11-17 01:03:02 +0000 |
---|---|---|
committer | GitHub | 2022-11-17 01:03:02 +0000 |
commit | 94346356e7657011adf0012651ec031b98d87411 (patch) | |
tree | 694dd8d0cb604facf011b890ba482e7f802ed78b /helix-core/src/syntax.rs | |
parent | c6b83368b3b626cb63120b5ac684cc8b1b693172 (diff) |
Use TreeCursor to pretty-print :tree-sitter-subtree (#4606)
The current `:tree-sitter-subtree` has a bug for field-names when the
field name belongs to an unnamed child node. Take this ruby example:
def self.method_name
true
end
The subtree given by tree-sitter-cli is:
(singleton_method [2, 0] - [4, 3]
object: (self [2, 4] - [2, 8])
name: (identifier [2, 9] - [2, 20])
body: (body_statement [3, 2] - [3, 6]
(true [3, 2] - [3, 6])))
But the `:tree-sitter-subtree` output was
(singleton_method
object: (self)
body: (identifier)
(body_statement (true)))
The `singleton_method` rule defines the `name` and `body` fields in an
unnamed helper rule `_method_rest` and the old implementation of
`pretty_print_tree_impl` would pass the `field_name` down from the
named `singleton_method` node.
To fix it we switch to the [TreeCursor] API which is recommended by
the tree-sitter docs for traversing the tree. `TreeCursor::field_name`
accurately determines the field name for the current cursor position
even when the node is unnamed.
[TreeCursor]: https://docs.rs/tree-sitter/0.20.9/tree_sitter/struct.TreeCursor.html
Diffstat (limited to 'helix-core/src/syntax.rs')
-rw-r--r-- | helix-core/src/syntax.rs | 101 |
1 files changed, 71 insertions, 30 deletions
diff --git a/helix-core/src/syntax.rs b/helix-core/src/syntax.rs index b320fcda..93bd7fe9 100644 --- a/helix-core/src/syntax.rs +++ b/helix-core/src/syntax.rs @@ -1283,7 +1283,7 @@ use std::sync::atomic::{AtomicUsize, Ordering}; use std::{iter, mem, ops, str, usize}; use tree_sitter::{ Language as Grammar, Node, Parser, Point, Query, QueryCaptures, QueryCursor, QueryError, - QueryMatch, Range, TextProvider, Tree, + QueryMatch, Range, TextProvider, Tree, TreeCursor, }; const CANCELLATION_CHECK_INTERVAL: usize = 100; @@ -2153,57 +2153,68 @@ impl<I: Iterator<Item = HighlightEvent>> Iterator for Merge<I> { } } +fn node_is_visible(node: &Node) -> bool { + node.is_missing() || (node.is_named() && node.language().node_kind_is_visible(node.kind_id())) +} + pub fn pretty_print_tree<W: fmt::Write>(fmt: &mut W, node: Node) -> fmt::Result { - pretty_print_tree_impl(fmt, node, true, None, 0) + if node.child_count() == 0 { + if node_is_visible(&node) { + write!(fmt, "({})", node.kind()) + } else { + write!(fmt, "\"{}\"", node.kind()) + } + } else { + pretty_print_tree_impl(fmt, &mut node.walk(), 0) + } } fn pretty_print_tree_impl<W: fmt::Write>( fmt: &mut W, - node: Node, - is_root: bool, - field_name: Option<&str>, + cursor: &mut TreeCursor, depth: usize, ) -> fmt::Result { - fn is_visible(node: Node) -> bool { - node.is_missing() - || (node.is_named() && node.language().node_kind_is_visible(node.kind_id())) - } + let node = cursor.node(); + let visible = node_is_visible(&node); - if is_visible(node) { + if visible { let indentation_columns = depth * 2; write!(fmt, "{:indentation_columns$}", "")?; - if let Some(field_name) = field_name { + if let Some(field_name) = cursor.field_name() { write!(fmt, "{}: ", field_name)?; } write!(fmt, "({}", node.kind())?; - } else if is_root { - write!(fmt, "(\"{}\")", node.kind())?; } - for child_idx in 0..node.child_count() { - if let Some(child) = node.child(child_idx) { - if is_visible(child) { + // Handle children. + if cursor.goto_first_child() { + loop { + if node_is_visible(&cursor.node()) { fmt.write_char('\n')?; } - pretty_print_tree_impl( - fmt, - child, - false, - node.field_name_for_child(child_idx as u32), - depth + 1, - )?; + pretty_print_tree_impl(fmt, cursor, depth + 1)?; + + if !cursor.goto_next_sibling() { + break; + } } + + let moved = cursor.goto_parent(); + // The parent of the first child must exist, and must be `node`. + debug_assert!(moved); + debug_assert!(cursor.node() == node); } - if is_visible(node) { - write!(fmt, ")")?; + if visible { + fmt.write_char(')')?; } Ok(()) } + #[cfg(test)] mod test { use super::*; @@ -2376,11 +2387,17 @@ mod test { } #[track_caller] - fn assert_pretty_print(source: &str, expected: &str, start: usize, end: usize) { + fn assert_pretty_print( + language_name: &str, + source: &str, + expected: &str, + start: usize, + end: usize, + ) { let source = Rope::from_str(source); let loader = Loader::new(Configuration { language: vec![] }); - let language = get_language("rust").unwrap(); + let language = get_language(language_name).unwrap(); let config = HighlightConfiguration::new(language, "", "", "").unwrap(); let syntax = Syntax::new(&source, Arc::new(config), Arc::new(loader)); @@ -2400,13 +2417,14 @@ mod test { #[test] fn test_pretty_print() { let source = r#"/// Hello"#; - assert_pretty_print(source, "(line_comment)", 0, source.len()); + assert_pretty_print("rust", source, "(line_comment)", 0, source.len()); // A large tree should be indented with fields: let source = r#"fn main() { println!("Hello, World!"); }"#; assert_pretty_print( + "rust", source, concat!( "(function_item\n", @@ -2425,11 +2443,34 @@ mod test { // Selecting a token should print just that token: let source = r#"fn main() {}"#; - assert_pretty_print(source, r#"("fn")"#, 0, 1); + assert_pretty_print("rust", source, r#""fn""#, 0, 1); // Error nodes are printed as errors: let source = r#"}{"#; - assert_pretty_print(source, "(ERROR)", 0, source.len()); + assert_pretty_print("rust", source, "(ERROR)", 0, source.len()); + + // Fields broken under unnamed nodes are determined correctly. + // In the following source, `object` belongs to the `singleton_method` + // rule but `name` and `body` belong to an unnamed helper `_method_rest`. + // This can cause a bug with a pretty-printing implementation that + // uses `Node::field_name_for_child` to determine field names but is + // fixed when using `TreeCursor::field_name`. + let source = "def self.method_name + true + end"; + assert_pretty_print( + "ruby", + source, + concat!( + "(singleton_method\n", + " object: (self)\n", + " name: (identifier)\n", + " body: (body_statement\n", + " (true)))" + ), + 0, + source.len(), + ); } #[test] |