From 4ee92cad19cc94f0751f91fa9391d1899353d740 Mon Sep 17 00:00:00 2001 From: Gokul Soumya Date: Sat, 23 Oct 2021 08:11:19 +0530 Subject: Add treesitter textobjects (#728) * Add treesitter textobject queries Only for Go, Python and Rust for now. * Add tree-sitter textobjects Only has functions and class objects as of now. * Fix tests * Add docs for tree-sitter textobjects * Add guide for creating new textobject queries * Add parameter textobject Only parameter.inside is implemented now, parameter.around will probably require custom predicates akin to nvim' `make-range` since we want to select a trailing comma too (a comma will be an anonymous node and matching against them doesn't work similar to named nodes) * Simplify TextObject cell init--- helix-core/src/indent.rs | 1 + helix-core/src/syntax.rs | 43 +++++++++++++++++++++++++++++++++++-- helix-core/src/textobject.rs | 51 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 93 insertions(+), 2 deletions(-) (limited to 'helix-core') diff --git a/helix-core/src/indent.rs b/helix-core/src/indent.rs index d9a0155f..20f034ea 100644 --- a/helix-core/src/indent.rs +++ b/helix-core/src/indent.rs @@ -464,6 +464,7 @@ where unit: String::from(" "), }), indent_query: OnceCell::new(), + textobject_query: OnceCell::new(), }], }); diff --git a/helix-core/src/syntax.rs b/helix-core/src/syntax.rs index 0929e38f..f4b4535b 100644 --- a/helix-core/src/syntax.rs +++ b/helix-core/src/syntax.rs @@ -49,7 +49,7 @@ pub struct Configuration { #[serde(rename_all = "kebab-case")] pub struct LanguageConfiguration { #[serde(rename = "name")] - pub(crate) language_id: String, + pub language_id: String, pub scope: String, // source.rust pub file_types: Vec, // filename ends_with? pub roots: Vec, // these indicate project roots <.git, Cargo.toml> @@ -76,6 +76,8 @@ pub struct LanguageConfiguration { #[serde(skip)] pub(crate) indent_query: OnceCell>, + #[serde(skip)] + pub(crate) textobject_query: OnceCell>, } #[derive(Debug, Serialize, Deserialize)] @@ -105,6 +107,32 @@ pub struct IndentQuery { pub outdent: HashSet, } +#[derive(Debug)] +pub struct TextObjectQuery { + pub query: Query, +} + +impl TextObjectQuery { + /// Run the query on the given node and return sub nodes which match given + /// capture ("function.inside", "class.around", etc). + pub fn capture_nodes<'a>( + &'a self, + capture_name: &str, + node: Node<'a>, + slice: RopeSlice<'a>, + cursor: &'a mut QueryCursor, + ) -> Option>> { + let capture_idx = self.query.capture_index_for_name(capture_name)?; + let captures = cursor.captures(&self.query, node, RopeProvider(slice)); + + captures + .filter_map(move |(mat, idx)| { + (mat.captures[idx].index == capture_idx).then(|| mat.captures[idx].node) + }) + .into() + } +} + fn load_runtime_file(language: &str, filename: &str) -> Result { let path = crate::RUNTIME_DIR .join("queries") @@ -153,7 +181,6 @@ impl LanguageConfiguration { // highlights_query += "\n(ERROR) @error"; let injections_query = read_query(&language, "injections.scm"); - let locals_query = read_query(&language, "locals.scm"); if highlights_query.is_empty() { @@ -203,6 +230,18 @@ impl LanguageConfiguration { .as_ref() } + pub fn textobject_query(&self) -> Option<&TextObjectQuery> { + self.textobject_query + .get_or_init(|| -> Option { + let lang_name = self.language_id.to_ascii_lowercase(); + let query_text = read_query(&lang_name, "textobjects.scm"); + let lang = self.highlight_config.get()?.as_ref()?.language; + let query = Query::new(lang, &query_text).ok()?; + Some(TextObjectQuery { query }) + }) + .as_ref() + } + pub fn scope(&self) -> &str { &self.scope } diff --git a/helix-core/src/textobject.rs b/helix-core/src/textobject.rs index b965f6df..975ed115 100644 --- a/helix-core/src/textobject.rs +++ b/helix-core/src/textobject.rs @@ -1,9 +1,13 @@ +use std::fmt::Display; + use ropey::RopeSlice; +use tree_sitter::{Node, QueryCursor}; use crate::chars::{categorize_char, char_is_whitespace, CharCategory}; use crate::graphemes::next_grapheme_boundary; use crate::movement::Direction; use crate::surround; +use crate::syntax::LanguageConfiguration; use crate::Range; fn find_word_boundary(slice: RopeSlice, mut pos: usize, direction: Direction) -> usize { @@ -51,6 +55,15 @@ pub enum TextObject { Inside, } +impl Display for TextObject { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(match self { + Self::Around => "around", + Self::Inside => "inside", + }) + } +} + // count doesn't do anything yet pub fn textobject_word( slice: RopeSlice, @@ -108,6 +121,44 @@ pub fn textobject_surround( .unwrap_or(range) } +/// Transform the given range to select text objects based on tree-sitter. +/// `object_name` is a query capture base name like "function", "class", etc. +/// `slice_tree` is the tree-sitter node corresponding to given text slice. +pub fn textobject_treesitter( + slice: RopeSlice, + range: Range, + textobject: TextObject, + object_name: &str, + slice_tree: Node, + lang_config: &LanguageConfiguration, + _count: usize, +) -> Range { + let get_range = move || -> Option { + let byte_pos = slice.char_to_byte(range.cursor(slice)); + + let capture_name = format!("{}.{}", object_name, textobject); // eg. function.inner + let mut cursor = QueryCursor::new(); + let node = lang_config + .textobject_query()? + .capture_nodes(&capture_name, slice_tree, slice, &mut cursor)? + .filter(|node| node.byte_range().contains(&byte_pos)) + .min_by_key(|node| node.byte_range().len())?; + + let len = slice.len_bytes(); + let start_byte = node.start_byte(); + let end_byte = node.end_byte(); + if start_byte >= len || end_byte >= len { + return None; + } + + let start_char = slice.byte_to_char(start_byte); + let end_char = slice.byte_to_char(end_byte); + + Some(Range::new(start_char, end_char)) + }; + get_range().unwrap_or(range) +} + #[cfg(test)] mod test { use super::TextObject::*; -- cgit v1.2.3-70-g09d2