aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGokul Soumya2021-10-23 02:41:19 +0000
committerGitHub2021-10-23 02:41:19 +0000
commit4ee92cad19cc94f0751f91fa9391d1899353d740 (patch)
tree794be048905f5d5026ba1968dc0152d12473c024
parentc5298caa752dee136ab1a21dae27a702a00d8eea (diff)
Add treesitter textobjects (#728)
* Add treesitter textobject queries Only for Go, Python and Rust for now. * Add tree-sitter textobjects Only has functions and class objects as of now. * Fix tests * Add docs for tree-sitter textobjects * Add guide for creating new textobject queries * Add parameter textobject Only parameter.inside is implemented now, parameter.around will probably require custom predicates akin to nvim' `make-range` since we want to select a trailing comma too (a comma will be an anonymous node and matching against them doesn't work similar to named nodes) * Simplify TextObject cell init
-rw-r--r--book/src/SUMMARY.md2
-rw-r--r--book/src/guides/README.md4
-rw-r--r--book/src/guides/textobject.md30
-rw-r--r--book/src/usage.md13
-rw-r--r--helix-core/src/indent.rs1
-rw-r--r--helix-core/src/syntax.rs43
-rw-r--r--helix-core/src/textobject.rs51
-rw-r--r--helix-term/src/commands.rs19
-rw-r--r--runtime/queries/go/textobjects.scm21
-rw-r--r--runtime/queries/python/textobjects.scm14
-rw-r--r--runtime/queries/rust/textobjects.scm26
11 files changed, 219 insertions, 5 deletions
diff --git a/book/src/SUMMARY.md b/book/src/SUMMARY.md
index 3fa8e067..56f50e21 100644
--- a/book/src/SUMMARY.md
+++ b/book/src/SUMMARY.md
@@ -8,3 +8,5 @@
- [Keymap](./keymap.md)
- [Key Remapping](./remapping.md)
- [Hooks](./hooks.md)
+- [Guides](./guides/README.md)
+ - [Adding Textobject Queries](./guides/textobject.md)
diff --git a/book/src/guides/README.md b/book/src/guides/README.md
new file mode 100644
index 00000000..96e62978
--- /dev/null
+++ b/book/src/guides/README.md
@@ -0,0 +1,4 @@
+# Guides
+
+This section contains guides for adding new language server configurations,
+tree-sitter grammars, textobject queries, etc.
diff --git a/book/src/guides/textobject.md b/book/src/guides/textobject.md
new file mode 100644
index 00000000..50b3b574
--- /dev/null
+++ b/book/src/guides/textobject.md
@@ -0,0 +1,30 @@
+# Adding Textobject Queries
+
+Textobjects that are language-specific ([like functions, classes, etc.][textobjects])
+require an accompanying tree-sitter grammar and a `textobjects.scm` query file
+to work properly. Tree-sitter allows us to query the source code syntax tree
+and capture specific parts of it. The queries are written in a lisp dialect.
+More information on how to write queries can be found in the [official tree-sitter
+documentation][tree-sitter-queries].
+
+Query files should be placed in `runtime/queries/{language}/textobjects.scm`
+when contributing. Note that to test the query files locally you should put
+them under your local runtime directory (`~/.config/helix/runtime` on Linux
+for example).
+
+The following [captures][tree-sitter-captures] are recognized:
+
+| Capture Name |
+| --- |
+| `function.inside` |
+| `function.around` |
+| `class.inside` |
+| `class.around` |
+| `parameter.inside` |
+
+[Example query files][textobject-examples] can be found in the helix GitHub repository.
+
+[textobjects]: ../usage.md#textobjects
+[tree-sitter-queries]: https://tree-sitter.github.io/tree-sitter/using-parsers#query-syntax
+[tree-sitter-captures]: https://tree-sitter.github.io/tree-sitter/using-parsers#capturing-nodes
+[textobject-examples]: https://github.com/search?q=repo%3Ahelix-editor%2Fhelix+filename%3Atextobjects.scm&type=Code&ref=advsearch&l=&l=
diff --git a/book/src/usage.md b/book/src/usage.md
index 2de8d01a..d31e03a1 100644
--- a/book/src/usage.md
+++ b/book/src/usage.md
@@ -51,9 +51,10 @@ Multiple characters are currently not supported, but planned.
## Textobjects
-Currently supported: `word`, `surround`.
+Currently supported: `word`, `surround`, `function`, `class`, `parameter`.
![textobject-demo](https://user-images.githubusercontent.com/23398472/124231131-81a4bb00-db2d-11eb-9d10-8e577ca7b177.gif)
+![textobject-treesitter-demo](https://user-images.githubusercontent.com/23398472/132537398-2a2e0a54-582b-44ab-a77f-eb818942203d.gif)
- `ma` - Select around the object (`va` in vim, `<alt-a>` in kakoune)
- `mi` - Select inside the object (`vi` in vim, `<alt-i>` in kakoune)
@@ -62,5 +63,11 @@ Currently supported: `word`, `surround`.
| --- | --- |
| `w` | Word |
| `(`, `[`, `'`, etc | Specified surround pairs |
-
-Textobjects based on treesitter, like `function`, `class`, etc are planned.
+| `f` | Function |
+| `c` | Class |
+| `p` | Parameter |
+
+Note: `f`, `c`, etc. need a tree-sitter grammar active for the current
+document and a special tree-sitter query file to work properly. [Only
+some grammars](https://github.com/search?q=repo%3Ahelix-editor%2Fhelix+filename%3Atextobjects.scm&type=Code&ref=advsearch&l=&l=)
+currently have the query file implemented. Contributions are welcome!
diff --git a/helix-core/src/indent.rs b/helix-core/src/indent.rs
index d9a0155f..20f034ea 100644
--- a/helix-core/src/indent.rs
+++ b/helix-core/src/indent.rs
@@ -464,6 +464,7 @@ where
unit: String::from(" "),
}),
indent_query: OnceCell::new(),
+ textobject_query: OnceCell::new(),
}],
});
diff --git a/helix-core/src/syntax.rs b/helix-core/src/syntax.rs
index 0929e38f..f4b4535b 100644
--- a/helix-core/src/syntax.rs
+++ b/helix-core/src/syntax.rs
@@ -49,7 +49,7 @@ pub struct Configuration {
#[serde(rename_all = "kebab-case")]
pub struct LanguageConfiguration {
#[serde(rename = "name")]
- pub(crate) language_id: String,
+ pub language_id: String,
pub scope: String, // source.rust
pub file_types: Vec<String>, // filename ends_with? <Gemfile, rb, etc>
pub roots: Vec<String>, // these indicate project roots <.git, Cargo.toml>
@@ -76,6 +76,8 @@ pub struct LanguageConfiguration {
#[serde(skip)]
pub(crate) indent_query: OnceCell<Option<IndentQuery>>,
+ #[serde(skip)]
+ pub(crate) textobject_query: OnceCell<Option<TextObjectQuery>>,
}
#[derive(Debug, Serialize, Deserialize)]
@@ -105,6 +107,32 @@ pub struct IndentQuery {
pub outdent: HashSet<String>,
}
+#[derive(Debug)]
+pub struct TextObjectQuery {
+ pub query: Query, // query whose capture names identify textobjects ("function.inside", ...)
+}
+
+impl TextObjectQuery {
+ /// Run the query on the given node and return the sub nodes captured under `capture_name`
+ /// ("function.inside", "class.around", etc). Returns `None` if the query has no capture with that name.
+ pub fn capture_nodes<'a>(
+ &'a self,
+ capture_name: &str,
+ node: Node<'a>,
+ slice: RopeSlice<'a>,
+ cursor: &'a mut QueryCursor,
+ ) -> Option<impl Iterator<Item = Node<'a>>> {
+ let capture_idx = self.query.capture_index_for_name(capture_name)?; // unknown capture name: bail out with None
+ let captures = cursor.captures(&self.query, node, RopeProvider(slice));
+
+ captures
+ .filter_map(move |(mat, idx)| {
+ (mat.captures[idx].index == capture_idx).then(|| mat.captures[idx].node) // keep only nodes captured under the requested name
+ })
+ .into() // wraps the lazy iterator in `Some(..)` to fit the Option return type
+ }
+}
+
fn load_runtime_file(language: &str, filename: &str) -> Result<String, std::io::Error> {
let path = crate::RUNTIME_DIR
.join("queries")
@@ -153,7 +181,6 @@ impl LanguageConfiguration {
// highlights_query += "\n(ERROR) @error";
let injections_query = read_query(&language, "injections.scm");
-
let locals_query = read_query(&language, "locals.scm");
if highlights_query.is_empty() {
@@ -203,6 +230,18 @@ impl LanguageConfiguration {
.as_ref()
}
+ pub fn textobject_query(&self) -> Option<&TextObjectQuery> { // builds the query from `textobjects.scm` on first call and reuses the stored result afterwards
+ self.textobject_query
+ .get_or_init(|| -> Option<TextObjectQuery> {
+ let lang_name = self.language_id.to_ascii_lowercase();
+ let query_text = read_query(&lang_name, "textobjects.scm");
+ let lang = self.highlight_config.get()?.as_ref()?.language; // NOTE(review): yields None when highlight_config is unset or holds None; get_or_init presumably keeps that None for good, so confirm the highlight config is always loaded first
+ let query = Query::new(lang, &query_text).ok()?; // a query that fails to build is dropped silently (result becomes None)
+ Some(TextObjectQuery { query })
+ })
+ .as_ref()
+ }
+
pub fn scope(&self) -> &str {
&self.scope
}
diff --git a/helix-core/src/textobject.rs b/helix-core/src/textobject.rs
index b965f6df..975ed115 100644
--- a/helix-core/src/textobject.rs
+++ b/helix-core/src/textobject.rs
@@ -1,9 +1,13 @@
+use std::fmt::Display;
+
use ropey::RopeSlice;
+use tree_sitter::{Node, QueryCursor};
use crate::chars::{categorize_char, char_is_whitespace, CharCategory};
use crate::graphemes::next_grapheme_boundary;
use crate::movement::Direction;
use crate::surround;
+use crate::syntax::LanguageConfiguration;
use crate::Range;
fn find_word_boundary(slice: RopeSlice, mut pos: usize, direction: Direction) -> usize {
@@ -51,6 +55,15 @@ pub enum TextObject {
Inside,
}
+impl Display for TextObject { // renders the variant as the lowercase suffix used in capture names, e.g. "function.inside"
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ f.write_str(match self {
+ Self::Around => "around",
+ Self::Inside => "inside",
+ })
+ }
+}
+
// count doesn't do anything yet
pub fn textobject_word(
slice: RopeSlice,
@@ -108,6 +121,44 @@ pub fn textobject_surround(
.unwrap_or(range)
}
+/// Transform the given range to select text objects based on tree-sitter; the input range is returned unchanged when no object is found.
+/// `object_name` is a query capture base name like "function", "class", etc.
+/// `slice_tree` is the tree-sitter node corresponding to given text slice.
+pub fn textobject_treesitter(
+ slice: RopeSlice,
+ range: Range,
+ textobject: TextObject,
+ object_name: &str,
+ slice_tree: Node,
+ lang_config: &LanguageConfiguration,
+ _count: usize, // currently unused
+) -> Range {
+ let get_range = move || -> Option<Range> {
+ let byte_pos = slice.char_to_byte(range.cursor(slice));
+
+ let capture_name = format!("{}.{}", object_name, textobject); // eg. function.inside
+ let mut cursor = QueryCursor::new();
+ let node = lang_config
+ .textobject_query()?
+ .capture_nodes(&capture_name, slice_tree, slice, &mut cursor)?
+ .filter(|node| node.byte_range().contains(&byte_pos))
+ .min_by_key(|node| node.byte_range().len())?; // of the captures containing the cursor byte, pick the shortest one
+
+ let len = slice.len_bytes();
+ let start_byte = node.start_byte();
+ let end_byte = node.end_byte();
+ if start_byte >= len || end_byte >= len { // NOTE(review): if end_byte is an exclusive bound, `end_byte >= len` also rejects an object that ends exactly at the end of the text; confirm `>` was not intended
+ return None;
+ }
+
+ let start_char = slice.byte_to_char(start_byte);
+ let end_char = slice.byte_to_char(end_byte);
+
+ Some(Range::new(start_char, end_char))
+ };
+ get_range().unwrap_or(range) // any failure above leaves the original range untouched
+}
+
#[cfg(test)]
mod test {
use super::TextObject::*;
diff --git a/helix-term/src/commands.rs b/helix-term/src/commands.rs
index 9f54292d..272a9d9a 100644
--- a/helix-term/src/commands.rs
+++ b/helix-term/src/commands.rs
@@ -4465,9 +4465,28 @@ fn select_textobject(cx: &mut Context, objtype: textobject::TextObject) {
let (view, doc) = current!(cx.editor);
let text = doc.text().slice(..);
+ let textobject_treesitter = |obj_name: &str, range: Range| -> Range {
+ let (lang_config, syntax) = match doc.language_config().zip(doc.syntax()) {
+ Some(t) => t,
+ None => return range,
+ };
+ textobject::textobject_treesitter(
+ text,
+ range,
+ objtype,
+ obj_name,
+ syntax.tree().root_node(),
+ lang_config,
+ count,
+ )
+ };
+
let selection = doc.selection(view.id).clone().transform(|range| {
match ch {
'w' => textobject::textobject_word(text, range, objtype, count),
+ 'c' => textobject_treesitter("class", range),
+ 'f' => textobject_treesitter("function", range),
+ 'p' => textobject_treesitter("parameter", range),
// TODO: cancel new ranges if inconsistent surround matches across lines
ch if !ch.is_ascii_alphanumeric() => {
textobject::textobject_surround(text, range, objtype, ch, count)
diff --git a/runtime/queries/go/textobjects.scm b/runtime/queries/go/textobjects.scm
new file mode 100644
index 00000000..9bcfc690
--- /dev/null
+++ b/runtime/queries/go/textobjects.scm
@@ -0,0 +1,21 @@
+(function_declaration ; the body block is the "inside" object, the whole declaration the "around" object
+ body: (block)? @function.inside) @function.around
+
+(func_literal ; NOTE(review): `(_)?` is not restricted to the `body:` field like the patterns around it; confirm it captures only the body
+ (_)? @function.inside) @function.around
+
+(method_declaration
+ body: (block)? @function.inside) @function.around
+
+;; Struct and interface type declarations stand in for the class textobject.
+(type_declaration
+ (type_spec (type_identifier) (struct_type (field_declaration_list (_)?) @class.inside))) @class.around
+
+(type_declaration
+ (type_spec (type_identifier) (interface_type (method_spec_list (_)?) @class.inside))) @class.around
+
+(parameter_list ; each child of the list is captured as parameter.inside; no parameter.around capture is defined
+ (_) @parameter.inside)
+
+(argument_list ; call arguments are treated as parameters too
+ (_) @parameter.inside)
diff --git a/runtime/queries/python/textobjects.scm b/runtime/queries/python/textobjects.scm
new file mode 100644
index 00000000..a52538af
--- /dev/null
+++ b/runtime/queries/python/textobjects.scm
@@ -0,0 +1,14 @@
+(function_definition ; the body block is the "inside" object, the whole definition the "around" object
+ body: (block)? @function.inside) @function.around
+
+(class_definition
+ body: (block)? @class.inside) @class.around
+
+(parameters ; each child of the list is captured as parameter.inside; no parameter.around capture is defined
+ (_) @parameter.inside)
+
+(lambda_parameters
+ (_) @parameter.inside)
+
+(argument_list ; call arguments are treated as parameters too
+ (_) @parameter.inside)
diff --git a/runtime/queries/rust/textobjects.scm b/runtime/queries/rust/textobjects.scm
new file mode 100644
index 00000000..e3132687
--- /dev/null
+++ b/runtime/queries/rust/textobjects.scm
@@ -0,0 +1,26 @@
+(function_item ; the body is the "inside" object, the whole item the "around" object
+ body: (_) @function.inside) @function.around
+
+(struct_item ; struct, enum, union, trait and impl items all map to the class textobject
+ body: (_) @class.inside) @class.around
+
+(enum_item
+ body: (_) @class.inside) @class.around
+
+(union_item
+ body: (_) @class.inside) @class.around
+
+(trait_item
+ body: (_) @class.inside) @class.around
+
+(impl_item
+ body: (_) @class.inside) @class.around
+
+(parameters ; each child of the list is captured as parameter.inside; no parameter.around capture is defined
+ (_) @parameter.inside)
+
+(closure_parameters
+ (_) @parameter.inside)
+
+(arguments ; call arguments are treated as parameters too
+ (_) @parameter.inside)