about summary refs log tree commit diff
path: root/helix-core/src/indent.rs
diff options
context:
space:
mode:
author Triton171 2022-03-30 15:08:07 +0000
committer GitHub 2022-03-30 15:08:07 +0000
commit 58758fee610a3808dfaeafddd1b4b4242a7e42cd (patch)
tree 1ca1bc05978270080693c56d6cfb3beb6dd86e1a /helix-core/src/indent.rs
parent c18de0e8f001041e234b2b9bb0f8fea122858ad3 (diff)
Indentation rework (#1562)
* WIP: Rework indentation system * Add ComplexNode for context-aware indentation (including a proof of concept for assignment statements in rust) * Add switch statements to Go indents.toml (fixes the second half of issue #1523) Remove commented-out code * Migrate all existing indentation queries. Add more options to ComplexNode and use them to improve C/C++ indentation. * Add comments & replace Option<Vec<_>> with Vec<_> * Add more detailed documentation for tree-sitter indentation * Improve code style in indent.rs * Use tree-sitter queries for indentation instead of TOML config. Migrate existing indent queries. * Add documentation for the new indent queries. Change xtask docgen to look for indents.scm instead of indents.toml * Improve code style in indent.rs. Fix an issue with the rust indent query. * Move indentation test sources to separate files. Add `#not-kind-eq?`, `#same-line?` and `#not-same-line?` custom predicates. Improve the rust and c indent queries. * Fix indent test. Improve rust indent queries. * Move indentation tests to integration test folder. * Improve code style in indent.rs. Reuse tree-sitter cursors for indentation queries. * Migrate HCL indent query * Replace custom loading in indent tests with a designated languages.toml * Update indent query file name for --health command. * Fix single-space formatting in indent queries. * Add explanation for unwrapping. Co-authored-by: Triton171 <triton0171@gmail.com>
Diffstat (limited to 'helix-core/src/indent.rs')
-rw-r--r-- helix-core/src/indent.rs 594
1 files changed, 374 insertions, 220 deletions
diff --git a/helix-core/src/indent.rs b/helix-core/src/indent.rs
index 30f4a340..529139b8 100644
--- a/helix-core/src/indent.rs
+++ b/helix-core/src/indent.rs
@@ -1,6 +1,10 @@
+use std::collections::HashMap;
+
+use tree_sitter::{Query, QueryCursor, QueryPredicateArg};
+
use crate::{
chars::{char_is_line_ending, char_is_whitespace},
- syntax::{IndentQuery, LanguageConfiguration, Syntax},
+ syntax::{LanguageConfiguration, RopeProvider, Syntax},
tree_sitter::Node,
Rope, RopeSlice,
};
@@ -186,103 +190,405 @@ pub fn indent_level_for_line(line: RopeSlice, tab_width: usize) -> usize {
len / tab_width
}
-/// Find the highest syntax node at position.
-/// This is to identify the column where this node (e.g., an HTML closing tag) ends.
-fn get_highest_syntax_node_at_bytepos(syntax: &Syntax, pos: usize) -> Option<Node> {
- let tree = syntax.tree();
-
- // named_descendant
- let mut node = tree.root_node().descendant_for_byte_range(pos, pos)?;
-
- while let Some(parent) = node.parent() {
- if parent.start_byte() == node.start_byte() {
- node = parent
+/// Computes for node and all ancestors whether they are the first node on their line.
+/// The first entry in the return value represents the root node, the last one the node itself
+fn get_first_in_line(mut node: Node, byte_pos: usize, new_line: bool) -> Vec<bool> {
+ let mut first_in_line = Vec::new();
+ loop {
+ if let Some(prev) = node.prev_sibling() {
+ // If we insert a new line, the first node at/after the cursor is considered to be the first in its line
+ let first = prev.end_position().row != node.start_position().row
+ || (new_line && node.start_byte() >= byte_pos && prev.start_byte() < byte_pos);
+ first_in_line.push(Some(first));
+ } else {
+ // Nodes that have no previous siblings are first in their line if and only if their parent is
+ // (which we don't know yet)
+ first_in_line.push(None);
+ }
+ if let Some(parent) = node.parent() {
+ node = parent;
} else {
break;
}
}
- Some(node)
+ let mut result = Vec::with_capacity(first_in_line.len());
+ let mut parent_is_first = true; // The root node is by definition the first node in its line
+ for first in first_in_line.into_iter().rev() {
+ if let Some(first) = first {
+ result.push(first);
+ parent_is_first = first;
+ } else {
+ result.push(parent_is_first);
+ }
+ }
+ result
}
-/// Calculate the indentation at a given treesitter node.
-/// If newline is false, then any "indent" nodes on the line are ignored ("outdent" still applies).
-/// This is because the indentation is only increased starting at the second line of the node.
-fn calculate_indentation(
- query: &IndentQuery,
- node: Option<Node>,
- line: usize,
- newline: bool,
-) -> usize {
- let mut increment: isize = 0;
-
- let mut node = match node {
- Some(node) => node,
- None => return 0,
- };
+/// The total indent for some line of code.
+/// This is usually constructed in one of 2 ways:
+/// - Successively add indent captures to get the (added) indent from a single line
+/// - Successively add the indent results for each line
+#[derive(Default)]
+struct Indentation {
+ /// The total indent (the number of indent levels) is defined as max(0, indent-outdent).
+ /// The string that this results in depends on the indent style (spaces or tabs, etc.)
+ indent: usize,
+ outdent: usize,
+}
+impl Indentation {
+ /// Add some other [Indentation] to this.
+ /// The added indent should be the total added indent from one line.
+ fn add_line(&mut self, added: &Indentation) {
+ if added.indent > 0 && added.outdent == 0 {
+ self.indent += 1;
+ } else if added.outdent > 0 && added.indent == 0 {
+ self.outdent += 1;
+ }
+ }
+ /// Add an indent capture to this indent.
+ /// All the captures that are added in this way should be on the same line.
+ fn add_capture(&mut self, added: IndentCaptureType) {
+ match added {
+ IndentCaptureType::Indent => {
+ self.indent = 1;
+ }
+ IndentCaptureType::Outdent => {
+ self.outdent = 1;
+ }
+ }
+ }
+ fn as_string(&self, indent_style: &IndentStyle) -> String {
+ let indent_level = if self.indent >= self.outdent {
+ self.indent - self.outdent
+ } else {
+ log::warn!("Encountered more outdent than indent nodes while calculating indentation: {} outdent, {} indent", self.outdent, self.indent);
+ 0
+ };
+ indent_style.as_str().repeat(indent_level)
+ }
+}
- let mut current_line = line;
- let mut consider_indent = newline;
- let mut increment_from_line: isize = 0;
+/// An indent definition which corresponds to a capture from the indent query
+struct IndentCapture {
+ capture_type: IndentCaptureType,
+ scope: IndentScope,
+}
+#[derive(Clone, Copy)]
+enum IndentCaptureType {
+ Indent,
+ Outdent,
+}
+impl IndentCaptureType {
+ fn default_scope(&self) -> IndentScope {
+ match self {
+ IndentCaptureType::Indent => IndentScope::Tail,
+ IndentCaptureType::Outdent => IndentScope::All,
+ }
+ }
+}
+/// This defines which part of a node an [IndentCapture] applies to.
+/// Each [IndentCaptureType] has a default scope, but the scope can be changed
+/// with `#set!` property declarations.
+#[derive(Clone, Copy)]
+enum IndentScope {
+ /// The indent applies to the whole node
+ All,
+ /// The indent applies to everything except for the first line of the node
+ Tail,
+}
- loop {
- let node_kind = node.kind();
- let start = node.start_position().row;
- if current_line != start {
- // Indent/dedent by at most one per line:
- // .map(|a| { <-- ({ is two scopes
- // let len = 1; <-- indents one level
- // }) <-- }) is two scopes
- if consider_indent || increment_from_line < 0 {
- increment += increment_from_line.signum();
+/// Execute the indent query.
+/// Returns for each node (identified by its id) a list of indent captures for that node.
+fn query_indents(
+ query: &Query,
+ syntax: &Syntax,
+ cursor: &mut QueryCursor,
+ text: RopeSlice,
+ range: std::ops::Range<usize>,
+ // Position of the (optional) newly inserted line break.
+ // Given as (line, byte_pos)
+ new_line_break: Option<(usize, usize)>,
+) -> HashMap<usize, Vec<IndentCapture>> {
+ let mut indent_captures: HashMap<usize, Vec<IndentCapture>> = HashMap::new();
+ cursor.set_byte_range(range);
+ // Iterate over all captures from the query
+ for m in cursor.matches(query, syntax.tree().root_node(), RopeProvider(text)) {
+ // Skip matches where not all custom predicates are fulfilled
+ if !query.general_predicates(m.pattern_index).iter().all(|pred| {
+ match pred.operator.as_ref() {
+ "not-kind-eq?" => match (pred.args.get(0), pred.args.get(1)) {
+ (
+ Some(QueryPredicateArg::Capture(capture_idx)),
+ Some(QueryPredicateArg::String(kind)),
+ ) => {
+ let node = m.nodes_for_capture_index(*capture_idx).next();
+ match node {
+ Some(node) => node.kind()!=kind.as_ref(),
+ _ => true,
+ }
+ }
+ _ => {
+ panic!("Invalid indent query: Arguments to \"not-kind-eq?\" must be a capture and a string");
+ }
+ },
+ "same-line?" | "not-same-line?" => {
+ match (pred.args.get(0), pred.args.get(1)) {
+ (
+ Some(QueryPredicateArg::Capture(capt1)),
+ Some(QueryPredicateArg::Capture(capt2))
+ ) => {
+ let get_line_num = |node: Node| {
+ let mut node_line = node.start_position().row;
+ // Adjust for the new line that will be inserted
+ if let Some((line, byte)) = new_line_break {
+ if node_line==line && node.start_byte()>=byte {
+ node_line += 1;
+ }
+ }
+ node_line
+ };
+ let n1 = m.nodes_for_capture_index(*capt1).next();
+ let n2 = m.nodes_for_capture_index(*capt2).next();
+ match (n1, n2) {
+ (Some(n1), Some(n2)) => {
+ let same_line = get_line_num(n1)==get_line_num(n2);
+ same_line==(pred.operator.as_ref()=="same-line?")
+ }
+ _ => true,
+ }
+ }
+ _ => {
+ panic!("Invalid indent query: Arguments to \"{}\" must be 2 captures", pred.operator);
+ }
+ }
+ }
+ _ => {
+ panic!(
+ "Invalid indent query: Unknown predicate (\"{}\")",
+ pred.operator
+ );
+ }
}
- increment_from_line = 0;
- current_line = start;
- consider_indent = true;
+ }) {
+ continue;
}
-
- if query.outdent.contains(node_kind) {
- increment_from_line -= 1;
+ for capture in m.captures {
+ let capture_type = query.capture_names()[capture.index as usize].as_str();
+ let capture_type = match capture_type {
+ "indent" => IndentCaptureType::Indent,
+ "outdent" => IndentCaptureType::Outdent,
+ _ => {
+ // Ignore any unknown captures (these may be needed for predicates such as #match?)
+ continue;
+ }
+ };
+ let scope = capture_type.default_scope();
+ let mut indent_capture = IndentCapture {
+ capture_type,
+ scope,
+ };
+ // Apply additional settings for this capture
+ for property in query.property_settings(m.pattern_index) {
+ match property.key.as_ref() {
+ "scope" => {
+ indent_capture.scope = match property.value.as_deref() {
+ Some("all") => IndentScope::All,
+ Some("tail") => IndentScope::Tail,
+ Some(s) => {
+ panic!("Invalid indent query: Unknown value for \"scope\" property (\"{}\")", s);
+ }
+ None => {
+ panic!(
+ "Invalid indent query: Missing value for \"scope\" property"
+ );
+ }
+ }
+ }
+ _ => {
+ panic!(
+ "Invalid indent query: Unknown property \"{}\"",
+ property.key
+ );
+ }
+ }
+ }
+ indent_captures
+ .entry(capture.node.id())
+ // Most entries only need to contain a single IndentCapture
+ .or_insert_with(|| Vec::with_capacity(1))
+ .push(indent_capture);
}
- if query.indent.contains(node_kind) {
- increment_from_line += 1;
+ }
+ indent_captures
+}
+
+/// Use the syntax tree to determine the indentation for a given position.
+/// This can be used in 2 ways:
+///
+/// - To get the correct indentation for an existing line (new_line=false), not necessarily equal to the current indentation.
+/// - In this case, pos should be inside the first tree-sitter node on that line.
+/// In most cases, this can just be the first non-whitespace on that line.
+/// - To get the indentation for a new line (new_line=true). This behaves like the first use case if the part of the current line
+/// after pos were moved to a new line.
+///
+/// The indentation is determined by traversing all the tree-sitter nodes containing the position.
+/// Each of these nodes produces some [Indentation] for:
+///
+/// - The line of the (beginning of the) node. This is defined by the scope `all` if this is the first node on its line.
+/// - The line after the node. This is defined by:
+/// - The scope `tail`.
+/// - The scope `all` if this node is not the first node on its line.
+/// Intuitively, `all` applies to everything contained in this node while `tail` applies to everything except for the first line of the node.
+/// The indents from different nodes for the same line are then combined.
+/// The resulting [Indentation] is simply the sum of the per-line [Indentation] values for all lines.
+///
+/// Specifying which line exactly an [Indentation] applies to is important because indents on the same line combine differently than indents on different lines:
+/// ```ignore
+/// some_function(|| {
+/// // Both the function parameters as well as the contained block should be indented.
+/// // Because they are on the same line, this only yields one indent level
+/// });
+/// ```
+///
+/// ```ignore
+/// some_function(
+/// param1,
+/// || {
+/// // Here we get 2 indent levels because the 'parameters' and the 'block' node begin on different lines
+/// },
+/// );
+/// ```
+pub fn treesitter_indent_for_pos(
+ query: &Query,
+ syntax: &Syntax,
+ indent_style: &IndentStyle,
+ text: RopeSlice,
+ line: usize,
+ pos: usize,
+ new_line: bool,
+) -> Option<String> {
+ let byte_pos = text.char_to_byte(pos);
+ let mut node = syntax
+ .tree()
+ .root_node()
+ .descendant_for_byte_range(byte_pos, byte_pos)?;
+ let mut first_in_line = get_first_in_line(node, byte_pos, new_line);
+ let new_line_break = if new_line {
+ Some((line, byte_pos))
+ } else {
+ None
+ };
+ let query_result = crate::syntax::PARSER.with(|ts_parser| {
+ let mut ts_parser = ts_parser.borrow_mut();
+ let mut cursor = ts_parser.cursors.pop().unwrap_or_else(QueryCursor::new);
+ let query_result = query_indents(
+ query,
+ syntax,
+ &mut cursor,
+ text,
+ byte_pos..byte_pos + 1,
+ new_line_break,
+ );
+ ts_parser.cursors.push(cursor);
+ query_result
+ });
+
+ let mut result = Indentation::default();
+ // We always keep track of all the indent changes on one line, in order to only indent once
+ // even if there are multiple "indent" nodes on the same line
+ let mut indent_for_line = Indentation::default();
+ let mut indent_for_line_below = Indentation::default();
+ loop {
+ // This can safely be unwrapped because `first_in_line` contains
+ // one entry for each ancestor of the node (which is what we iterate over)
+ let is_first = *first_in_line.last().unwrap();
+ // Apply all indent definitions for this node
+ if let Some(definitions) = query_result.get(&node.id()) {
+ for definition in definitions {
+ match definition.scope {
+ IndentScope::All => {
+ if is_first {
+ indent_for_line.add_capture(definition.capture_type);
+ } else {
+ indent_for_line_below.add_capture(definition.capture_type);
+ }
+ }
+ IndentScope::Tail => {
+ indent_for_line_below.add_capture(definition.capture_type);
+ }
+ }
+ }
}
if let Some(parent) = node.parent() {
+ let mut node_line = node.start_position().row;
+ let mut parent_line = parent.start_position().row;
+ if node_line == line && new_line {
+ // Also consider the line that will be inserted
+ if node.start_byte() >= byte_pos {
+ node_line += 1;
+ }
+ if parent.start_byte() >= byte_pos {
+ parent_line += 1;
+ }
+ };
+ if node_line != parent_line {
+ if node_line < line + (new_line as usize) {
+ // Don't add indent for the line below the line of the query
+ result.add_line(&indent_for_line_below);
+ }
+ if node_line == parent_line + 1 {
+ indent_for_line_below = indent_for_line;
+ } else {
+ result.add_line(&indent_for_line);
+ indent_for_line_below = Indentation::default();
+ }
+ indent_for_line = Indentation::default();
+ }
+
node = parent;
+ first_in_line.pop();
} else {
+ result.add_line(&indent_for_line_below);
+ result.add_line(&indent_for_line);
break;
}
}
- if consider_indent || increment_from_line < 0 {
- increment += increment_from_line.signum();
- }
- increment.max(0) as usize
+ Some(result.as_string(indent_style))
}
-// TODO: two usecases: if we are triggering this for a new, blank line:
-// - it should return 0 when mass indenting stuff
-// - it should look up the wrapper node and count it too when we press o/O
-pub fn suggested_indent_for_pos(
+/// Returns the indentation for a new line.
+/// This is done either using tree-sitter or, if that is not available, by copying the indentation from the current line.
+#[allow(clippy::too_many_arguments)]
+pub fn indent_for_newline(
language_config: Option<&LanguageConfiguration>,
syntax: Option<&Syntax>,
+ indent_style: &IndentStyle,
+ tab_width: usize,
text: RopeSlice,
- pos: usize,
- line: usize,
- new_line: bool,
-) -> Option<usize> {
+ line_before: usize,
+ line_before_end_pos: usize,
+ current_line: usize,
+) -> String {
if let (Some(query), Some(syntax)) = (
language_config.and_then(|config| config.indent_query()),
syntax,
) {
- let byte_start = text.char_to_byte(pos);
- let node = get_highest_syntax_node_at_bytepos(syntax, byte_start);
- // TODO: special case for comments
- // TODO: if preserve_leading_whitespace
- Some(calculate_indentation(query, node, line, new_line))
- } else {
- None
+ if let Some(indent) = treesitter_indent_for_pos(
+ query,
+ syntax,
+ indent_style,
+ text,
+ line_before,
+ line_before_end_pos,
+ true,
+ ) {
+ return indent;
+ };
}
+ let indent_level = indent_level_for_line(text.line(current_line), tab_width);
+ indent_style.as_str().repeat(indent_level)
}
pub fn get_scopes(syntax: Option<&Syntax>, text: RopeSlice, pos: usize) -> Vec<&'static str> {
@@ -326,156 +632,4 @@ mod test {
let line = Rope::from("\t \tfn new"); // 1 tab, 4 spaces, tab
assert_eq!(indent_level_for_line(line.slice(..), tab_width), 3);
}
-
- #[test]
- fn test_suggested_indent_for_line() {
- let doc = Rope::from(
- "
-use std::{
- io::{self, stdout, Stdout, Write},
- path::PathBuf,
- sync::Arc,
- time::Duration,
-}
-mod test {
- fn hello_world() {
- 1 + 1;
-
- let does_indentation_work = 1;
-
- let test_function = function_with_param(this_param,
- that_param
- );
-
- let test_function = function_with_param(
- this_param,
- that_param
- );
-
- let test_function = function_with_proper_indent(param1,
- param2,
- );
-
- let selection = Selection::new(
- changes
- .clone()
- .map(|(start, end, text): (usize, usize, Option<Tendril>)| {
- let len = text.map(|text| text.len()).unwrap() - 1; // minus newline
- let pos = start + len;
- Range::new(pos, pos)
- })
- .collect(),
- 0,
- );
-
- return;
- }
-}
-
-impl<A, D> MyTrait<A, D> for YourType
-where
- A: TraitB + TraitC,
- D: TraitE + TraitF,
-{
-
-}
-#[test]
-//
-match test {
- Some(a) => 1,
- None => {
- unimplemented!()
- }
-}
-std::panic::set_hook(Box::new(move |info| {
- hook(info);
-}));
-
-{ { {
- 1
-}}}
-
-pub fn change<I>(document: &Document, changes: I) -> Self
-where
- I: IntoIterator<Item = Change> + ExactSizeIterator,
-{
- [
- 1,
- 2,
- 3,
- ];
- (
- 1,
- 2
- );
- true
-}
-",
- );
-
- let doc = doc;
- use crate::diagnostic::Severity;
- use crate::syntax::{
- Configuration, IndentationConfiguration, LanguageConfiguration, Loader,
- };
- use once_cell::sync::OnceCell;
- let loader = Loader::new(Configuration {
- language: vec![LanguageConfiguration {
- scope: "source.rust".to_string(),
- file_types: vec!["rs".to_string()],
- shebangs: vec![],
- language_id: "Rust".to_string(),
- highlight_config: OnceCell::new(),
- config: None,
- //
- injection_regex: None,
- roots: vec![],
- comment_token: None,
- auto_format: false,
- diagnostic_severity: Severity::Warning,
- grammar: None,
- language_server: None,
- indent: Some(IndentationConfiguration {
- tab_width: 4,
- unit: String::from(" "),
- }),
- indent_query: OnceCell::new(),
- textobject_query: OnceCell::new(),
- debugger: None,
- auto_pairs: None,
- }],
- });
-
- // set runtime path so we can find the queries
- let mut runtime = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"));
- runtime.push("../runtime");
- std::env::set_var("HELIX_RUNTIME", runtime.to_str().unwrap());
-
- let language_config = loader.language_config_for_scope("source.rust").unwrap();
- let highlight_config = language_config.highlight_config(&[]).unwrap();
- let syntax = Syntax::new(&doc, highlight_config, std::sync::Arc::new(loader));
- let text = doc.slice(..);
- let tab_width = 4;
-
- for i in 0..doc.len_lines() {
- let line = text.line(i);
- if let Some(pos) = crate::find_first_non_whitespace_char(line) {
- let indent = indent_level_for_line(line, tab_width);
- assert_eq!(
- suggested_indent_for_pos(
- Some(&language_config),
- Some(&syntax),
- text,
- text.line_to_char(i) + pos,
- i,
- false
- ),
- Some(indent),
- "line {}: \"{}\"",
- i,
- line
- );
- }
- }
- }
}