From d29a66f267afd79f83c0ddc2e483bfb407f4ed98 Mon Sep 17 00:00:00 2001 From: Daniel Ebert Date: Sat, 16 Sep 2023 18:22:46 +0200 Subject: Implement relative indent queries, i.e. also take into account the indentation of a previous line when computing the indentation for a new line. --- helix-core/src/indent.rs | 294 ++++++++++++++++++++++++++++++++------------- helix-core/tests/indent.rs | 4 +- 2 files changed, 213 insertions(+), 85 deletions(-) diff --git a/helix-core/src/indent.rs b/helix-core/src/indent.rs index abf45d6f..8dfc1f1e 100644 --- a/helix-core/src/indent.rs +++ b/helix-core/src/indent.rs @@ -4,6 +4,7 @@ use tree_sitter::{Query, QueryCursor, QueryPredicateArg}; use crate::{ chars::{char_is_line_ending, char_is_whitespace}, + find_first_non_whitespace_char, graphemes::{grapheme_width, tab_width_at}, syntax::{LanguageConfiguration, RopeProvider, Syntax}, tree_sitter::Node, @@ -196,6 +197,19 @@ pub fn indent_level_for_line(line: RopeSlice, tab_width: usize, indent_width: us len / indent_width } +/// Create a string of tabs & spaces that has the same visual width as the given RopeSlice (independent of the tab width). +fn whitespace_with_same_width(text: RopeSlice) -> String { + let mut s = String::new(); + for grapheme in RopeGraphemes::new(text) { + if grapheme == "\t" { + s.push('\t'); + } else { + s.extend(std::iter::repeat(' ').take(grapheme_width(&Cow::from(grapheme)))); + } + } + s +} + /// Computes for node and all ancestors whether they are the first node on their line. /// The first entry in the return value represents the root node, the last one the node itself fn get_first_in_line(mut node: Node, new_line_byte_pos: Option) -> Vec { @@ -241,21 +255,21 @@ fn get_first_in_line(mut node: Node, new_line_byte_pos: Option) -> Vec { indent: usize, indent_always: usize, outdent: usize, outdent_always: usize, /// The alignment, as a string containing only tabs & spaces. Storing this as a string instead of e.g. /// the (visual) width ensures that the alignment is preserved even if the tab width changes. - align: Option, + align: Option>, } -impl Indentation { +impl<'a> Indentation<'a> { /// Add some other [Indentation] to this. /// The added indent should be the total added indent from one line. /// Indent should always be added starting from the bottom (or equivalently, the innermost tree-sitter node). - fn add_line(&mut self, added: Indentation) { + fn add_line(&mut self, added: Indentation<'a>) { // Align overrides the indent from outer scopes. if self.align.is_some() { return; @@ -274,7 +288,7 @@ impl Indentation { /// Only captures that apply to the same line should be added together in this way (otherwise use `add_line`) /// and the captures should be added starting from the innermost tree-sitter node (currently this only matters /// if multiple `@align` patterns occur on the same line). - fn add_capture(&mut self, added: IndentCaptureType) { + fn add_capture(&mut self, added: IndentCaptureType<'a>) { match added { IndentCaptureType::Indent => { if self.indent_always == 0 { @@ -303,7 +317,60 @@ impl Indentation { } } } - fn into_string(self, indent_style: &IndentStyle) -> String { + fn net_indent(&self) -> isize { + (self.indent + self.indent_always) as isize + - ((self.outdent + self.outdent_always) as isize) + } + /// Convert `self` into a string, taking into account the computed and actual indentation of some other line. + fn relative_indent( + &self, + other_computed_indent: &Self, + other_leading_whitespace: RopeSlice, + indent_style: &IndentStyle, + tab_width: usize, + ) -> Option { + if self.align == other_computed_indent.align { + // If self and baseline are either not aligned to anything or both aligned the same way, + // we can simply take `other_leading_whitespace` and add some indent / outdent to it (in the second + // case, the alignment should already be accounted for in `other_leading_whitespace`). + let indent_diff = self.net_indent() - other_computed_indent.net_indent(); + if indent_diff >= 0 { + let mut indent = other_leading_whitespace.to_string(); + indent.push_str(&indent_style.as_str().repeat(indent_diff as usize)); + Some(indent) + } else { + // It's not entirely clear how to subtract a given indent level from the other line if its indentation is + // complex (e.g. a weird alignment or a mixture of tabs and spaces). Therefore, we only consider the indent level + // of `baseline_leading_whitespace`, add `indent_diff` to it and convert this indent level back to a string. If we ever encounter + // cases where some other behavior is expected, the behavior for strings that don't exactly correspond to some indent + // level could be re-evaluated. + let actual_baseline_indent_level = indent_level_for_line( + other_leading_whitespace, + tab_width, + indent_style.indent_width(tab_width), + ); + let total_indent = actual_baseline_indent_level as isize + indent_diff; + if total_indent < 0 { + log::warn!( + "Computed negative indent during a relative indent computation (actual baseline indent: {}, computed baseline indent: {}, computed line indent: {})", + actual_baseline_indent_level, + other_computed_indent.net_indent(), + self.net_indent(), + ); + Some(String::new()) + } else { + Some(indent_style.as_str().repeat(total_indent as usize)) + } + } + } else if self.align.is_some() { + Some(self.to_string(indent_style)) + } else { + // If the other line has some alignment and `self` does not, there is no reasonable way to take + // into account `other_leading_whitespace`. + None + } + } + pub fn to_string(&self, indent_style: &IndentStyle) -> String { let indent = self.indent_always + self.indent; let outdent = self.outdent_always + self.outdent; @@ -314,7 +381,7 @@ impl Indentation { 0 }; let mut indent_string = if let Some(align) = self.align { - align + whitespace_with_same_width(align) } else { String::new() }; @@ -325,21 +392,21 @@ impl Indentation { /// An indent definition which corresponds to a capture from the indent query #[derive(Debug)] -struct IndentCapture { - capture_type: IndentCaptureType, +struct IndentCapture<'a> { + capture_type: IndentCaptureType<'a>, scope: IndentScope, } #[derive(Debug, Clone, PartialEq)] -enum IndentCaptureType { +enum IndentCaptureType<'a> { Indent, IndentAlways, Outdent, OutdentAlways, /// Alignment given as a string of whitespace - Align(String), + Align(RopeSlice<'a>), } -impl IndentCaptureType { +impl<'a> IndentCaptureType<'a> { fn default_scope(&self) -> IndentScope { match self { IndentCaptureType::Indent | IndentCaptureType::IndentAlways => IndentScope::Tail, @@ -371,8 +438,8 @@ enum ExtendCapture { /// each node (identified by its ID) the relevant captures (already filtered /// by predicates). #[derive(Debug)] -struct IndentQueryResult { - indent_captures: HashMap>, +struct IndentQueryResult<'a> { + indent_captures: HashMap>>, extend_captures: HashMap>, } @@ -393,14 +460,14 @@ fn get_node_end_line(node: Node, new_line_byte_pos: Option) -> usize { node_line } -fn query_indents( +fn query_indents<'a>( query: &Query, syntax: &Syntax, cursor: &mut QueryCursor, - text: RopeSlice, + text: RopeSlice<'a>, range: std::ops::Range, new_line_byte_pos: Option, -) -> IndentQueryResult { +) -> IndentQueryResult<'a> { let mut indent_captures: HashMap> = HashMap::new(); let mut extend_captures: HashMap> = HashMap::new(); cursor.set_byte_range(range); @@ -488,7 +555,7 @@ fn query_indents( "outdent" => IndentCaptureType::Outdent, "outdent.always" => IndentCaptureType::OutdentAlways, // The alignment will be updated to the correct value at the end, when the anchor is known. - "align" => IndentCaptureType::Align(String::from("")), + "align" => IndentCaptureType::Align(RopeSlice::from("")), "anchor" => { if anchor.is_some() { log::error!("Invalid indent query: Encountered more than one @anchor in the same match.") @@ -560,22 +627,10 @@ fn query_indents( } Some(anchor) => anchor, }; - // Create a string of tabs & spaces that should have the same width - // as the string that precedes the anchor (independent of the tab width). - let mut align = String::new(); - for grapheme in RopeGraphemes::new( + capture.capture_type = IndentCaptureType::Align( text.line(anchor.start_position().row) .byte_slice(0..anchor.start_position().column), - ) { - if grapheme == "\t" { - align.push('\t'); - } else { - align.extend( - std::iter::repeat(' ').take(grapheme_width(&Cow::from(grapheme))), - ); - } - } - capture.capture_type = IndentCaptureType::Align(align); + ); } indent_captures .entry(node_id) @@ -661,56 +716,20 @@ fn extend_nodes<'a>( } } -/// Use the syntax tree to determine the indentation for a given position. -/// This can be used in 2 ways: -/// -/// - To get the correct indentation for an existing line (new_line=false), not necessarily equal to the current indentation. -/// - In this case, pos should be inside the first tree-sitter node on that line. -/// In most cases, this can just be the first non-whitespace on that line. -/// - To get the indentation for a new line (new_line=true). This behaves like the first usecase if the part of the current line -/// after pos were moved to a new line. -/// -/// The indentation is determined by traversing all the tree-sitter nodes containing the position. -/// Each of these nodes produces some [Indentation] for: -/// -/// - The line of the (beginning of the) node. This is defined by the scope `all` if this is the first node on its line. -/// - The line after the node. This is defined by: -/// - The scope `tail`. -/// - The scope `all` if this node is not the first node on its line. -/// Intuitively, `all` applies to everything contained in this node while `tail` applies to everything except for the first line of the node. -/// The indents from different nodes for the same line are then combined. -/// The result [Indentation] is simply the sum of the [Indentation] for all lines. -/// -/// Specifying which line exactly an [Indentation] applies to is important because indents on the same line combine differently than indents on different lines: -/// ```ignore -/// some_function(|| { -/// // Both the function parameters as well as the contained block should be indented. -/// // Because they are on the same line, this only yields one indent level -/// }); -/// ``` -/// -/// ```ignore -/// some_function( -/// param1, -/// || { -/// // Here we get 2 indent levels because the 'parameters' and the 'block' node begin on different lines -/// }, -/// ); -/// ``` +/// Prepare an indent query by computing: +/// - The node from which to start the query (this is non-trivial due to `@extend` captures) +/// - The indent captures for all relevant nodes. #[allow(clippy::too_many_arguments)] -pub fn treesitter_indent_for_pos( +fn init_indent_query<'a, 'b>( query: &Query, - syntax: &Syntax, - indent_style: &IndentStyle, + syntax: &'a Syntax, + text: RopeSlice<'b>, tab_width: usize, indent_width: usize, - text: RopeSlice, line: usize, - pos: usize, - new_line: bool, -) -> Option { - let byte_pos = text.char_to_byte(pos); - let new_line_byte_pos = new_line.then_some(byte_pos); + byte_pos: usize, + new_line_byte_pos: Option, +) -> Option<(Node<'a>, HashMap>>)> { // The innermost tree-sitter node which is considered for the indent // computation. It may change if some predeceding node is extended let mut node = syntax @@ -754,7 +773,6 @@ pub fn treesitter_indent_for_pos( (query_result, deepest_preceding) }) }; - let mut indent_captures = query_result.indent_captures; let extend_captures = query_result.extend_captures; // Check for extend captures, potentially changing the node that the indent calculation starts with @@ -769,6 +787,68 @@ pub fn treesitter_indent_for_pos( indent_width, ); } + Some((node, query_result.indent_captures)) +} + +/// Use the syntax tree to determine the indentation for a given position. +/// This can be used in 2 ways: +/// +/// - To get the correct indentation for an existing line (new_line=false), not necessarily equal to the current indentation. +/// - In this case, pos should be inside the first tree-sitter node on that line. +/// In most cases, this can just be the first non-whitespace on that line. +/// - To get the indentation for a new line (new_line=true). This behaves like the first usecase if the part of the current line +/// after pos were moved to a new line. +/// +/// The indentation is determined by traversing all the tree-sitter nodes containing the position. +/// Each of these nodes produces some [Indentation] for: +/// +/// - The line of the (beginning of the) node. This is defined by the scope `all` if this is the first node on its line. +/// - The line after the node. This is defined by: +/// - The scope `tail`. +/// - The scope `all` if this node is not the first node on its line. +/// Intuitively, `all` applies to everything contained in this node while `tail` applies to everything except for the first line of the node. +/// The indents from different nodes for the same line are then combined. +/// The result [Indentation] is simply the sum of the [Indentation] for all lines. +/// +/// Specifying which line exactly an [Indentation] applies to is important because indents on the same line combine differently than indents on different lines: +/// ```ignore +/// some_function(|| { +/// // Both the function parameters as well as the contained block should be indented. +/// // Because they are on the same line, this only yields one indent level +/// }); +/// ``` +/// +/// ```ignore +/// some_function( +/// param1, +/// || { +/// // Here we get 2 indent levels because the 'parameters' and the 'block' node begin on different lines +/// }, +/// ); +/// ``` +#[allow(clippy::too_many_arguments)] +pub fn treesitter_indent_for_pos<'a>( + query: &Query, + syntax: &Syntax, + tab_width: usize, + indent_width: usize, + text: RopeSlice<'a>, + line: usize, + pos: usize, + new_line: bool, +) -> Option> { + let byte_pos = text.char_to_byte(pos); + let new_line_byte_pos = new_line.then_some(byte_pos); + let (mut node, mut indent_captures) = init_indent_query( + query, + syntax, + text, + tab_width, + indent_width, + line, + byte_pos, + new_line_byte_pos, + )?; let mut first_in_line = get_first_in_line(node, new_line.then_some(byte_pos)); let mut result = Indentation::default(); @@ -836,7 +916,7 @@ pub fn treesitter_indent_for_pos( break; } } - Some(result.into_string(indent_style)) + Some(result) } /// Returns the indentation for a new line. @@ -860,7 +940,6 @@ pub fn indent_for_newline( if let Some(indent) = treesitter_indent_for_pos( query, syntax, - indent_style, tab_width, indent_width, text, @@ -868,9 +947,55 @@ pub fn indent_for_newline( line_before_end_pos, true, ) { - return indent; + // We want to compute the indentation not only based on the + // syntax tree but also on the actual indentation of a previous + // line. This makes indentation computation more resilient to + // incomplete queries, incomplete source code & differing indentation + // styles for the same language. + // However, using the indent of a previous line as a baseline may not + // make sense, e.g. if it has a different alignment than the new line. + // In order to prevent edge cases with long running times, we only try + // a constant number of (non-empty) lines. + const MAX_ATTEMPTS: usize = 2; + let mut num_attempts = 0; + for line_idx in (0..=line_before).rev() { + let line = text.line(line_idx); + let first_non_whitespace_char = match find_first_non_whitespace_char(line) { + Some(i) => i, + None => { + continue; + } + }; + if let Some(indent) = (|| { + let computed_indent = treesitter_indent_for_pos( + query, + syntax, + tab_width, + indent_width, + text, + line_idx, + text.line_to_char(line_idx) + first_non_whitespace_char, + false, + )?; + let leading_whitespace = line.slice(0..first_non_whitespace_char); + indent.relative_indent( + &computed_indent, + leading_whitespace, + indent_style, + tab_width, + ) + })() { + return indent; + } + num_attempts += 1; + if num_attempts == MAX_ATTEMPTS { + break; + } + } + return indent.to_string(indent_style); }; } + // Fallback in case we either don't have indent queries or they failed for some reason let indent_level = indent_level_for_line(text.line(current_line), tab_width, indent_width); indent_style.as_str().repeat(indent_level) } @@ -962,10 +1087,13 @@ mod test { ..Default::default() }; - let add_capture = |mut indent: Indentation, capture| { + fn add_capture<'a>( + mut indent: Indentation<'a>, + capture: IndentCaptureType<'a>, + ) -> Indentation<'a> { indent.add_capture(capture); indent - }; + } // adding an indent to no indent makes an indent assert_eq!( diff --git a/helix-core/tests/indent.rs b/helix-core/tests/indent.rs index 26010c90..cd96fcff 100644 --- a/helix-core/tests/indent.rs +++ b/helix-core/tests/indent.rs @@ -210,7 +210,6 @@ fn test_treesitter_indent( let suggested_indent = treesitter_indent_for_pos( indent_query, &syntax, - &indent_style, tab_width, indent_style.indent_width(tab_width), text, @@ -218,7 +217,8 @@ fn test_treesitter_indent( text.line_to_char(i) + pos, false, ) - .unwrap(); + .unwrap() + .to_string(&indent_style); assert!( line.get_slice(..pos).map_or(false, |s| s == suggested_indent), "Wrong indentation for file {:?} on line {}:\n\"{}\" (original line)\n\"{}\" (suggested indentation)\n", -- cgit v1.2.3-70-g09d2