summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDaniel Ebert2023-09-16 16:22:46 +0000
committerBlaž Hrastnik2023-12-15 06:59:54 +0000
commitd29a66f267afd79f83c0ddc2e483bfb407f4ed98 (patch)
tree2cda470baf199de6091cf290ea7f93e0b57240c5
parent23fd145a56260293072622c4d64482fc6447af7c (diff)
Implement relative indent queries,
i.e. also take into account the indentation of a previous line when computing the indentation for a new line.
-rw-r--r--helix-core/src/indent.rs294
-rw-r--r--helix-core/tests/indent.rs4
2 files changed, 213 insertions, 85 deletions
diff --git a/helix-core/src/indent.rs b/helix-core/src/indent.rs
index abf45d6f..8dfc1f1e 100644
--- a/helix-core/src/indent.rs
+++ b/helix-core/src/indent.rs
@@ -4,6 +4,7 @@ use tree_sitter::{Query, QueryCursor, QueryPredicateArg};
use crate::{
chars::{char_is_line_ending, char_is_whitespace},
+ find_first_non_whitespace_char,
graphemes::{grapheme_width, tab_width_at},
syntax::{LanguageConfiguration, RopeProvider, Syntax},
tree_sitter::Node,
@@ -196,6 +197,19 @@ pub fn indent_level_for_line(line: RopeSlice, tab_width: usize, indent_width: us
len / indent_width
}
+/// Create a string of tabs & spaces that has the same visual width as the given RopeSlice (independent of the tab width).
+fn whitespace_with_same_width(text: RopeSlice) -> String {
+ let mut s = String::new();
+ for grapheme in RopeGraphemes::new(text) {
+ if grapheme == "\t" {
+ s.push('\t');
+ } else {
+ s.extend(std::iter::repeat(' ').take(grapheme_width(&Cow::from(grapheme))));
+ }
+ }
+ s
+}
+
/// Computes for node and all ancestors whether they are the first node on their line.
/// The first entry in the return value represents the root node, the last one the node itself
fn get_first_in_line(mut node: Node, new_line_byte_pos: Option<usize>) -> Vec<bool> {
@@ -241,21 +255,21 @@ fn get_first_in_line(mut node: Node, new_line_byte_pos: Option<usize>) -> Vec<bo
/// - max(0, indent - outdent) tabs, if tabs are used for indentation
/// - max(0, indent - outdent)*indent_width spaces, if spaces are used for indentation
#[derive(Default, Debug, PartialEq, Eq, Clone)]
-pub struct Indentation {
+pub struct Indentation<'a> {
indent: usize,
indent_always: usize,
outdent: usize,
outdent_always: usize,
/// The alignment, as a string containing only tabs & spaces. Storing this as a string instead of e.g.
/// the (visual) width ensures that the alignment is preserved even if the tab width changes.
- align: Option<String>,
+ align: Option<RopeSlice<'a>>,
}
-impl Indentation {
+impl<'a> Indentation<'a> {
/// Add some other [Indentation] to this.
/// The added indent should be the total added indent from one line.
/// Indent should always be added starting from the bottom (or equivalently, the innermost tree-sitter node).
- fn add_line(&mut self, added: Indentation) {
+ fn add_line(&mut self, added: Indentation<'a>) {
// Align overrides the indent from outer scopes.
if self.align.is_some() {
return;
@@ -274,7 +288,7 @@ impl Indentation {
/// Only captures that apply to the same line should be added together in this way (otherwise use `add_line`)
/// and the captures should be added starting from the innermost tree-sitter node (currently this only matters
/// if multiple `@align` patterns occur on the same line).
- fn add_capture(&mut self, added: IndentCaptureType) {
+ fn add_capture(&mut self, added: IndentCaptureType<'a>) {
match added {
IndentCaptureType::Indent => {
if self.indent_always == 0 {
@@ -303,7 +317,60 @@ impl Indentation {
}
}
}
- fn into_string(self, indent_style: &IndentStyle) -> String {
+ fn net_indent(&self) -> isize {
+ (self.indent + self.indent_always) as isize
+ - ((self.outdent + self.outdent_always) as isize)
+ }
+ /// Convert `self` into a string, taking into account the computed and actual indentation of some other line.
+ fn relative_indent(
+ &self,
+ other_computed_indent: &Self,
+ other_leading_whitespace: RopeSlice,
+ indent_style: &IndentStyle,
+ tab_width: usize,
+ ) -> Option<String> {
+ if self.align == other_computed_indent.align {
+ // If self and baseline are either not aligned to anything or both aligned the same way,
+ // we can simply take `other_leading_whitespace` and add some indent / outdent to it (in the second
+ // case, the alignment should already be accounted for in `other_leading_whitespace`).
+ let indent_diff = self.net_indent() - other_computed_indent.net_indent();
+ if indent_diff >= 0 {
+ let mut indent = other_leading_whitespace.to_string();
+ indent.push_str(&indent_style.as_str().repeat(indent_diff as usize));
+ Some(indent)
+ } else {
+ // It's not entirely clear how to subtract a given indent level from the other line if its indentation is
+ // complex (e.g. a weird alignment or a mixture of tabs and spaces). Therefore, we only consider the indent level
+ // of `baseline_leading_whitespace`, add `indent_diff` to it and convert this indent level back to a string. If we ever encounter
+ // cases where some other behavior is expected, the behavior for strings that don't exactly correspond to some indent
+ // level could be re-evaluated.
+ let actual_baseline_indent_level = indent_level_for_line(
+ other_leading_whitespace,
+ tab_width,
+ indent_style.indent_width(tab_width),
+ );
+ let total_indent = actual_baseline_indent_level as isize + indent_diff;
+ if total_indent < 0 {
+ log::warn!(
+ "Computed negative indent during a relative indent computation (actual baseline indent: {}, computed baseline indent: {}, computed line indent: {})",
+ actual_baseline_indent_level,
+ other_computed_indent.net_indent(),
+ self.net_indent(),
+ );
+ Some(String::new())
+ } else {
+ Some(indent_style.as_str().repeat(total_indent as usize))
+ }
+ }
+ } else if self.align.is_some() {
+ Some(self.to_string(indent_style))
+ } else {
+ // If the other line has some alignment and `self` does not, there is no reasonable way to take
+ // into account `other_leading_whitespace`.
+ None
+ }
+ }
+ pub fn to_string(&self, indent_style: &IndentStyle) -> String {
let indent = self.indent_always + self.indent;
let outdent = self.outdent_always + self.outdent;
@@ -314,7 +381,7 @@ impl Indentation {
0
};
let mut indent_string = if let Some(align) = self.align {
- align
+ whitespace_with_same_width(align)
} else {
String::new()
};
@@ -325,21 +392,21 @@ impl Indentation {
/// An indent definition which corresponds to a capture from the indent query
#[derive(Debug)]
-struct IndentCapture {
- capture_type: IndentCaptureType,
+struct IndentCapture<'a> {
+ capture_type: IndentCaptureType<'a>,
scope: IndentScope,
}
#[derive(Debug, Clone, PartialEq)]
-enum IndentCaptureType {
+enum IndentCaptureType<'a> {
Indent,
IndentAlways,
Outdent,
OutdentAlways,
/// Alignment given as a string of whitespace
- Align(String),
+ Align(RopeSlice<'a>),
}
-impl IndentCaptureType {
+impl<'a> IndentCaptureType<'a> {
fn default_scope(&self) -> IndentScope {
match self {
IndentCaptureType::Indent | IndentCaptureType::IndentAlways => IndentScope::Tail,
@@ -371,8 +438,8 @@ enum ExtendCapture {
/// each node (identified by its ID) the relevant captures (already filtered
/// by predicates).
#[derive(Debug)]
-struct IndentQueryResult {
- indent_captures: HashMap<usize, Vec<IndentCapture>>,
+struct IndentQueryResult<'a> {
+ indent_captures: HashMap<usize, Vec<IndentCapture<'a>>>,
extend_captures: HashMap<usize, Vec<ExtendCapture>>,
}
@@ -393,14 +460,14 @@ fn get_node_end_line(node: Node, new_line_byte_pos: Option<usize>) -> usize {
node_line
}
-fn query_indents(
+fn query_indents<'a>(
query: &Query,
syntax: &Syntax,
cursor: &mut QueryCursor,
- text: RopeSlice,
+ text: RopeSlice<'a>,
range: std::ops::Range<usize>,
new_line_byte_pos: Option<usize>,
-) -> IndentQueryResult {
+) -> IndentQueryResult<'a> {
let mut indent_captures: HashMap<usize, Vec<IndentCapture>> = HashMap::new();
let mut extend_captures: HashMap<usize, Vec<ExtendCapture>> = HashMap::new();
cursor.set_byte_range(range);
@@ -488,7 +555,7 @@ fn query_indents(
"outdent" => IndentCaptureType::Outdent,
"outdent.always" => IndentCaptureType::OutdentAlways,
// The alignment will be updated to the correct value at the end, when the anchor is known.
- "align" => IndentCaptureType::Align(String::from("")),
+ "align" => IndentCaptureType::Align(RopeSlice::from("")),
"anchor" => {
if anchor.is_some() {
log::error!("Invalid indent query: Encountered more than one @anchor in the same match.")
@@ -560,22 +627,10 @@ fn query_indents(
}
Some(anchor) => anchor,
};
- // Create a string of tabs & spaces that should have the same width
- // as the string that precedes the anchor (independent of the tab width).
- let mut align = String::new();
- for grapheme in RopeGraphemes::new(
+ capture.capture_type = IndentCaptureType::Align(
text.line(anchor.start_position().row)
.byte_slice(0..anchor.start_position().column),
- ) {
- if grapheme == "\t" {
- align.push('\t');
- } else {
- align.extend(
- std::iter::repeat(' ').take(grapheme_width(&Cow::from(grapheme))),
- );
- }
- }
- capture.capture_type = IndentCaptureType::Align(align);
+ );
}
indent_captures
.entry(node_id)
@@ -661,56 +716,20 @@ fn extend_nodes<'a>(
}
}
-/// Use the syntax tree to determine the indentation for a given position.
-/// This can be used in 2 ways:
-///
-/// - To get the correct indentation for an existing line (new_line=false), not necessarily equal to the current indentation.
-/// - In this case, pos should be inside the first tree-sitter node on that line.
-/// In most cases, this can just be the first non-whitespace on that line.
-/// - To get the indentation for a new line (new_line=true). This behaves like the first usecase if the part of the current line
-/// after pos were moved to a new line.
-///
-/// The indentation is determined by traversing all the tree-sitter nodes containing the position.
-/// Each of these nodes produces some [Indentation] for:
-///
-/// - The line of the (beginning of the) node. This is defined by the scope `all` if this is the first node on its line.
-/// - The line after the node. This is defined by:
-/// - The scope `tail`.
-/// - The scope `all` if this node is not the first node on its line.
-/// Intuitively, `all` applies to everything contained in this node while `tail` applies to everything except for the first line of the node.
-/// The indents from different nodes for the same line are then combined.
-/// The result [Indentation] is simply the sum of the [Indentation] for all lines.
-///
-/// Specifying which line exactly an [Indentation] applies to is important because indents on the same line combine differently than indents on different lines:
-/// ```ignore
-/// some_function(|| {
-/// // Both the function parameters as well as the contained block should be indented.
-/// // Because they are on the same line, this only yields one indent level
-/// });
-/// ```
-///
-/// ```ignore
-/// some_function(
-/// param1,
-/// || {
-/// // Here we get 2 indent levels because the 'parameters' and the 'block' node begin on different lines
-/// },
-/// );
-/// ```
+/// Prepare an indent query by computing:
+/// - The node from which to start the query (this is non-trivial due to `@extend` captures)
+/// - The indent captures for all relevant nodes.
#[allow(clippy::too_many_arguments)]
-pub fn treesitter_indent_for_pos(
+fn init_indent_query<'a, 'b>(
query: &Query,
- syntax: &Syntax,
- indent_style: &IndentStyle,
+ syntax: &'a Syntax,
+ text: RopeSlice<'b>,
tab_width: usize,
indent_width: usize,
- text: RopeSlice,
line: usize,
- pos: usize,
- new_line: bool,
-) -> Option<String> {
- let byte_pos = text.char_to_byte(pos);
- let new_line_byte_pos = new_line.then_some(byte_pos);
+ byte_pos: usize,
+ new_line_byte_pos: Option<usize>,
+) -> Option<(Node<'a>, HashMap<usize, Vec<IndentCapture<'b>>>)> {
// The innermost tree-sitter node which is considered for the indent
// computation. It may change if some predeceding node is extended
let mut node = syntax
@@ -754,7 +773,6 @@ pub fn treesitter_indent_for_pos(
(query_result, deepest_preceding)
})
};
- let mut indent_captures = query_result.indent_captures;
let extend_captures = query_result.extend_captures;
// Check for extend captures, potentially changing the node that the indent calculation starts with
@@ -769,6 +787,68 @@ pub fn treesitter_indent_for_pos(
indent_width,
);
}
+ Some((node, query_result.indent_captures))
+}
+
+/// Use the syntax tree to determine the indentation for a given position.
+/// This can be used in 2 ways:
+///
+/// - To get the correct indentation for an existing line (new_line=false), not necessarily equal to the current indentation.
+/// - In this case, pos should be inside the first tree-sitter node on that line.
+/// In most cases, this can just be the first non-whitespace on that line.
+/// - To get the indentation for a new line (new_line=true). This behaves like the first usecase if the part of the current line
+/// after pos were moved to a new line.
+///
+/// The indentation is determined by traversing all the tree-sitter nodes containing the position.
+/// Each of these nodes produces some [Indentation] for:
+///
+/// - The line of the (beginning of the) node. This is defined by the scope `all` if this is the first node on its line.
+/// - The line after the node. This is defined by:
+/// - The scope `tail`.
+/// - The scope `all` if this node is not the first node on its line.
+/// Intuitively, `all` applies to everything contained in this node while `tail` applies to everything except for the first line of the node.
+/// The indents from different nodes for the same line are then combined.
+/// The result [Indentation] is simply the sum of the [Indentation] for all lines.
+///
+/// Specifying which line exactly an [Indentation] applies to is important because indents on the same line combine differently than indents on different lines:
+/// ```ignore
+/// some_function(|| {
+/// // Both the function parameters as well as the contained block should be indented.
+/// // Because they are on the same line, this only yields one indent level
+/// });
+/// ```
+///
+/// ```ignore
+/// some_function(
+/// param1,
+/// || {
+/// // Here we get 2 indent levels because the 'parameters' and the 'block' node begin on different lines
+/// },
+/// );
+/// ```
+#[allow(clippy::too_many_arguments)]
+pub fn treesitter_indent_for_pos<'a>(
+ query: &Query,
+ syntax: &Syntax,
+ tab_width: usize,
+ indent_width: usize,
+ text: RopeSlice<'a>,
+ line: usize,
+ pos: usize,
+ new_line: bool,
+) -> Option<Indentation<'a>> {
+ let byte_pos = text.char_to_byte(pos);
+ let new_line_byte_pos = new_line.then_some(byte_pos);
+ let (mut node, mut indent_captures) = init_indent_query(
+ query,
+ syntax,
+ text,
+ tab_width,
+ indent_width,
+ line,
+ byte_pos,
+ new_line_byte_pos,
+ )?;
let mut first_in_line = get_first_in_line(node, new_line.then_some(byte_pos));
let mut result = Indentation::default();
@@ -836,7 +916,7 @@ pub fn treesitter_indent_for_pos(
break;
}
}
- Some(result.into_string(indent_style))
+ Some(result)
}
/// Returns the indentation for a new line.
@@ -860,7 +940,6 @@ pub fn indent_for_newline(
if let Some(indent) = treesitter_indent_for_pos(
query,
syntax,
- indent_style,
tab_width,
indent_width,
text,
@@ -868,9 +947,55 @@ pub fn indent_for_newline(
line_before_end_pos,
true,
) {
- return indent;
+ // We want to compute the indentation not only based on the
+ // syntax tree but also on the actual indentation of a previous
+ // line. This makes indentation computation more resilient to
+ // incomplete queries, incomplete source code & differing indentation
+ // styles for the same language.
+ // However, using the indent of a previous line as a baseline may not
+ // make sense, e.g. if it has a different alignment than the new line.
+ // In order to prevent edge cases with long running times, we only try
+ // a constant number of (non-empty) lines.
+ const MAX_ATTEMPTS: usize = 2;
+ let mut num_attempts = 0;
+ for line_idx in (0..=line_before).rev() {
+ let line = text.line(line_idx);
+ let first_non_whitespace_char = match find_first_non_whitespace_char(line) {
+ Some(i) => i,
+ None => {
+ continue;
+ }
+ };
+ if let Some(indent) = (|| {
+ let computed_indent = treesitter_indent_for_pos(
+ query,
+ syntax,
+ tab_width,
+ indent_width,
+ text,
+ line_idx,
+ text.line_to_char(line_idx) + first_non_whitespace_char,
+ false,
+ )?;
+ let leading_whitespace = line.slice(0..first_non_whitespace_char);
+ indent.relative_indent(
+ &computed_indent,
+ leading_whitespace,
+ indent_style,
+ tab_width,
+ )
+ })() {
+ return indent;
+ }
+ num_attempts += 1;
+ if num_attempts == MAX_ATTEMPTS {
+ break;
+ }
+ }
+ return indent.to_string(indent_style);
};
}
+ // Fallback in case we either don't have indent queries or they failed for some reason
let indent_level = indent_level_for_line(text.line(current_line), tab_width, indent_width);
indent_style.as_str().repeat(indent_level)
}
@@ -962,10 +1087,13 @@ mod test {
..Default::default()
};
- let add_capture = |mut indent: Indentation, capture| {
+ fn add_capture<'a>(
+ mut indent: Indentation<'a>,
+ capture: IndentCaptureType<'a>,
+ ) -> Indentation<'a> {
indent.add_capture(capture);
indent
- };
+ }
// adding an indent to no indent makes an indent
assert_eq!(
diff --git a/helix-core/tests/indent.rs b/helix-core/tests/indent.rs
index 26010c90..cd96fcff 100644
--- a/helix-core/tests/indent.rs
+++ b/helix-core/tests/indent.rs
@@ -210,7 +210,6 @@ fn test_treesitter_indent(
let suggested_indent = treesitter_indent_for_pos(
indent_query,
&syntax,
- &indent_style,
tab_width,
indent_style.indent_width(tab_width),
text,
@@ -218,7 +217,8 @@ fn test_treesitter_indent(
text.line_to_char(i) + pos,
false,
)
- .unwrap();
+ .unwrap()
+ .to_string(&indent_style);
assert!(
line.get_slice(..pos).map_or(false, |s| s == suggested_indent),
"Wrong indentation for file {:?} on line {}:\n\"{}\" (original line)\n\"{}\" (suggested indentation)\n",