diff options
author | Triton171 | 2022-03-30 15:08:07 +0000 |
---|---|---|
committer | GitHub | 2022-03-30 15:08:07 +0000 |
commit | 58758fee610a3808dfaeafddd1b4b4242a7e42cd (patch) | |
tree | 1ca1bc05978270080693c56d6cfb3beb6dd86e1a /helix-core/src | |
parent | c18de0e8f001041e234b2b9bb0f8fea122858ad3 (diff) |
Indentation rework (#1562)
* WIP: Rework indentation system
* Add ComplexNode for context-aware indentation (including a proof of concept for assignment statements in rust)
* Add switch statements to Go indents.toml (fixes the second half of issue #1523)
Remove commented-out code
* Migrate all existing indentation queries.
Add more options to ComplexNode and use them to improve C/C++ indentation.
* Add comments & replace Option<Vec<_>> with Vec<_>
* Add more detailed documentation for tree-sitter indentation
* Improve code style in indent.rs
* Use tree-sitter queries for indentation instead of TOML config.
Migrate existing indent queries.
* Add documentation for the new indent queries.
Change xtask docgen to look for indents.scm instead of indents.toml
* Improve code style in indent.rs.
Fix an issue with the rust indent query.
* Move indentation test sources to separate files.
Add `#not-kind-eq?`, `#same-line?` and `#not-same-line` custom predicates.
Improve the rust and c indent queries.
* Fix indent test.
Improve rust indent queries.
* Move indentation tests to integration test folder.
* Improve code style in indent.rs.
Reuse tree-sitter cursors for indentation queries.
* Migrate HCL indent query
* Replace custom loading in indent tests with a designated languages.toml
* Update indent query file name for --health command.
* Fix single-space formatting in indent queries.
* Add explanation for unwrapping.
Co-authored-by: Triton171 <triton0171@gmail.com>
Diffstat (limited to 'helix-core/src')
-rw-r--r-- | helix-core/src/indent.rs | 594 | ||||
-rw-r--r-- | helix-core/src/syntax.rs | 31 |
2 files changed, 384 insertions, 241 deletions
diff --git a/helix-core/src/indent.rs b/helix-core/src/indent.rs index 30f4a340..529139b8 100644 --- a/helix-core/src/indent.rs +++ b/helix-core/src/indent.rs @@ -1,6 +1,10 @@ +use std::collections::HashMap; + +use tree_sitter::{Query, QueryCursor, QueryPredicateArg}; + use crate::{ chars::{char_is_line_ending, char_is_whitespace}, - syntax::{IndentQuery, LanguageConfiguration, Syntax}, + syntax::{LanguageConfiguration, RopeProvider, Syntax}, tree_sitter::Node, Rope, RopeSlice, }; @@ -186,103 +190,405 @@ pub fn indent_level_for_line(line: RopeSlice, tab_width: usize) -> usize { len / tab_width } -/// Find the highest syntax node at position. -/// This is to identify the column where this node (e.g., an HTML closing tag) ends. -fn get_highest_syntax_node_at_bytepos(syntax: &Syntax, pos: usize) -> Option<Node> { - let tree = syntax.tree(); - - // named_descendant - let mut node = tree.root_node().descendant_for_byte_range(pos, pos)?; - - while let Some(parent) = node.parent() { - if parent.start_byte() == node.start_byte() { - node = parent +/// Computes for node and all ancestors whether they are the first node on their line. +/// The first entry in the return value represents the root node, the last one the node itself +fn get_first_in_line(mut node: Node, byte_pos: usize, new_line: bool) -> Vec<bool> { + let mut first_in_line = Vec::new(); + loop { + if let Some(prev) = node.prev_sibling() { + // If we insert a new line, the first node at/after the cursor is considered to be the first in its line + let first = prev.end_position().row != node.start_position().row + || (new_line && node.start_byte() >= byte_pos && prev.start_byte() < byte_pos); + first_in_line.push(Some(first)); + } else { + // Nodes that have no previous siblings are first in their line if and only if their parent is + // (which we don't know yet) + first_in_line.push(None); + } + if let Some(parent) = node.parent() { + node = parent; } else { break; } } - Some(node) + let mut result = Vec::with_capacity(first_in_line.len()); + let mut parent_is_first = true; // The root node is by definition the first node in its line + for first in first_in_line.into_iter().rev() { + if let Some(first) = first { + result.push(first); + parent_is_first = first; + } else { + result.push(parent_is_first); + } + } + result } -/// Calculate the indentation at a given treesitter node. -/// If newline is false, then any "indent" nodes on the line are ignored ("outdent" still applies). -/// This is because the indentation is only increased starting at the second line of the node. -fn calculate_indentation( - query: &IndentQuery, - node: Option<Node>, - line: usize, - newline: bool, -) -> usize { - let mut increment: isize = 0; - - let mut node = match node { - Some(node) => node, - None => return 0, - }; +/// The total indent for some line of code. +/// This is usually constructed in one of 2 ways: +/// - Successively add indent captures to get the (added) indent from a single line +/// - Successively add the indent results for each line +#[derive(Default)] +struct Indentation { + /// The total indent (the number of indent levels) is defined as max(0, indent-outdent). + /// The string that this results in depends on the indent style (spaces or tabs, etc.) + indent: usize, + outdent: usize, +} +impl Indentation { + /// Add some other [IndentResult] to this. + /// The added indent should be the total added indent from one line + fn add_line(&mut self, added: &Indentation) { + if added.indent > 0 && added.outdent == 0 { + self.indent += 1; + } else if added.outdent > 0 && added.indent == 0 { + self.outdent += 1; + } + } + /// Add an indent capture to this indent. + /// All the captures that are added in this way should be on the same line. + fn add_capture(&mut self, added: IndentCaptureType) { + match added { + IndentCaptureType::Indent => { + self.indent = 1; + } + IndentCaptureType::Outdent => { + self.outdent = 1; + } + } + } + fn as_string(&self, indent_style: &IndentStyle) -> String { + let indent_level = if self.indent >= self.outdent { + self.indent - self.outdent + } else { + log::warn!("Encountered more outdent than indent nodes while calculating indentation: {} outdent, {} indent", self.outdent, self.indent); + 0 + }; + indent_style.as_str().repeat(indent_level) + } +} - let mut current_line = line; - let mut consider_indent = newline; - let mut increment_from_line: isize = 0; +/// An indent definition which corresponds to a capture from the indent query +struct IndentCapture { + capture_type: IndentCaptureType, + scope: IndentScope, +} +#[derive(Clone, Copy)] +enum IndentCaptureType { + Indent, + Outdent, +} +impl IndentCaptureType { + fn default_scope(&self) -> IndentScope { + match self { + IndentCaptureType::Indent => IndentScope::Tail, + IndentCaptureType::Outdent => IndentScope::All, + } + } +} +/// This defines which part of a node an [IndentCapture] applies to. +/// Each [IndentCaptureType] has a default scope, but the scope can be changed +/// with `#set!` property declarations. +#[derive(Clone, Copy)] +enum IndentScope { + /// The indent applies to the whole node + All, + /// The indent applies to everything except for the first line of the node + Tail, +} - loop { - let node_kind = node.kind(); - let start = node.start_position().row; - if current_line != start { - // Indent/dedent by at most one per line: - // .map(|a| { <-- ({ is two scopes - // let len = 1; <-- indents one level - // }) <-- }) is two scopes - if consider_indent || increment_from_line < 0 { - increment += increment_from_line.signum(); +/// Execute the indent query. +/// Returns for each node (identified by its id) a list of indent captures for that node. +fn query_indents( + query: &Query, + syntax: &Syntax, + cursor: &mut QueryCursor, + text: RopeSlice, + range: std::ops::Range<usize>, + // Position of the (optional) newly inserted line break. + // Given as (line, byte_pos) + new_line_break: Option<(usize, usize)>, +) -> HashMap<usize, Vec<IndentCapture>> { + let mut indent_captures: HashMap<usize, Vec<IndentCapture>> = HashMap::new(); + cursor.set_byte_range(range); + // Iterate over all captures from the query + for m in cursor.matches(query, syntax.tree().root_node(), RopeProvider(text)) { + // Skip matches where not all custom predicates are fulfilled + if !query.general_predicates(m.pattern_index).iter().all(|pred| { + match pred.operator.as_ref() { + "not-kind-eq?" => match (pred.args.get(0), pred.args.get(1)) { + ( + Some(QueryPredicateArg::Capture(capture_idx)), + Some(QueryPredicateArg::String(kind)), + ) => { + let node = m.nodes_for_capture_index(*capture_idx).next(); + match node { + Some(node) => node.kind()!=kind.as_ref(), + _ => true, + } + } + _ => { + panic!("Invalid indent query: Arguments to \"not-kind-eq?\" must be a capture and a string"); + } + }, + "same-line?" | "not-same-line?" => { + match (pred.args.get(0), pred.args.get(1)) { + ( + Some(QueryPredicateArg::Capture(capt1)), + Some(QueryPredicateArg::Capture(capt2)) + ) => { + let get_line_num = |node: Node| { + let mut node_line = node.start_position().row; + // Adjust for the new line that will be inserted + if let Some((line, byte)) = new_line_break { + if node_line==line && node.start_byte()>=byte { + node_line += 1; + } + } + node_line + }; + let n1 = m.nodes_for_capture_index(*capt1).next(); + let n2 = m.nodes_for_capture_index(*capt2).next(); + match (n1, n2) { + (Some(n1), Some(n2)) => { + let same_line = get_line_num(n1)==get_line_num(n2); + same_line==(pred.operator.as_ref()=="same-line?") + } + _ => true, + } + } + _ => { + panic!("Invalid indent query: Arguments to \"{}\" must be 2 captures", pred.operator); + } + } + } + _ => { + panic!( + "Invalid indent query: Unknown predicate (\"{}\")", + pred.operator + ); + } } - increment_from_line = 0; - current_line = start; - consider_indent = true; + }) { + continue; } - - if query.outdent.contains(node_kind) { - increment_from_line -= 1; + for capture in m.captures { + let capture_type = query.capture_names()[capture.index as usize].as_str(); + let capture_type = match capture_type { + "indent" => IndentCaptureType::Indent, + "outdent" => IndentCaptureType::Outdent, + _ => { + // Ignore any unknown captures (these may be needed for predicates such as #match?) + continue; + } + }; + let scope = capture_type.default_scope(); + let mut indent_capture = IndentCapture { + capture_type, + scope, + }; + // Apply additional settings for this capture + for property in query.property_settings(m.pattern_index) { + match property.key.as_ref() { + "scope" => { + indent_capture.scope = match property.value.as_deref() { + Some("all") => IndentScope::All, + Some("tail") => IndentScope::Tail, + Some(s) => { + panic!("Invalid indent query: Unknown value for \"scope\" property (\"{}\")", s); + } + None => { + panic!( + "Invalid indent query: Missing value for \"scope\" property" + ); + } + } + } + _ => { + panic!( + "Invalid indent query: Unknown property \"{}\"", + property.key + ); + } + } + } + indent_captures + .entry(capture.node.id()) + // Most entries only need to contain a single IndentCapture + .or_insert_with(|| Vec::with_capacity(1)) + .push(indent_capture); } - if query.indent.contains(node_kind) { - increment_from_line += 1; + } + indent_captures +} + +/// Use the syntax tree to determine the indentation for a given position. +/// This can be used in 2 ways: +/// +/// - To get the correct indentation for an existing line (new_line=false), not necessarily equal to the current indentation. +/// - In this case, pos should be inside the first tree-sitter node on that line. +/// In most cases, this can just be the first non-whitespace on that line. +/// - To get the indentation for a new line (new_line=true). This behaves like the first usecase if the part of the current line +/// after pos were moved to a new line. +/// +/// The indentation is determined by traversing all the tree-sitter nodes containing the position. +/// Each of these nodes produces some [AddedIndent] for: +/// +/// - The line of the (beginning of the) node. This is defined by the scope `all` if this is the first node on its line. +/// - The line after the node. This is defined by: +/// - The scope `tail`. +/// - The scope `all` if this node is not the first node on its line. +/// Intuitively, `all` applies to everything contained in this node while `tail` applies to everything except for the first line of the node. +/// The indents from different nodes for the same line are then combined. +/// The [IndentResult] is simply the sum of the [AddedIndent] for all lines. +/// +/// Specifying which line exactly an [AddedIndent] applies to is important because indents on the same line combine differently than indents on different lines: +/// ```ignore +/// some_function(|| { +/// // Both the function parameters as well as the contained block should be indented. +/// // Because they are on the same line, this only yields one indent level +/// }); +/// ``` +/// +/// ```ignore +/// some_function( +/// parm1, +/// || { +/// // Here we get 2 indent levels because the 'parameters' and the 'block' node begin on different lines +/// }, +/// ); +/// ``` +pub fn treesitter_indent_for_pos( + query: &Query, + syntax: &Syntax, + indent_style: &IndentStyle, + text: RopeSlice, + line: usize, + pos: usize, + new_line: bool, +) -> Option<String> { + let byte_pos = text.char_to_byte(pos); + let mut node = syntax + .tree() + .root_node() + .descendant_for_byte_range(byte_pos, byte_pos)?; + let mut first_in_line = get_first_in_line(node, byte_pos, new_line); + let new_line_break = if new_line { + Some((line, byte_pos)) + } else { + None + }; + let query_result = crate::syntax::PARSER.with(|ts_parser| { + let mut ts_parser = ts_parser.borrow_mut(); + let mut cursor = ts_parser.cursors.pop().unwrap_or_else(QueryCursor::new); + let query_result = query_indents( + query, + syntax, + &mut cursor, + text, + byte_pos..byte_pos + 1, + new_line_break, + ); + ts_parser.cursors.push(cursor); + query_result + }); + + let mut result = Indentation::default(); + // We always keep track of all the indent changes on one line, in order to only indent once + // even if there are multiple "indent" nodes on the same line + let mut indent_for_line = Indentation::default(); + let mut indent_for_line_below = Indentation::default(); + loop { + // This can safely be unwrapped because `first_in_line` contains + // one entry for each ancestor of the node (which is what we iterate over) + let is_first = *first_in_line.last().unwrap(); + // Apply all indent definitions for this node + if let Some(definitions) = query_result.get(&node.id()) { + for definition in definitions { + match definition.scope { + IndentScope::All => { + if is_first { + indent_for_line.add_capture(definition.capture_type); + } else { + indent_for_line_below.add_capture(definition.capture_type); + } + } + IndentScope::Tail => { + indent_for_line_below.add_capture(definition.capture_type); + } + } + } } if let Some(parent) = node.parent() { + let mut node_line = node.start_position().row; + let mut parent_line = parent.start_position().row; + if node_line == line && new_line { + // Also consider the line that will be inserted + if node.start_byte() >= byte_pos { + node_line += 1; + } + if parent.start_byte() >= byte_pos { + parent_line += 1; + } + }; + if node_line != parent_line { + if node_line < line + (new_line as usize) { + // Don't add indent for the line below the line of the query + result.add_line(&indent_for_line_below); + } + if node_line == parent_line + 1 { + indent_for_line_below = indent_for_line; + } else { + result.add_line(&indent_for_line); + indent_for_line_below = Indentation::default(); + } + indent_for_line = Indentation::default(); + } + node = parent; + first_in_line.pop(); } else { + result.add_line(&indent_for_line_below); + result.add_line(&indent_for_line); break; } } - if consider_indent || increment_from_line < 0 { - increment += increment_from_line.signum(); - } - increment.max(0) as usize + Some(result.as_string(indent_style)) } -// TODO: two usecases: if we are triggering this for a new, blank line: -// - it should return 0 when mass indenting stuff -// - it should look up the wrapper node and count it too when we press o/O -pub fn suggested_indent_for_pos( +/// Returns the indentation for a new line. +/// This is done either using treesitter, or if that's not available by copying the indentation from the current line +#[allow(clippy::too_many_arguments)] +pub fn indent_for_newline( language_config: Option<&LanguageConfiguration>, syntax: Option<&Syntax>, + indent_style: &IndentStyle, + tab_width: usize, text: RopeSlice, - pos: usize, - line: usize, - new_line: bool, -) -> Option<usize> { + line_before: usize, + line_before_end_pos: usize, + current_line: usize, +) -> String { if let (Some(query), Some(syntax)) = ( language_config.and_then(|config| config.indent_query()), syntax, ) { - let byte_start = text.char_to_byte(pos); - let node = get_highest_syntax_node_at_bytepos(syntax, byte_start); - // TODO: special case for comments - // TODO: if preserve_leading_whitespace - Some(calculate_indentation(query, node, line, new_line)) - } else { - None + if let Some(indent) = treesitter_indent_for_pos( + query, + syntax, + indent_style, + text, + line_before, + line_before_end_pos, + true, + ) { + return indent; + }; } + let indent_level = indent_level_for_line(text.line(current_line), tab_width); + indent_style.as_str().repeat(indent_level) } pub fn get_scopes(syntax: Option<&Syntax>, text: RopeSlice, pos: usize) -> Vec<&'static str> { @@ -326,156 +632,4 @@ mod test { let line = Rope::from("\t \tfn new"); // 1 tab, 4 spaces, tab assert_eq!(indent_level_for_line(line.slice(..), tab_width), 3); } - - #[test] - fn test_suggested_indent_for_line() { - let doc = Rope::from( - " -use std::{ - io::{self, stdout, Stdout, Write}, - path::PathBuf, - sync::Arc, - time::Duration, -} -mod test { - fn hello_world() { - 1 + 1; - - let does_indentation_work = 1; - - let test_function = function_with_param(this_param, - that_param - ); - - let test_function = function_with_param( - this_param, - that_param - ); - - let test_function = function_with_proper_indent(param1, - param2, - ); - - let selection = Selection::new( - changes - .clone() - .map(|(start, end, text): (usize, usize, Option<Tendril>)| { - let len = text.map(|text| text.len()).unwrap() - 1; // minus newline - let pos = start + len; - Range::new(pos, pos) - }) - .collect(), - 0, - ); - - return; - } -} - -impl<A, D> MyTrait<A, D> for YourType -where - A: TraitB + TraitC, - D: TraitE + TraitF, -{ - -} -#[test] -// -match test { - Some(a) => 1, - None => { - unimplemented!() - } -} -std::panic::set_hook(Box::new(move |info| { - hook(info); -})); - -{ { { - 1 -}}} - -pub fn change<I>(document: &Document, changes: I) -> Self -where - I: IntoIterator<Item = Change> + ExactSizeIterator, -{ - [ - 1, - 2, - 3, - ]; - ( - 1, - 2 - ); - true -} -", - ); - - let doc = doc; - use crate::diagnostic::Severity; - use crate::syntax::{ - Configuration, IndentationConfiguration, LanguageConfiguration, Loader, - }; - use once_cell::sync::OnceCell; - let loader = Loader::new(Configuration { - language: vec![LanguageConfiguration { - scope: "source.rust".to_string(), - file_types: vec!["rs".to_string()], - shebangs: vec![], - language_id: "Rust".to_string(), - highlight_config: OnceCell::new(), - config: None, - // - injection_regex: None, - roots: vec![], - comment_token: None, - auto_format: false, - diagnostic_severity: Severity::Warning, - grammar: None, - language_server: None, - indent: Some(IndentationConfiguration { - tab_width: 4, - unit: String::from(" "), - }), - indent_query: OnceCell::new(), - textobject_query: OnceCell::new(), - debugger: None, - auto_pairs: None, - }], - }); - - // set runtime path so we can find the queries - let mut runtime = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")); - runtime.push("../runtime"); - std::env::set_var("HELIX_RUNTIME", runtime.to_str().unwrap()); - - let language_config = loader.language_config_for_scope("source.rust").unwrap(); - let highlight_config = language_config.highlight_config(&[]).unwrap(); - let syntax = Syntax::new(&doc, highlight_config, std::sync::Arc::new(loader)); - let text = doc.slice(..); - let tab_width = 4; - - for i in 0..doc.len_lines() { - let line = text.line(i); - if let Some(pos) = crate::find_first_non_whitespace_char(line) { - let indent = indent_level_for_line(line, tab_width); - assert_eq!( - suggested_indent_for_pos( - Some(&language_config), - Some(&syntax), - text, - text.line_to_char(i) + pos, - i, - false - ), - Some(indent), - "line {}: \"{}\"", - i, - line - ); - } - } - } } diff --git a/helix-core/src/syntax.rs b/helix-core/src/syntax.rs index d3750e75..dde7e90c 100644 --- a/helix-core/src/syntax.rs +++ b/helix-core/src/syntax.rs @@ -92,7 +92,7 @@ pub struct LanguageConfiguration { pub indent: Option<IndentationConfiguration>, #[serde(skip)] - pub(crate) indent_query: OnceCell<Option<IndentQuery>>, + pub(crate) indent_query: OnceCell<Option<Query>>, #[serde(skip)] pub(crate) textobject_query: OnceCell<Option<TextObjectQuery>>, #[serde(skip_serializing_if = "Option::is_none")] @@ -220,17 +220,6 @@ impl FromStr for AutoPairConfig { } } -#[derive(Debug, Serialize, Deserialize)] -#[serde(rename_all = "kebab-case")] -pub struct IndentQuery { - #[serde(default)] - #[serde(skip_serializing_if = "HashSet::is_empty")] - pub indent: HashSet<String>, - #[serde(default)] - #[serde(skip_serializing_if = "HashSet::is_empty")] - pub outdent: HashSet<String>, -} - #[derive(Debug)] pub struct TextObjectQuery { pub query: Query, @@ -404,13 +393,13 @@ impl LanguageConfiguration { self.highlight_config.get().is_some() } - pub fn indent_query(&self) -> Option<&IndentQuery> { + pub fn indent_query(&self) -> Option<&Query> { self.indent_query .get_or_init(|| { - let language = self.language_id.to_ascii_lowercase(); - - let toml = load_runtime_file(&language, "indents.toml").ok()?; - toml::from_slice(toml.as_bytes()).ok() + let lang_name = self.language_id.to_ascii_lowercase(); + let query_text = read_query(&lang_name, "indents.scm"); + let lang = self.highlight_config.get()?.as_ref()?.language; + Query::new(lang, &query_text).ok() }) .as_ref() } @@ -557,7 +546,7 @@ impl Loader { pub struct TsParser { parser: tree_sitter::Parser, - cursors: Vec<QueryCursor>, + pub cursors: Vec<QueryCursor>, } // could also just use a pool, or a single instance? @@ -1180,7 +1169,7 @@ struct HighlightIter<'a> { } // Adapter to convert rope chunks to bytes -struct ChunksBytes<'a> { +pub struct ChunksBytes<'a> { chunks: ropey::iter::Chunks<'a>, } impl<'a> Iterator for ChunksBytes<'a> { @@ -1190,7 +1179,7 @@ impl<'a> Iterator for ChunksBytes<'a> { } } -struct RopeProvider<'a>(RopeSlice<'a>); +pub struct RopeProvider<'a>(pub RopeSlice<'a>); impl<'a> TextProvider<'a> for RopeProvider<'a> { type I = ChunksBytes<'a>; @@ -2126,7 +2115,7 @@ mod test { #[test] fn test_load_runtime_file() { // Test to make sure we can load some data from the runtime directory. - let contents = load_runtime_file("rust", "indents.toml").unwrap(); + let contents = load_runtime_file("rust", "indents.scm").unwrap(); assert!(!contents.is_empty()); let results = load_runtime_file("rust", "does-not-exist"); |