use std::collections::HashMap; use tree_sitter::{Query, QueryCursor, QueryPredicateArg}; use crate::{ chars::{char_is_line_ending, char_is_whitespace}, syntax::{LanguageConfiguration, RopeProvider, Syntax}, tree_sitter::Node, Rope, RopeSlice, }; /// Enum representing indentation style. /// /// Only values 1-8 are valid for the `Spaces` variant. #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] pub enum IndentStyle { Tabs, Spaces(u8), } impl IndentStyle { /// Creates an `IndentStyle` from an indentation string. /// /// For example, passing `" "` (four spaces) will create `IndentStyle::Spaces(4)`. #[allow(clippy::should_implement_trait)] #[inline] pub fn from_str(indent: &str) -> Self { // XXX: do we care about validating the input more than this? Probably not...? debug_assert!(!indent.is_empty() && indent.len() <= 8); if indent.starts_with(' ') { IndentStyle::Spaces(indent.len() as u8) } else { IndentStyle::Tabs } } #[inline] pub fn as_str(&self) -> &'static str { match *self { IndentStyle::Tabs => "\t", IndentStyle::Spaces(1) => " ", IndentStyle::Spaces(2) => " ", IndentStyle::Spaces(3) => " ", IndentStyle::Spaces(4) => " ", IndentStyle::Spaces(5) => " ", IndentStyle::Spaces(6) => " ", IndentStyle::Spaces(7) => " ", IndentStyle::Spaces(8) => " ", // Unsupported indentation style. This should never happen, // but just in case fall back to two spaces. IndentStyle::Spaces(n) => { debug_assert!(n > 0 && n <= 8); // Always triggers. `debug_panic!()` wanted. " " } } } } /// Attempts to detect the indentation style used in a document. /// /// Returns the indentation style if the auto-detect confidence is /// reasonably high, otherwise returns `None`. pub fn auto_detect_indent_style(document_text: &Rope) -> Option<IndentStyle> { // Build a histogram of the indentation *increases* between // subsequent lines, ignoring lines that are all whitespace. // // Index 0 is for tabs, the rest are 1-8 spaces. let histogram: [usize; 9] = { let mut histogram = [0; 9]; let mut prev_line_is_tabs = false; let mut prev_line_leading_count = 0usize; // Loop through the lines, checking for and recording indentation // increases as we go. 'outer: for line in document_text.lines().take(1000) { let mut c_iter = line.chars(); // Is first character a tab or space? let is_tabs = match c_iter.next() { Some('\t') => true, Some(' ') => false, // Ignore blank lines. Some(c) if char_is_line_ending(c) => continue, _ => { prev_line_is_tabs = false; prev_line_leading_count = 0; continue; } }; // Count the line's total leading tab/space characters. let mut leading_count = 1; let mut count_is_done = false; for c in c_iter { match c { '\t' if is_tabs && !count_is_done => leading_count += 1, ' ' if !is_tabs && !count_is_done => leading_count += 1, // We stop counting if we hit whitespace that doesn't // qualify as indent or doesn't match the leading // whitespace, but we don't exit the loop yet because // we still want to determine if the line is blank. c if char_is_whitespace(c) => count_is_done = true, // Ignore blank lines. c if char_is_line_ending(c) => continue 'outer, _ => break, } // Bound the worst-case execution time for weird text files. if leading_count > 256 { continue 'outer; } } // If there was an increase in indentation over the previous // line, update the histogram with that increase. if (prev_line_is_tabs == is_tabs || prev_line_leading_count == 0) && prev_line_leading_count < leading_count { if is_tabs { histogram[0] += 1; } else { let amount = leading_count - prev_line_leading_count; if amount <= 8 { histogram[amount] += 1; } } } // Store this line's leading whitespace info for use with // the next line. prev_line_is_tabs = is_tabs; prev_line_leading_count = leading_count; } // Give more weight to tabs, because their presence is a very // strong indicator. histogram[0] *= 2; histogram }; // Find the most frequent indent, its frequency, and the frequency of // the next-most frequent indent. let indent = histogram .iter() .enumerate() .max_by_key(|kv| kv.1) .unwrap() .0; let indent_freq = histogram[indent]; let indent_freq_2 = *histogram .iter() .enumerate() .filter(|kv| kv.0 != indent) .map(|kv| kv.1) .max() .unwrap(); // Return the the auto-detected result if we're confident enough in its // accuracy, based on some heuristics. if indent_freq >= 1 && (indent_freq_2 as f64 / indent_freq as f64) < 0.66 { Some(match indent { 0 => IndentStyle::Tabs, _ => IndentStyle::Spaces(indent as u8), }) } else { None } } /// To determine indentation of a newly inserted line, figure out the indentation at the last col /// of the previous line. pub fn indent_level_for_line(line: RopeSlice, tab_width: usize) -> usize { let mut len = 0; for ch in line.chars() { match ch { '\t' => len += tab_width, ' ' => len += 1, _ => break, } } len / tab_width } /// Computes for node and all ancestors whether they are the first node on their line. /// The first entry in the return value represents the root node, the last one the node itself fn get_first_in_line(mut node: Node, byte_pos: usize, new_line: bool) -> Vec<bool> { let mut first_in_line = Vec::new(); loop { if let Some(prev) = node.prev_sibling() { // If we insert a new line, the first node at/after the cursor is considered to be the first in its line let first = prev.end_position().row != node.start_position().row || (new_line && node.start_byte() >= byte_pos && prev.start_byte() < byte_pos); first_in_line.push(Some(first)); } else { // Nodes that have no previous siblings are first in their line if and only if their parent is // (which we don't know yet) first_in_line.push(None); } if let Some(parent) = node.parent() { node = parent; } else { break; } } let mut result = Vec::with_capacity(first_in_line.len()); let mut parent_is_first = true; // The root node is by definition the first node in its line for first in first_in_line.into_iter().rev() { if let Some(first) = first { result.push(first); parent_is_first = first; } else { result.push(parent_is_first); } } result } /// The total indent for some line of code. /// This is usually constructed in one of 2 ways: /// - Successively add indent captures to get the (added) indent from a single line /// - Successively add the indent results for each line #[derive(Default)] struct Indentation { /// The total indent (the number of indent levels) is defined as max(0, indent-outdent). /// The string that this results in depends on the indent style (spaces or tabs, etc.) indent: usize, outdent: usize, } impl Indentation { /// Add some other [IndentResult] to this. /// The added indent should be the total added indent from one line fn add_line(&mut self, added: &Indentation) { if added.indent > 0 && added.outdent == 0 { self.indent += 1; } else if added.outdent > 0 && added.indent == 0 { self.outdent += 1; } } /// Add an indent capture to this indent. /// All the captures that are added in this way should be on the same line. fn add_capture(&mut self, added: IndentCaptureType) { match added { IndentCaptureType::Indent => { self.indent = 1; } IndentCaptureType::Outdent => { self.outdent = 1; } } } fn as_string(&self, indent_style: &IndentStyle) -> String { let indent_level = if self.indent >= self.outdent { self.indent - self.outdent } else { log::warn!("Encountered more outdent than indent nodes while calculating indentation: {} outdent, {} indent", self.outdent, self.indent); 0 }; indent_style.as_str().repeat(indent_level) } } /// An indent definition which corresponds to a capture from the indent query struct IndentCapture { capture_type: IndentCaptureType, scope: IndentScope, } #[derive(Clone, Copy)] enum IndentCaptureType { Indent, Outdent, } impl IndentCaptureType { fn default_scope(&self) -> IndentScope { match self { IndentCaptureType::Indent => IndentScope::Tail, IndentCaptureType::Outdent => IndentScope::All, } } } /// This defines which part of a node an [IndentCapture] applies to. /// Each [IndentCaptureType] has a default scope, but the scope can be changed /// with `#set!` property declarations. #[derive(Clone, Copy)] enum IndentScope { /// The indent applies to the whole node All, /// The indent applies to everything except for the first line of the node Tail, } /// Execute the indent query. /// Returns for each node (identified by its id) a list of indent captures for that node. fn query_indents( query: &Query, syntax: &Syntax, cursor: &mut QueryCursor, text: RopeSlice, range: std::ops::Range<usize>, // Position of the (optional) newly inserted line break. // Given as (line, byte_pos) new_line_break: Option<(usize, usize)>, ) -> HashMap<usize, Vec<IndentCapture>> { let mut indent_captures: HashMap<usize, Vec<IndentCapture>> = HashMap::new(); cursor.set_byte_range(range); // Iterate over all captures from the query for m in cursor.matches(query, syntax.tree().root_node(), RopeProvider(text)) { // Skip matches where not all custom predicates are fulfilled if !query.general_predicates(m.pattern_index).iter().all(|pred| { match pred.operator.as_ref() { "not-kind-eq?" => match (pred.args.get(0), pred.args.get(1)) { ( Some(QueryPredicateArg::Capture(capture_idx)), Some(QueryPredicateArg::String(kind)), ) => { let node = m.nodes_for_capture_index(*capture_idx).next(); match node { Some(node) => node.kind()!=kind.as_ref(), _ => true, } } _ => { panic!("Invalid indent query: Arguments to \"not-kind-eq?\" must be a capture and a string"); } }, "same-line?" | "not-same-line?" => { match (pred.args.get(0), pred.args.get(1)) { ( Some(QueryPredicateArg::Capture(capt1)), Some(QueryPredicateArg::Capture(capt2)) ) => { let get_line_num = |node: Node| { let mut node_line = node.start_position().row; // Adjust for the new line that will be inserted if let Some((line, byte)) = new_line_break { if node_line==line && node.start_byte()>=byte { node_line += 1; } } node_line }; let n1 = m.nodes_for_capture_index(*capt1).next(); let n2 = m.nodes_for_capture_index(*capt2).next(); match (n1, n2) { (Some(n1), Some(n2)) => { let same_line = get_line_num(n1)==get_line_num(n2); same_line==(pred.operator.as_ref()=="same-line?") } _ => true, } } _ => { panic!("Invalid indent query: Arguments to \"{}\" must be 2 captures", pred.operator); } } } _ => { panic!( "Invalid indent query: Unknown predicate (\"{}\")", pred.operator ); } } }) { continue; } for capture in m.captures { let capture_type = query.capture_names()[capture.index as usize].as_str(); let capture_type = match capture_type { "indent" => IndentCaptureType::Indent, "outdent" => IndentCaptureType::Outdent, _ => { // Ignore any unknown captures (these may be needed for predicates such as #match?) continue; } }; let scope = capture_type.default_scope(); let mut indent_capture = IndentCapture { capture_type, scope, }; // Apply additional settings for this capture for property in query.property_settings(m.pattern_index) { match property.key.as_ref() { "scope" => { indent_capture.scope = match property.value.as_deref() { Some("all") => IndentScope::All, Some("tail") => IndentScope::Tail, Some(s) => { panic!("Invalid indent query: Unknown value for \"scope\" property (\"{}\")", s); } None => { panic!( "Invalid indent query: Missing value for \"scope\" property" ); } } } _ => { panic!( "Invalid indent query: Unknown property \"{}\"", property.key ); } } } indent_captures .entry(capture.node.id()) // Most entries only need to contain a single IndentCapture .or_insert_with(|| Vec::with_capacity(1)) .push(indent_capture); } } indent_captures } /// Use the syntax tree to determine the indentation for a given position. /// This can be used in 2 ways: /// /// - To get the correct indentation for an existing line (new_line=false), not necessarily equal to the current indentation. /// - In this case, pos should be inside the first tree-sitter node on that line. /// In most cases, this can just be the first non-whitespace on that line. /// - To get the indentation for a new line (new_line=true). This behaves like the first usecase if the part of the current line /// after pos were moved to a new line. /// /// The indentation is determined by traversing all the tree-sitter nodes containing the position. /// Each of these nodes produces some [AddedIndent] for: /// /// - The line of the (beginning of the) node. This is defined by the scope `all` if this is the first node on its line. /// - The line after the node. This is defined by: /// - The scope `tail`. /// - The scope `all` if this node is not the first node on its line. /// Intuitively, `all` applies to everything contained in this node while `tail` applies to everything except for the first line of the node. /// The indents from different nodes for the same line are then combined. /// The [IndentResult] is simply the sum of the [AddedIndent] for all lines. /// /// Specifying which line exactly an [AddedIndent] applies to is important because indents on the same line combine differently than indents on different lines: /// ```ignore /// some_function(|| { /// // Both the function parameters as well as the contained block should be indented. /// // Because they are on the same line, this only yields one indent level /// }); /// ``` /// /// ```ignore /// some_function( /// parm1, /// || { /// // Here we get 2 indent levels because the 'parameters' and the 'block' node begin on different lines /// }, /// ); /// ``` pub fn treesitter_indent_for_pos( query: &Query, syntax: &Syntax, indent_style: &IndentStyle, text: RopeSlice, line: usize, pos: usize, new_line: bool, ) -> Option<String> { let byte_pos = text.char_to_byte(pos); let mut node = syntax .tree() .root_node() .descendant_for_byte_range(byte_pos, byte_pos)?; let mut first_in_line = get_first_in_line(node, byte_pos, new_line); let new_line_break = if new_line { Some((line, byte_pos)) } else { None }; let query_result = crate::syntax::PARSER.with(|ts_parser| { let mut ts_parser = ts_parser.borrow_mut(); let mut cursor = ts_parser.cursors.pop().unwrap_or_else(QueryCursor::new); let query_result = query_indents( query, syntax, &mut cursor, text, byte_pos..byte_pos + 1, new_line_break, ); ts_parser.cursors.push(cursor); query_result }); let mut result = Indentation::default(); // We always keep track of all the indent changes on one line, in order to only indent once // even if there are multiple "indent" nodes on the same line let mut indent_for_line = Indentation::default(); let mut indent_for_line_below = Indentation::default(); loop { // This can safely be unwrapped because `first_in_line` contains // one entry for each ancestor of the node (which is what we iterate over) let is_first = *first_in_line.last().unwrap(); // Apply all indent definitions for this node if let Some(definitions) = query_result.get(&node.id()) { for definition in definitions { match definition.scope { IndentScope::All => { if is_first { indent_for_line.add_capture(definition.capture_type); } else { indent_for_line_below.add_capture(definition.capture_type); } } IndentScope::Tail => { indent_for_line_below.add_capture(definition.capture_type); } } } } if let Some(parent) = node.parent() { let mut node_line = node.start_position().row; let mut parent_line = parent.start_position().row; if node_line == line && new_line { // Also consider the line that will be inserted if node.start_byte() >= byte_pos { node_line += 1; } if parent.start_byte() >= byte_pos { parent_line += 1; } }; if node_line != parent_line { if node_line < line + (new_line as usize) { // Don't add indent for the line below the line of the query result.add_line(&indent_for_line_below); } if node_line == parent_line + 1 { indent_for_line_below = indent_for_line; } else { result.add_line(&indent_for_line); indent_for_line_below = Indentation::default(); } indent_for_line = Indentation::default(); } node = parent; first_in_line.pop(); } else { result.add_line(&indent_for_line_below); result.add_line(&indent_for_line); break; } } Some(result.as_string(indent_style)) } /// Returns the indentation for a new line. /// This is done either using treesitter, or if that's not available by copying the indentation from the current line #[allow(clippy::too_many_arguments)] pub fn indent_for_newline( language_config: Option<&LanguageConfiguration>, syntax: Option<&Syntax>, indent_style: &IndentStyle, tab_width: usize, text: RopeSlice, line_before: usize, line_before_end_pos: usize, current_line: usize, ) -> String { if let (Some(query), Some(syntax)) = ( language_config.and_then(|config| config.indent_query()), syntax, ) { if let Some(indent) = treesitter_indent_for_pos( query, syntax, indent_style, text, line_before, line_before_end_pos, true, ) { return indent; }; } let indent_level = indent_level_for_line(text.line(current_line), tab_width); indent_style.as_str().repeat(indent_level) } pub fn get_scopes(syntax: Option<&Syntax>, text: RopeSlice, pos: usize) -> Vec<&'static str> { let mut scopes = Vec::new(); if let Some(syntax) = syntax { let pos = text.char_to_byte(pos); let mut node = match syntax .tree() .root_node() .descendant_for_byte_range(pos, pos) { Some(node) => node, None => return scopes, }; scopes.push(node.kind()); while let Some(parent) = node.parent() { scopes.push(parent.kind()); node = parent; } } scopes.reverse(); scopes } #[cfg(test)] mod test { use super::*; use crate::Rope; #[test] fn test_indent_level() { let tab_width = 4; let line = Rope::from(" fn new"); // 8 spaces assert_eq!(indent_level_for_line(line.slice(..), tab_width), 2); let line = Rope::from("\t\t\tfn new"); // 3 tabs assert_eq!(indent_level_for_line(line.slice(..), tab_width), 3); // mixed indentation let line = Rope::from("\t \tfn new"); // 1 tab, 4 spaces, tab assert_eq!(indent_level_for_line(line.slice(..), tab_width), 3); } }