summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Nathan Vegdahl 2021-07-29 19:10:59 +0000
committer Blaž Hrastnik 2021-07-30 03:22:59 +0000
commit f88d4c1e20d4dbc244599ad3f3a5f301bec239bf (patch)
tree d852622a7bae76f0fbb85b2924b1669a59fc1ed2
parent e191a75e336885b4dd8c3d0a80b7b24000a0ca5a (diff)
Move indent-style code into `helix_core::indent`.
-rw-r--r-- helix-core/src/indent.rs 169
-rw-r--r-- helix-term/src/commands.rs 3
-rw-r--r-- helix-view/src/document.rs 170
3 files changed, 186 insertions, 156 deletions
diff --git a/helix-core/src/indent.rs b/helix-core/src/indent.rs
index 5ae66769..d272dd68 100644
--- a/helix-core/src/indent.rs
+++ b/helix-core/src/indent.rs
@@ -1,10 +1,177 @@
use crate::{
+ chars::{char_is_line_ending, char_is_whitespace},
find_first_non_whitespace_char,
syntax::{IndentQuery, LanguageConfiguration, Syntax},
tree_sitter::Node,
- RopeSlice,
+ Rope, RopeSlice,
};
+/// Enum representing indentation style.
+///
+/// Only values 1-8 are valid for the `Spaces` variant.
+#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
+pub enum IndentStyle {
+ Tabs,
+ Spaces(u8),
+}
+
+impl IndentStyle {
+ /// Creates an `IndentStyle` from an indentation string.
+ ///
+ /// For example, passing `"    "` (four spaces) will create `IndentStyle::Spaces(4)`.
+ #[allow(clippy::should_implement_trait)]
+ #[inline]
+ pub fn from_str(indent: &str) -> Self {
+ // XXX: do we care about validating the input more than this? Probably not...?
+ debug_assert!(!indent.is_empty() && indent.len() <= 8);
+
+ if indent.starts_with(' ') {
+ IndentStyle::Spaces(indent.len() as u8)
+ } else {
+ IndentStyle::Tabs
+ }
+ }
+
+ #[inline]
+ pub fn as_str(&self) -> &'static str {
+ match *self {
+ IndentStyle::Tabs => "\t",
+ IndentStyle::Spaces(1) => " ",
+ IndentStyle::Spaces(2) => "  ",
+ IndentStyle::Spaces(3) => "   ",
+ IndentStyle::Spaces(4) => "    ",
+ IndentStyle::Spaces(5) => "     ",
+ IndentStyle::Spaces(6) => "      ",
+ IndentStyle::Spaces(7) => "       ",
+ IndentStyle::Spaces(8) => "        ",
+
+ // Unsupported indentation style. This should never happen,
+ // but just in case fall back to two spaces.
+ IndentStyle::Spaces(n) => {
+ debug_assert!(n > 0 && n <= 8); // Always triggers. `debug_panic!()` wanted.
+ "  "
+ }
+ }
+ }
+}
+
+/// Attempts to detect the indentation style used in a document.
+///
+/// Returns the indentation style if the auto-detect confidence is
+/// reasonably high, otherwise returns `None`.
+pub fn auto_detect_indent_style(document_text: &Rope) -> Option<IndentStyle> {
+ // Build a histogram of the indentation *increases* between
+ // subsequent lines, ignoring lines that are all whitespace.
+ //
+ // Index 0 is for tabs, the rest are 1-8 spaces.
+ let histogram: [usize; 9] = {
+ let mut histogram = [0; 9];
+ let mut prev_line_is_tabs = false;
+ let mut prev_line_leading_count = 0usize;
+
+ // Loop through the lines, checking for and recording indentation
+ // increases as we go.
+ 'outer: for line in document_text.lines().take(1000) {
+ let mut c_iter = line.chars();
+
+ // Is first character a tab or space?
+ let is_tabs = match c_iter.next() {
+ Some('\t') => true,
+ Some(' ') => false,
+
+ // Ignore blank lines.
+ Some(c) if char_is_line_ending(c) => continue,
+
+ _ => {
+ prev_line_is_tabs = false;
+ prev_line_leading_count = 0;
+ continue;
+ }
+ };
+
+ // Count the line's total leading tab/space characters.
+ let mut leading_count = 1;
+ let mut count_is_done = false;
+ for c in c_iter {
+ match c {
+ '\t' if is_tabs && !count_is_done => leading_count += 1,
+ ' ' if !is_tabs && !count_is_done => leading_count += 1,
+
+ // We stop counting if we hit whitespace that doesn't
+ // qualify as indent or doesn't match the leading
+ // whitespace, but we don't exit the loop yet because
+ // we still want to determine if the line is blank.
+ c if char_is_whitespace(c) => count_is_done = true,
+
+ // Ignore blank lines.
+ c if char_is_line_ending(c) => continue 'outer,
+
+ _ => break,
+ }
+
+ // Bound the worst-case execution time for weird text files.
+ if leading_count > 256 {
+ continue 'outer;
+ }
+ }
+
+ // If there was an increase in indentation over the previous
+ // line, update the histogram with that increase.
+ if (prev_line_is_tabs == is_tabs || prev_line_leading_count == 0)
+ && prev_line_leading_count < leading_count
+ {
+ if is_tabs {
+ histogram[0] += 1;
+ } else {
+ let amount = leading_count - prev_line_leading_count;
+ if amount <= 8 {
+ histogram[amount] += 1;
+ }
+ }
+ }
+
+ // Store this line's leading whitespace info for use with
+ // the next line.
+ prev_line_is_tabs = is_tabs;
+ prev_line_leading_count = leading_count;
+ }
+
+ // Give more weight to tabs, because their presence is a very
+ // strong indicator.
+ histogram[0] *= 2;
+
+ histogram
+ };
+
+ // Find the most frequent indent, its frequency, and the frequency of
+ // the next-most frequent indent.
+ let indent = histogram
+ .iter()
+ .enumerate()
+ .max_by_key(|kv| kv.1)
+ .unwrap()
+ .0;
+ let indent_freq = histogram[indent];
+ let indent_freq_2 = *histogram
+ .iter()
+ .enumerate()
+ .filter(|kv| kv.0 != indent)
+ .map(|kv| kv.1)
+ .max()
+ .unwrap();
+
+ // Return the auto-detected result if we're confident enough in its
+ // accuracy, based on some heuristics.
+ if indent_freq >= 1 && (indent_freq_2 as f64 / indent_freq as f64) < 0.66 {
+ Some(match indent {
+ 0 => IndentStyle::Tabs,
+ _ => IndentStyle::Spaces(indent as u8),
+ })
+ } else {
+ None
+ }
+}
+
/// To determine indentation of a newly inserted line, figure out the indentation at the last col
/// of the previous line.
#[allow(dead_code)]
diff --git a/helix-term/src/commands.rs b/helix-term/src/commands.rs
index 2f071306..7403f5b2 100644
--- a/helix-term/src/commands.rs
+++ b/helix-term/src/commands.rs
@@ -1,5 +1,6 @@
use helix_core::{
comment, coords_at_pos, find_first_non_whitespace_char, find_root, graphemes, indent,
+ indent::IndentStyle,
line_ending::{get_line_ending_of_str, line_end_char_index, str_is_line_ending},
match_brackets,
movement::{self, Direction},
@@ -11,7 +12,7 @@ use helix_core::{
};
use helix_view::{
- document::{IndentStyle, Mode},
+ document::Mode,
editor::Action,
input::KeyEvent,
keyboard::KeyCode,
diff --git a/helix-view/src/document.rs b/helix-view/src/document.rs
index 9eabda46..c02d6656 100644
--- a/helix-view/src/document.rs
+++ b/helix-view/src/document.rs
@@ -9,8 +9,8 @@ use std::str::FromStr;
use std::sync::Arc;
use helix_core::{
- chars::{char_is_line_ending, char_is_whitespace},
history::History,
+ indent::{auto_detect_indent_style, IndentStyle},
line_ending::auto_detect_line_ending,
syntax::{self, LanguageConfiguration},
ChangeSet, Diagnostic, LineEnding, Rope, RopeBuilder, Selection, State, Syntax, Transaction,
@@ -63,12 +63,6 @@ impl<'de> Deserialize<'de> for Mode {
}
}
-#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
-pub enum IndentStyle {
- Tabs,
- Spaces(u8),
-}
-
pub struct Document {
pub(crate) id: DocumentId,
text: Rope,
@@ -460,9 +454,7 @@ impl Document {
doc.detect_language(theme, loader);
}
- // Detect indentation style and line ending.
- doc.detect_indent_style();
- doc.line_ending = auto_detect_line_ending(&doc.text).unwrap_or(DEFAULT_LINE_ENDING);
+ doc.detect_indent_and_line_ending();
Ok(doc)
}
@@ -580,6 +572,18 @@ impl Document {
}
}
+ pub fn detect_indent_and_line_ending(&mut self) {
+ self.indent_style = auto_detect_indent_style(&self.text).unwrap_or_else(|| {
+ IndentStyle::from_str(
+ self.language
+ .as_ref()
+ .and_then(|config| config.indent.as_ref())
+ .map_or("  ", |config| config.unit.as_str()), // Fallback to 2 spaces.
+ )
+ });
+ self.line_ending = auto_detect_line_ending(&self.text).unwrap_or(DEFAULT_LINE_ENDING);
+ }
+
/// Reload the document from its path.
pub fn reload(&mut self, view_id: ViewId) -> Result<(), Error> {
let encoding = &self.encoding;
@@ -598,9 +602,7 @@ impl Document {
self.append_changes_to_history(view_id);
self.reset_modified();
- // Detect indentation style and line ending.
- self.detect_indent_style();
- self.line_ending = auto_detect_line_ending(&self.text).unwrap_or(DEFAULT_LINE_ENDING);
+ self.detect_indent_and_line_ending();
Ok(())
}
@@ -619,132 +621,6 @@ impl Document {
self.encoding
}
- fn detect_indent_style(&mut self) {
- // Build a histogram of the indentation *increases* between
- // subsequent lines, ignoring lines that are all whitespace.
- //
- // Index 0 is for tabs, the rest are 1-8 spaces.
- let histogram: [usize; 9] = {
- let mut histogram = [0; 9];
- let mut prev_line_is_tabs = false;
- let mut prev_line_leading_count = 0usize;
-
- // Loop through the lines, checking for and recording indentation
- // increases as we go.
- 'outer: for line in self.text.lines().take(1000) {
- let mut c_iter = line.chars();
-
- // Is first character a tab or space?
- let is_tabs = match c_iter.next() {
- Some('\t') => true,
- Some(' ') => false,
-
- // Ignore blank lines.
- Some(c) if char_is_line_ending(c) => continue,
-
- _ => {
- prev_line_is_tabs = false;
- prev_line_leading_count = 0;
- continue;
- }
- };
-
- // Count the line's total leading tab/space characters.
- let mut leading_count = 1;
- let mut count_is_done = false;
- for c in c_iter {
- match c {
- '\t' if is_tabs && !count_is_done => leading_count += 1,
- ' ' if !is_tabs && !count_is_done => leading_count += 1,
-
- // We stop counting if we hit whitespace that doesn't
- // qualify as indent or doesn't match the leading
- // whitespace, but we don't exit the loop yet because
- // we still want to determine if the line is blank.
- c if char_is_whitespace(c) => count_is_done = true,
-
- // Ignore blank lines.
- c if char_is_line_ending(c) => continue 'outer,
-
- _ => break,
- }
-
- // Bound the worst-case execution time for weird text files.
- if leading_count > 256 {
- continue 'outer;
- }
- }
-
- // If there was an increase in indentation over the previous
- // line, update the histogram with that increase.
- if (prev_line_is_tabs == is_tabs || prev_line_leading_count == 0)
- && prev_line_leading_count < leading_count
- {
- if is_tabs {
- histogram[0] += 1;
- } else {
- let amount = leading_count - prev_line_leading_count;
- if amount <= 8 {
- histogram[amount] += 1;
- }
- }
- }
-
- // Store this line's leading whitespace info for use with
- // the next line.
- prev_line_is_tabs = is_tabs;
- prev_line_leading_count = leading_count;
- }
-
- // Give more weight to tabs, because their presence is a very
- // strong indicator.
- histogram[0] *= 2;
-
- histogram
- };
-
- // Find the most frequent indent, its frequency, and the frequency of
- // the next-most frequent indent.
- let indent = histogram
- .iter()
- .enumerate()
- .max_by_key(|kv| kv.1)
- .unwrap()
- .0;
- let indent_freq = histogram[indent];
- let indent_freq_2 = *histogram
- .iter()
- .enumerate()
- .filter(|kv| kv.0 != indent)
- .map(|kv| kv.1)
- .max()
- .unwrap();
-
- // Use the auto-detected result if we're confident enough in its
- // accuracy, based on some heuristics. Otherwise fall back to
- // the language-based setting.
- if indent_freq >= 1 && (indent_freq_2 as f64 / indent_freq as f64) < 0.66 {
- // Use the auto-detected setting.
- self.indent_style = match indent {
- 0 => IndentStyle::Tabs,
- _ => IndentStyle::Spaces(indent as u8),
- };
- } else {
- // Fall back to language-based setting.
- let indent = self
- .language
- .as_ref()
- .and_then(|config| config.indent.as_ref())
- .map_or(" ", |config| config.unit.as_str()); // fallback to 2 spaces
-
- self.indent_style = if indent.starts_with(' ') {
- IndentStyle::Spaces(indent.len() as u8)
- } else {
- IndentStyle::Tabs
- };
- }
- }
-
pub fn set_path(&mut self, path: &Path) -> Result<(), std::io::Error> {
let path = canonicalize_path(path)?;
@@ -1002,21 +878,7 @@ impl Document {
/// TODO: we might not need this function anymore, since the information
/// is conveniently available in `Document::indent_style` now.
pub fn indent_unit(&self) -> &'static str {
- match self.indent_style {
- IndentStyle::Tabs => "\t",
- IndentStyle::Spaces(1) => " ",
- IndentStyle::Spaces(2) => " ",
- IndentStyle::Spaces(3) => " ",
- IndentStyle::Spaces(4) => " ",
- IndentStyle::Spaces(5) => " ",
- IndentStyle::Spaces(6) => " ",
- IndentStyle::Spaces(7) => " ",
- IndentStyle::Spaces(8) => " ",
-
- // Unsupported indentation style. This should never happen,
- // but just in case fall back to two spaces.
- _ => " ",
- }
+ self.indent_style.as_str()
}
#[inline]