aboutsummaryrefslogtreecommitdiff
path: root/helix-view/src/document.rs
diff options
context:
space:
mode:
Diffstat (limited to 'helix-view/src/document.rs')
-rw-r--r-- helix-view/src/document.rs 170
1 files changed, 16 insertions, 154 deletions
diff --git a/helix-view/src/document.rs b/helix-view/src/document.rs
index 9eabda46..c02d6656 100644
--- a/helix-view/src/document.rs
+++ b/helix-view/src/document.rs
@@ -9,8 +9,8 @@ use std::str::FromStr;
use std::sync::Arc;
use helix_core::{
- chars::{char_is_line_ending, char_is_whitespace},
history::History,
+ indent::{auto_detect_indent_style, IndentStyle},
line_ending::auto_detect_line_ending,
syntax::{self, LanguageConfiguration},
ChangeSet, Diagnostic, LineEnding, Rope, RopeBuilder, Selection, State, Syntax, Transaction,
@@ -63,12 +63,6 @@ impl<'de> Deserialize<'de> for Mode {
}
}
-#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
-pub enum IndentStyle {
- Tabs,
- Spaces(u8),
-}
-
pub struct Document {
pub(crate) id: DocumentId,
text: Rope,
@@ -460,9 +454,7 @@ impl Document {
doc.detect_language(theme, loader);
}
- // Detect indentation style and line ending.
- doc.detect_indent_style();
- doc.line_ending = auto_detect_line_ending(&doc.text).unwrap_or(DEFAULT_LINE_ENDING);
+ doc.detect_indent_and_line_ending();
Ok(doc)
}
@@ -580,6 +572,18 @@ impl Document {
}
}
+ pub fn detect_indent_and_line_ending(&mut self) {
+ self.indent_style = auto_detect_indent_style(&self.text).unwrap_or_else(|| {
+ IndentStyle::from_str(
+ self.language
+ .as_ref()
+ .and_then(|config| config.indent.as_ref())
+ .map_or("  ", |config| config.unit.as_str()), // Fallback to 2 spaces.
+ )
+ });
+ self.line_ending = auto_detect_line_ending(&self.text).unwrap_or(DEFAULT_LINE_ENDING);
+ }
+
/// Reload the document from its path.
pub fn reload(&mut self, view_id: ViewId) -> Result<(), Error> {
let encoding = &self.encoding;
@@ -598,9 +602,7 @@ impl Document {
self.append_changes_to_history(view_id);
self.reset_modified();
- // Detect indentation style and line ending.
- self.detect_indent_style();
- self.line_ending = auto_detect_line_ending(&self.text).unwrap_or(DEFAULT_LINE_ENDING);
+ self.detect_indent_and_line_ending();
Ok(())
}
@@ -619,132 +621,6 @@ impl Document {
self.encoding
}
- fn detect_indent_style(&mut self) {
- // Build a histogram of the indentation *increases* between
- // subsequent lines, ignoring lines that are all whitespace.
- //
- // Index 0 is for tabs, the rest are 1-8 spaces.
- let histogram: [usize; 9] = {
- let mut histogram = [0; 9];
- let mut prev_line_is_tabs = false;
- let mut prev_line_leading_count = 0usize;
-
- // Loop through the lines, checking for and recording indentation
- // increases as we go.
- 'outer: for line in self.text.lines().take(1000) {
- let mut c_iter = line.chars();
-
- // Is first character a tab or space?
- let is_tabs = match c_iter.next() {
- Some('\t') => true,
- Some(' ') => false,
-
- // Ignore blank lines.
- Some(c) if char_is_line_ending(c) => continue,
-
- _ => {
- prev_line_is_tabs = false;
- prev_line_leading_count = 0;
- continue;
- }
- };
-
- // Count the line's total leading tab/space characters.
- let mut leading_count = 1;
- let mut count_is_done = false;
- for c in c_iter {
- match c {
- '\t' if is_tabs && !count_is_done => leading_count += 1,
- ' ' if !is_tabs && !count_is_done => leading_count += 1,
-
- // We stop counting if we hit whitespace that doesn't
- // qualify as indent or doesn't match the leading
- // whitespace, but we don't exit the loop yet because
- // we still want to determine if the line is blank.
- c if char_is_whitespace(c) => count_is_done = true,
-
- // Ignore blank lines.
- c if char_is_line_ending(c) => continue 'outer,
-
- _ => break,
- }
-
- // Bound the worst-case execution time for weird text files.
- if leading_count > 256 {
- continue 'outer;
- }
- }
-
- // If there was an increase in indentation over the previous
- // line, update the histogram with that increase.
- if (prev_line_is_tabs == is_tabs || prev_line_leading_count == 0)
- && prev_line_leading_count < leading_count
- {
- if is_tabs {
- histogram[0] += 1;
- } else {
- let amount = leading_count - prev_line_leading_count;
- if amount <= 8 {
- histogram[amount] += 1;
- }
- }
- }
-
- // Store this line's leading whitespace info for use with
- // the next line.
- prev_line_is_tabs = is_tabs;
- prev_line_leading_count = leading_count;
- }
-
- // Give more weight to tabs, because their presence is a very
- // strong indicator.
- histogram[0] *= 2;
-
- histogram
- };
-
- // Find the most frequent indent, its frequency, and the frequency of
- // the next-most frequent indent.
- let indent = histogram
- .iter()
- .enumerate()
- .max_by_key(|kv| kv.1)
- .unwrap()
- .0;
- let indent_freq = histogram[indent];
- let indent_freq_2 = *histogram
- .iter()
- .enumerate()
- .filter(|kv| kv.0 != indent)
- .map(|kv| kv.1)
- .max()
- .unwrap();
-
- // Use the auto-detected result if we're confident enough in its
- // accuracy, based on some heuristics. Otherwise fall back to
- // the language-based setting.
- if indent_freq >= 1 && (indent_freq_2 as f64 / indent_freq as f64) < 0.66 {
- // Use the auto-detected setting.
- self.indent_style = match indent {
- 0 => IndentStyle::Tabs,
- _ => IndentStyle::Spaces(indent as u8),
- };
- } else {
- // Fall back to language-based setting.
- let indent = self
- .language
- .as_ref()
- .and_then(|config| config.indent.as_ref())
- .map_or("  ", |config| config.unit.as_str()); // fallback to 2 spaces
-
- self.indent_style = if indent.starts_with(' ') {
- IndentStyle::Spaces(indent.len() as u8)
- } else {
- IndentStyle::Tabs
- };
- }
- }
-
pub fn set_path(&mut self, path: &Path) -> Result<(), std::io::Error> {
let path = canonicalize_path(path)?;
@@ -1002,21 +878,7 @@ impl Document {
/// TODO: we might not need this function anymore, since the information
/// is conveniently available in `Document::indent_style` now.
pub fn indent_unit(&self) -> &'static str {
- match self.indent_style {
- IndentStyle::Tabs => "\t",
- IndentStyle::Spaces(1) => " ",
- IndentStyle::Spaces(2) => "  ",
- IndentStyle::Spaces(3) => "   ",
- IndentStyle::Spaces(4) => "    ",
- IndentStyle::Spaces(5) => "     ",
- IndentStyle::Spaces(6) => "      ",
- IndentStyle::Spaces(7) => "       ",
- IndentStyle::Spaces(8) => "        ",
-
- // Unsupported indentation style. This should never happen,
- // but just in case fall back to two spaces.
- _ => "  ",
- }
+ self.indent_style.as_str()
}
#[inline]