diff options
author | Jan Hrastnik | 2021-06-16 15:00:21 +0000 |
---|---|---|
committer | Jan Hrastnik | 2021-06-16 15:00:21 +0000 |
commit | 3756c21baefa6182beaa9a3d5ced9d720cf9adcb (patch) | |
tree | 1b5593b5b74a41c42ee22958350f4795cde12509 /helix-view/src/document.rs | |
parent | a364d6c3837c36225dcd4ec9b15ef2c192feef0b (diff) |
rebase on branch line_ending_detection
Diffstat (limited to 'helix-view/src/document.rs')
-rw-r--r-- | helix-view/src/document.rs | 77 |
1 files changed, 74 insertions, 3 deletions
diff --git a/helix-view/src/document.rs b/helix-view/src/document.rs index a1c4b407..8b735b9d 100644 --- a/helix-view/src/document.rs +++ b/helix-view/src/document.rs @@ -8,7 +8,7 @@ use helix_core::{ chars::{char_is_linebreak, char_is_whitespace}, history::History, syntax::{LanguageConfiguration, LOADER}, - ChangeSet, Diagnostic, Rope, Selection, State, Syntax, Transaction, + ChangeSet, Diagnostic, History, Rope, RopeSlice, RopeGraphemes, Selection, State, Syntax, Transaction, }; use crate::{DocumentId, ViewId}; @@ -22,12 +22,28 @@ pub enum Mode { Insert, } + #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] pub enum IndentStyle { Tabs, Spaces(u8), } +/// Represents one of the valid Unicode line endings. +/// Also acts as an index into `LINE_ENDINGS`. +#[derive(PartialEq, Copy, Clone, Debug)] +pub enum LineEnding { + None = 0, // No line ending + CRLF = 1, // CarriageReturn followed by LineFeed + LF = 2, // U+000A -- LineFeed + VT = 3, // U+000B -- VerticalTab + FF = 4, // U+000C -- FormFeed + CR = 5, // U+000D -- CarriageReturn + NEL = 6, // U+0085 -- NextLine + LS = 7, // U+2028 -- Line Separator + PS = 8, // U+2029 -- ParagraphSeparator +} + pub struct Document { // rope + selection pub(crate) id: DocumentId, @@ -61,6 +77,7 @@ pub struct Document { diagnostics: Vec<Diagnostic>, language_server: Option<Arc<helix_lsp::Client>>, + line_ending: LineEnding } use std::fmt; @@ -146,11 +163,61 @@ pub fn canonicalize_path(path: &Path) -> std::io::Result<PathBuf> { std::env::current_dir().map(|current_dir| normalize_path(¤t_dir.join(path))) } +pub fn auto_detect_line_ending(doc: &Rope) -> LineEnding { + // based on https://github.com/cessen/led/blob/27572c8838a1c664ee378a19358604063881cc1d/src/editor/mod.rs#L88-L162 + + let mut ending = LineEnding::None; + for line in doc.lines().take(1) { // check first line only - unsure how sound this is + // Get the line ending + ending = if line.len_chars() == 1 { + let g = RopeGraphemes::new(line.slice((line.len_chars() - 1)..)) + .last() + .unwrap(); + rope_slice_to_line_ending(&g) + } else if line.len_chars() > 1 { + let g = RopeGraphemes::new(line.slice((line.len_chars() - 2)..)) + .last() + .unwrap(); + rope_slice_to_line_ending(&g) + } else { + LineEnding::None + }; + } + ending +} + +pub fn rope_slice_to_line_ending(g: &RopeSlice) -> LineEnding { + if let Some(text) = g.as_str() { + str_to_line_ending(text) + } else if g == "\u{000D}\u{000A}" { + LineEnding::CRLF + } else { + // Not a line ending + LineEnding::None + } +} + +pub fn str_to_line_ending(g: &str) -> LineEnding { + match g { + "\u{000D}\u{000A}" => LineEnding::CRLF, + "\u{000A}" => LineEnding::LF, + "\u{000B}" => LineEnding::VT, + "\u{000C}" => LineEnding::FF, + "\u{000D}" => LineEnding::CR, + "\u{0085}" => LineEnding::NEL, + "\u{2028}" => LineEnding::LS, + "\u{2029}" => LineEnding::PS, + + // Not a line ending + _ => LineEnding::None, + } +} + use helix_lsp::lsp; use url::Url; impl Document { - pub fn new(text: Rope) -> Self { + pub fn new(text: Rope, line_ending: LineEnding) -> Self { let changes = ChangeSet::new(&text); let old_state = None; @@ -171,6 +238,7 @@ impl Document { history: Cell::new(History::default()), last_saved_revision: 0, language_server: None, + line_ending: line_ending } } @@ -190,7 +258,10 @@ impl Document { doc }; - let mut doc = Self::new(doc); + // search for line endings + let line_ending = auto_detect_line_ending(&doc); + + let mut doc = Self::new(doc, line_ending); // set the path and try detecting the language doc.set_path(&path)?; doc.detect_indent_style(); |