diff options
-rw-r--r-- | helix-core/src/lib.rs | 6 | ||||
-rw-r--r-- | helix-core/src/line_ending.rs | 169 | ||||
-rw-r--r-- | helix-term/src/commands.rs | 50 | ||||
-rw-r--r-- | helix-term/src/ui/editor.rs | 4 | ||||
-rw-r--r-- | helix-view/src/document.rs | 18 |
5 files changed, 223 insertions, 24 deletions
diff --git a/helix-core/src/lib.rs b/helix-core/src/lib.rs index 03741719..9ac506a6 100644 --- a/helix-core/src/lib.rs +++ b/helix-core/src/lib.rs @@ -6,6 +6,7 @@ pub mod diagnostic; pub mod graphemes; pub mod history; pub mod indent; +pub mod line_ending; pub mod macros; pub mod match_brackets; pub mod movement; @@ -102,6 +103,7 @@ pub use unicode_general_category::get_general_category; #[doc(inline)] pub use {regex, tree_sitter}; +pub use graphemes::RopeGraphemes; pub use position::{coords_at_pos, pos_at_coords, Position}; pub use selection::{Range, Selection}; pub use smallvec::SmallVec; @@ -110,4 +112,8 @@ pub use syntax::Syntax; pub use diagnostic::Diagnostic; pub use state::State; +pub use line_ending::{ + auto_detect_line_ending, get_line_ending, rope_slice_to_line_ending, LineEnding, + DEFAULT_LINE_ENDING, +}; pub use transaction::{Assoc, Change, ChangeSet, Operation, Transaction}; diff --git a/helix-core/src/line_ending.rs b/helix-core/src/line_ending.rs new file mode 100644 index 00000000..2cc5b5d8 --- /dev/null +++ b/helix-core/src/line_ending.rs @@ -0,0 +1,169 @@ +use crate::{Rope, RopeGraphemes, RopeSlice}; + +/// Represents one of the valid Unicode line endings. +#[derive(PartialEq, Copy, Clone, Debug)] +pub enum LineEnding { + Crlf, // CarriageReturn followed by LineFeed + LF, // U+000A -- LineFeed + CR, // U+000D -- CarriageReturn + Nel, // U+0085 -- NextLine + LS, // U+2028 -- Line Separator + VT, // U+000B -- VerticalTab + FF, // U+000C -- FormFeed + PS, // U+2029 -- ParagraphSeparator +} + +impl LineEnding { + pub fn len_chars(&self) -> usize { + match self { + Self::Crlf => 2, + _ => 1, + } + } + + pub fn as_str(&self) -> &str { + match self { + Self::Crlf => "\u{000D}\u{000A}", + Self::LF => "\u{000A}", + Self::Nel => "\u{0085}", + Self::LS => "\u{2028}", + Self::CR => "\u{000D}", + Self::VT => "\u{000B}", + Self::FF => "\u{000C}", + Self::PS => "\u{2029}", + } + } +} + +pub fn rope_slice_to_line_ending(g: &RopeSlice) -> Option<LineEnding> { + if let Some(text) = g.as_str() { + str_to_line_ending(text) + } else if g == "\u{000D}\u{000A}" { + Some(LineEnding::Crlf) + } else { + // Not a line ending + None + } +} + +pub fn str_to_line_ending(g: &str) -> Option<LineEnding> { + match g { + "\u{000D}\u{000A}" => Some(LineEnding::Crlf), + "\u{000A}" => Some(LineEnding::LF), + "\u{000D}" => Some(LineEnding::CR), + "\u{0085}" => Some(LineEnding::Nel), + "\u{2028}" => Some(LineEnding::LS), + "\u{000B}" => Some(LineEnding::VT), + "\u{000C}" => Some(LineEnding::FF), + "\u{2029}" => Some(LineEnding::PS), + // Not a line ending + _ => None, + } +} + +pub fn auto_detect_line_ending(doc: &Rope) -> Option<LineEnding> { + // based on https://github.com/cessen/led/blob/27572c8838a1c664ee378a19358604063881cc1d/src/editor/mod.rs#L88-L162 + + let mut ending = None; + // return first matched line ending. Not all possible line endings are being matched, as they might be special-use only + for line in doc.lines().take(100) { + ending = match line.len_chars() { + 1 => { + let g = RopeGraphemes::new(line.slice((line.len_chars() - 1)..)) + .last() + .unwrap(); + rope_slice_to_line_ending(&g) + } + n if n > 1 => { + let g = RopeGraphemes::new(line.slice((line.len_chars() - 2)..)) + .last() + .unwrap(); + rope_slice_to_line_ending(&g) + } + _ => None, + }; + if ending.is_some() { + match ending { + Some(LineEnding::VT) | Some(LineEnding::FF) | Some(LineEnding::PS) => {} + _ => return ending, + } + } + } + ending +} + +/// Returns the passed line's line ending, if any. +pub fn get_line_ending(line: &RopeSlice) -> Option<LineEnding> { + // Last character as str. + let g1 = line + .slice(line.len_chars().saturating_sub(1)..) + .as_str() + .unwrap(); + + // Last two characters as str, or empty str if they're not contiguous. + // It's fine to punt on the non-contiguous case, because Ropey guarantees + // that CRLF is always contiguous. + let g2 = line + .slice(line.len_chars().saturating_sub(2)..) + .as_str() + .unwrap_or(""); + + // First check the two-character case for CRLF, then check the single-character case. + str_to_line_ending(g2).or_else(|| str_to_line_ending(g1)) +} + +#[cfg(target_os = "windows")] +pub const DEFAULT_LINE_ENDING: LineEnding = LineEnding::Crlf; +#[cfg(not(target_os = "windows"))] +pub const DEFAULT_LINE_ENDING: LineEnding = LineEnding::LF; + +#[cfg(test)] +mod line_ending_tests { + use super::*; + + #[test] + fn test_autodetect() { + assert_eq!( + auto_detect_line_ending(&Rope::from_str("\n")), + Some(LineEnding::LF) + ); + assert_eq!( + auto_detect_line_ending(&Rope::from_str("\r\n")), + Some(LineEnding::Crlf) + ); + assert_eq!(auto_detect_line_ending(&Rope::from_str("hello")), None); + assert_eq!(auto_detect_line_ending(&Rope::from_str("")), None); + assert_eq!( + auto_detect_line_ending(&Rope::from_str("hello\nhelix\r\n")), + Some(LineEnding::LF) + ); + assert_eq!( + auto_detect_line_ending(&Rope::from_str("a formfeed\u{000C}")), + None + ); + assert_eq!( + auto_detect_line_ending(&Rope::from_str("\n\u{000A}\n \u{000A}")), + Some(LineEnding::LF) + ); + assert_eq!( + auto_detect_line_ending(&Rope::from_str( + "a formfeed\u{000C} with a\u{000C} linefeed\u{000A}" + )), + Some(LineEnding::LF) + ); + assert_eq!(auto_detect_line_ending(&Rope::from_str("a formfeed\u{000C} with a\u{000C} carriage return linefeed\u{000D}\u{000A} and a linefeed\u{000A}")), Some(LineEnding::Crlf)); + } + + #[test] + fn test_rope_slice_to_line_ending() { + let r = Rope::from_str("\r\n"); + assert_eq!( + rope_slice_to_line_ending(&r.slice(1..2)), + Some(LineEnding::LF) + ); + assert_eq!( + rope_slice_to_line_ending(&r.slice(0..2)), + Some(LineEnding::Crlf) + ); + } +} diff --git a/helix-term/src/commands.rs b/helix-term/src/commands.rs index 1243a86f..07d2999b 100644 --- a/helix-term/src/commands.rs +++ b/helix-term/src/commands.rs @@ -1,12 +1,12 @@ use helix_core::{ - comment, coords_at_pos, find_first_non_whitespace_char, find_root, graphemes, indent, - match_brackets, + comment, coords_at_pos, find_first_non_whitespace_char, find_root, get_line_ending, graphemes, + indent, match_brackets, movement::{self, Direction}, object, pos_at_coords, regex::{self, Regex}, register::{self, Register, Registers}, - search, selection, Change, ChangeSet, Position, Range, Rope, RopeSlice, Selection, SmallVec, - Tendril, Transaction, + search, selection, Change, ChangeSet, LineEnding, Position, Range, Rope, RopeSlice, Selection, + SmallVec, Tendril, Transaction, }; use helix_view::{ @@ -342,9 +342,12 @@ fn move_line_end(cx: &mut Context) { let text = doc.text(); let line = text.char_to_line(range.head); - // Line end is pos at the start of next line - 1 - // subtract another 1 because the line ends with \n - let pos = text.line_to_char(line + 1).saturating_sub(2); + let pos = text.line_to_char(line + 1).saturating_sub( + get_line_ending(&text.line(line)) + .map(|le| le.len_chars()) + .unwrap_or(0), + ); + Range::new(pos, pos) }); @@ -764,9 +767,12 @@ fn extend_line_end(cx: &mut Context) { let text = doc.text(); let line = text.char_to_line(range.head); - // Line end is pos at the start of next line - 1 - // subtract another 1 because the line ends with \n - let pos = text.line_to_char(line + 1).saturating_sub(2); + let pos = text.line_to_char(line + 1).saturating_sub( + get_line_ending(&text.line(line)) + .map(|le| le.len_chars()) + .unwrap_or(0), + ); + Range::new(range.anchor, pos) }); @@ -1057,7 +1063,7 @@ fn append_mode(cx: &mut Context) { if selection.iter().any(|range| range.head == end) { let transaction = Transaction::change( doc.text(), - std::array::IntoIter::new([(end, end, Some(Tendril::from_char('\n')))]), + std::array::IntoIter::new([(end, end, Some(doc.line_ending().as_str().into()))]), ); doc.apply(&transaction, view.id); } @@ -1662,16 +1668,16 @@ fn open(cx: &mut Context, open: Open) { let mut ranges = SmallVec::with_capacity(selection.len()); let mut offs = 0; + let line = match open { + // adjust position to the end of the line (next line - 1) + Open::Below => line + 1, + // adjust position to the end of the previous line (current line - 1) + Open::Above => line, + }; + let mut transaction = Transaction::change_by_selection(contents, selection, |range| { let line = text.char_to_line(range.head); - let line = match open { - // adjust position to the end of the line (next line - 1) - Open::Below => line + 1, - // adjust position to the end of the previous line (current line - 1) - Open::Above => line, - }; - // insert newlines after this index for both Above and Below variants let linend_index = doc.text().line_to_char(line).saturating_sub(1); @@ -2299,7 +2305,7 @@ pub mod insert { ); let indent = doc.indent_unit().repeat(indent_level); let mut text = String::with_capacity(1 + indent.len()); - text.push('\n'); + text.push_str(doc.line_ending().as_str()); text.push_str(&indent); let head = pos + offs + text.chars().count(); @@ -2320,7 +2326,7 @@ pub mod insert { if helix_core::auto_pairs::PAIRS.contains(&(prev, curr)) { // another newline, indent the end bracket one level less let indent = doc.indent_unit().repeat(indent_level.saturating_sub(1)); - text.push('\n'); + text.push_str(doc.line_ending().as_str()); text.push_str(&indent); } @@ -2439,7 +2445,9 @@ fn paste_impl( ); // if any of values ends \n it's linewise paste - let linewise = values.iter().any(|value| value.ends_with('\n')); + let linewise = values + .iter() + .any(|value| value.ends_with(doc.line_ending().as_str())); let mut values = values.iter().cloned().map(Tendril::from).chain(repeat); diff --git a/helix-term/src/ui/editor.rs b/helix-term/src/ui/editor.rs index d0eedad6..42bb3ba8 100644 --- a/helix-term/src/ui/editor.rs +++ b/helix-term/src/ui/editor.rs @@ -7,7 +7,7 @@ use crate::{ }; use helix_core::{ - coords_at_pos, + coords_at_pos, rope_slice_to_line_ending, syntax::{self, HighlightEvent}, Position, Range, }; @@ -177,7 +177,7 @@ impl EditorView { // iterate over range char by char for grapheme in RopeGraphemes::new(text) { - if grapheme == "\n" { + if rope_slice_to_line_ending(&grapheme).is_some() { visual_x = 0; line += 1; diff --git a/helix-view/src/document.rs b/helix-view/src/document.rs index 8875f70d..49d270e4 100644 --- a/helix-view/src/document.rs +++ b/helix-view/src/document.rs @@ -9,10 +9,12 @@ use std::str::FromStr; use std::sync::Arc; use helix_core::{ + auto_detect_line_ending, chars::{char_is_linebreak, char_is_whitespace}, history::History, syntax::{LanguageConfiguration, LOADER}, - ChangeSet, Diagnostic, Rope, Selection, State, Syntax, Transaction, + ChangeSet, Diagnostic, LineEnding, Rope, Selection, State, Syntax, Transaction, + DEFAULT_LINE_ENDING, }; use crate::{DocumentId, ViewId}; @@ -97,6 +99,7 @@ pub struct Document { diagnostics: Vec<Diagnostic>, language_server: Option<Arc<helix_lsp::Client>>, + line_ending: LineEnding, } use std::fmt; @@ -243,6 +246,7 @@ impl Document { history: Cell::new(History::default()), last_saved_revision: 0, language_server: None, + line_ending: DEFAULT_LINE_ENDING, } } @@ -262,10 +266,14 @@ impl Document { doc }; + // search for line endings + let line_ending = auto_detect_line_ending(&doc).unwrap_or(DEFAULT_LINE_ENDING); + let mut doc = Self::new(doc); // set the path and try detecting the language doc.set_path(&path)?; doc.detect_indent_style(); + doc.set_line_ending(line_ending); Ok(doc) } @@ -525,6 +533,10 @@ impl Document { self.selections.insert(view_id, selection); } + pub fn set_line_ending(&mut self, line_ending: LineEnding) { + self.line_ending = line_ending; + } + fn _apply(&mut self, transaction: &Transaction, view_id: ViewId) -> bool { let old_doc = self.text().clone(); @@ -795,6 +807,10 @@ impl Document { pub fn set_diagnostics(&mut self, diagnostics: Vec<Diagnostic>) { self.diagnostics = diagnostics; } + + pub fn line_ending(&self) -> LineEnding { + self.line_ending + } } #[cfg(test)] |