Work on moving code over to LineEnding instead of assuming '\n'.

Also some general cleanup and some minor fixes along the way.
author: Nathan Vegdahl 2021-06-20 22:09:10 +0000
committer: Nathan Vegdahl 2021-06-20 22:33:02 +0000
commit: 4efd6713c5b30b33c497a1f85b77a7b0a7fd17e0 (patch)
tree: 7661f09f2279a3f9ae6a8f76770a69fd08f95981
parent: 5d22e3c4e574eb24260966de7f20f582e6184e24 (diff)
16 files changed, 228 insertions, 190 deletions
diff --git a/Cargo.lock b/Cargo.lock
index 24c277e1..a1de7138 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -331,6 +331,7 @@ dependencies = [
  "bitflags",
  "cassowary",
  "crossterm",
+ "helix-core",
  "serde",
  "unicode-segmentation",
  "unicode-width",
diff --git a/helix-core/src/auto_pairs.rs b/helix-core/src/auto_pairs.rs
index 74e25ac9..746f201a 100644
--- a/helix-core/src/auto_pairs.rs
+++ b/helix-core/src/auto_pairs.rs
@@ -12,7 +12,7 @@ pub const PAIRS: &[(char, char)] = &[
     ('`', '`'),
 ];
 
-const CLOSE_BEFORE: &str = ")]}'\":;> \n"; // includes space and newline
+const CLOSE_BEFORE: &str = ")]}'\":;> \n\r\u{000B}\u{000C}\u{0085}\u{2028}\u{2029}"; // includes space and newlines
 
 // insert hook:
 // Fn(doc, selection, char) => Option<Transaction>
diff --git a/helix-core/src/chars.rs b/helix-core/src/chars.rs
index 243a1374..24133dd3 100644
--- a/helix-core/src/chars.rs
+++ b/helix-core/src/chars.rs
@@ -1,25 +1,44 @@
-/// Determine whether a character is a line break.
-pub fn char_is_linebreak(c: char) -> bool {
-    matches!(
-        c,
-        '\u{000A}' | // LineFeed
-        '\u{000B}' | // VerticalTab
-        '\u{000C}' | // FormFeed
-        '\u{000D}' | // CarriageReturn
-        '\u{0085}' | // NextLine
-        '\u{2028}' | // Line Separator
-        '\u{2029}' // ParagraphSeparator
-    )
+use crate::LineEnding;
+
+#[derive(Debug, Eq, PartialEq)]
+pub enum CharCategory {
+    Whitespace,
+    Eol,
+    Word,
+    Punctuation,
+    Unknown,
+}
+
+#[inline]
+pub fn categorize_char(ch: char) -> CharCategory {
+    if char_is_line_ending(ch) {
+        CharCategory::Eol
+    } else if ch.is_whitespace() {
+        CharCategory::Whitespace
+    } else if char_is_word(ch) {
+        CharCategory::Word
+    } else if char_is_punctuation(ch) {
+        CharCategory::Punctuation
+    } else {
+        CharCategory::Unknown
+    }
+}
+
+/// Determine whether a character is a line ending.
+#[inline]
+pub fn char_is_line_ending(ch: char) -> bool {
+    LineEnding::from_char(ch).is_some()
 }
 
 /// Determine whether a character qualifies as (non-line-break)
 /// whitespace.
-pub fn char_is_whitespace(c: char) -> bool {
+#[inline]
+pub fn char_is_whitespace(ch: char) -> bool {
     // TODO: this is a naive binary categorization of whitespace
     // characters.  For display, word wrapping, etc. we'll need a better
     // categorization based on e.g. breaking vs non-breaking spaces
     // and whether they're zero-width or not.
-    match c {
+    match ch {
         //'\u{1680}' | // Ogham Space Mark (here for completeness, but usually displayed as a dash, not as whitespace)
         '\u{0009}' | // Character Tabulation
         '\u{0020}' | // Space
@@ -34,8 +53,81 @@ pub fn char_is_whitespace(c: char) -> bool {
         // En Quad, Em Quad, En Space, Em Space, Three-per-em Space,
         // Four-per-em Space, Six-per-em Space, Figure Space,
         // Punctuation Space, Thin Space, Hair Space, Zero Width Space.
-        c if ('\u{2000}' ..= '\u{200B}').contains(&c) => true,
+        ch if ('\u{2000}' ..= '\u{200B}').contains(&ch) => true,
 
         _ => false,
     }
 }
+
+#[inline]
+pub fn char_is_punctuation(ch: char) -> bool {
+    use unicode_general_category::{get_general_category, GeneralCategory};
+
+    matches!(
+        get_general_category(ch),
+        GeneralCategory::OtherPunctuation
+            | GeneralCategory::OpenPunctuation
+            | GeneralCategory::ClosePunctuation
+            | GeneralCategory::InitialPunctuation
+            | GeneralCategory::FinalPunctuation
+            | GeneralCategory::ConnectorPunctuation
+            | GeneralCategory::DashPunctuation
+            | GeneralCategory::MathSymbol
+            | GeneralCategory::CurrencySymbol
+            | GeneralCategory::ModifierSymbol
+    )
+}
+
+#[inline]
+pub fn char_is_word(ch: char) -> bool {
+    ch.is_alphanumeric() || ch == '_'
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    fn test_categorize() {
+        const EOL_TEST_CASE: &'static str = "\n\r\u{000B}\u{000C}\u{0085}\u{2028}\u{2029}";
+        const WORD_TEST_CASE: &'static str =
+            "_hello_world_あいうえおー1234567890１２３４５６７８９０";
+        const PUNCTUATION_TEST_CASE: &'static str =
+            "!\"#$%&\'()*+,-./:;<=>?@[\\]^`{|}~！”＃＄％＆’（）＊＋、。：；＜＝＞？＠「」＾｀｛｜｝～";
+        const WHITESPACE_TEST_CASE: &'static str = "  　   ";
+
+        for ch in EOL_TEST_CASE.chars() {
+            assert_eq!(CharCategory::Eol, categorize_char(ch));
+        }
+
+        for ch in WHITESPACE_TEST_CASE.chars() {
+            assert_eq!(
+                CharCategory::Whitespace,
+                categorize_char(ch),
+                "Testing '{}', but got `{:?}` instead of `Category::Whitespace`",
+                ch,
+                categorize_char(ch)
+            );
+        }
+
+        for ch in WORD_TEST_CASE.chars() {
+            assert_eq!(
+                CharCategory::Word,
+                categorize_char(ch),
+                "Testing '{}', but got `{:?}` instead of `Category::Word`",
+                ch,
+                categorize_char(ch)
+            );
+        }
+
+        for ch in PUNCTUATION_TEST_CASE.chars() {
+            assert_eq!(
+                CharCategory::Punctuation,
+                categorize_char(ch),
+                "Testing '{}', but got `{:?}` instead of `Category::Punctuation`",
+                ch,
+                categorize_char(ch)
+            );
+        }
+    }
+}
diff --git a/helix-core/src/lib.rs b/helix-core/src/lib.rs
index e00e56be..183b9f0a 100644
--- a/helix-core/src/lib.rs
+++ b/helix-core/src/lib.rs
@@ -113,6 +113,6 @@ pub use diagnostic::Diagnostic;
 pub use state::State;
 
 pub use line_ending::{
-    auto_detect_line_ending, get_line_ending, line_end, LineEnding, DEFAULT_LINE_ENDING,
+    auto_detect_line_ending, get_line_ending, line_end_char_index, LineEnding, DEFAULT_LINE_ENDING,
 };
 pub use transaction::{Assoc, Change, ChangeSet, Operation, Transaction};
diff --git a/helix-core/src/line_ending.rs b/helix-core/src/line_ending.rs
index 45e20c88..c4636c63 100644
--- a/helix-core/src/line_ending.rs
+++ b/helix-core/src/line_ending.rs
@@ -1,5 +1,10 @@
 use crate::{Rope, RopeGraphemes, RopeSlice};
 
+#[cfg(target_os = "windows")]
+pub const DEFAULT_LINE_ENDING: LineEnding = LineEnding::Crlf;
+#[cfg(not(target_os = "windows"))]
+pub const DEFAULT_LINE_ENDING: LineEnding = LineEnding::LF;
+
 /// Represents one of the valid Unicode line endings.
 #[derive(PartialEq, Copy, Clone, Debug)]
 pub enum LineEnding {
@@ -14,6 +19,7 @@ pub enum LineEnding {
 }
 
 impl LineEnding {
+    #[inline]
     pub fn len_chars(&self) -> usize {
         match self {
             Self::Crlf => 2,
@@ -21,6 +27,7 @@ impl LineEnding {
         }
     }
 
+    #[inline]
     pub fn as_str(&self) -> &'static str {
         match self {
             Self::Crlf => "\u{000D}\u{000A}",
@@ -34,6 +41,22 @@ impl LineEnding {
         }
     }
 
+    #[inline]
+    pub fn from_char(ch: char) -> Option<LineEnding> {
+        match ch {
+            '\u{000A}' => Some(LineEnding::LF),
+            '\u{000B}' => Some(LineEnding::VT),
+            '\u{000C}' => Some(LineEnding::FF),
+            '\u{000D}' => Some(LineEnding::CR),
+            '\u{0085}' => Some(LineEnding::Nel),
+            '\u{2028}' => Some(LineEnding::LS),
+            '\u{2029}' => Some(LineEnding::PS),
+            // Not a line ending
+            _ => None,
+        }
+    }
+
+    #[inline]
     pub fn from_str(g: &str) -> Option<LineEnding> {
         match g {
             "\u{000D}\u{000A}" => Some(LineEnding::Crlf),
@@ -49,6 +72,7 @@ impl LineEnding {
         }
     }
 
+    #[inline]
     pub fn from_rope_slice(g: &RopeSlice) -> Option<LineEnding> {
         if let Some(text) = g.as_str() {
             LineEnding::from_str(text)
@@ -62,6 +86,11 @@ impl LineEnding {
     }
 }
 
+#[inline]
+pub fn str_is_line_ending(s: &str) -> bool {
+    LineEnding::from_str(s).is_some()
+}
+
 /// Attempts to detect what line ending the passed document uses.
 pub fn auto_detect_line_ending(doc: &Rope) -> Option<LineEnding> {
     // Return first matched line ending. Not all possible line endings
@@ -96,19 +125,13 @@ pub fn get_line_ending(line: &RopeSlice) -> Option<LineEnding> {
 }
 
 /// Returns the char index of the end of the given line, not including its line ending.
-pub fn line_end(slice: &RopeSlice, line: usize) -> usize {
-    slice.line_to_char(line + 1).saturating_sub(
-        get_line_ending(&slice.line(line))
+pub fn line_end_char_index(slice: &RopeSlice, line: usize) -> usize {
+    slice.line_to_char(line + 1)
+        - get_line_ending(&slice.line(line))
             .map(|le| le.len_chars())
-            .unwrap_or(0),
-    )
+            .unwrap_or(0)
 }
 
-#[cfg(target_os = "windows")]
-pub const DEFAULT_LINE_ENDING: LineEnding = LineEnding::Crlf;
-#[cfg(not(target_os = "windows"))]
-pub const DEFAULT_LINE_ENDING: LineEnding = LineEnding::LF;
-
 #[cfg(test)]
 mod line_ending_tests {
     use super::*;
@@ -150,11 +173,11 @@ mod line_ending_tests {
     fn test_rope_slice_to_line_ending() {
         let r = Rope::from_str("\r\n");
         assert_eq!(
-            rope_slice_to_line_ending(&r.slice(1..2)),
+            LineEnding::from_rope_slice(&r.slice(1..2)),
             Some(LineEnding::LF)
         );
         assert_eq!(
-            rope_slice_to_line_ending(&r.slice(0..2)),
+            LineEnding::from_rope_slice(&r.slice(0..2)),
             Some(LineEnding::Crlf)
         );
     }
diff --git a/helix-core/src/movement.rs b/helix-core/src/movement.rs
index 7f47e662..d0023e9f 100644
--- a/helix-core/src/movement.rs
+++ b/helix-core/src/movement.rs
@@ -3,9 +3,13 @@ use std::iter::{self, from_fn, Peekable, SkipWhile};
 use ropey::iter::Chars;
 
 use crate::{
+    chars::{
+        categorize_char, char_is_line_ending, char_is_punctuation, char_is_whitespace,
+        char_is_word, CharCategory,
+    },
     coords_at_pos, get_line_ending,
     graphemes::{nth_next_grapheme_boundary, nth_prev_grapheme_boundary},
-    line_end, pos_at_coords, Position, Range, RopeSlice,
+    line_end_char_index, pos_at_coords, Position, Range, RopeSlice,
 };
 
 #[derive(Debug, Copy, Clone, PartialEq, Eq)]
@@ -37,9 +41,8 @@ pub fn move_horizontally(
             nth_prev_grapheme_boundary(slice, pos, count).max(start)
         }
         Direction::Forward => {
-            // Line end is pos at the start of next line - 1
-            let end = line_end(&slice, line);
-            nth_next_grapheme_boundary(slice, pos, count).min(end)
+            let end_char_idx = line_end_char_index(&slice, line);
+            nth_next_grapheme_boundary(slice, pos, count).min(end_char_idx)
         }
     };
     let anchor = match behaviour {
@@ -68,8 +71,11 @@ pub fn move_vertically(
         ),
     };
 
-    // convert to 0-indexed, subtract another 1 because len_chars() counts \n
-    let new_line_len = slice.line(new_line).len_chars().saturating_sub(2);
+    // Length of the line sans line-ending.
+    let new_line_len = {
+        let line = slice.line(new_line);
+        line.len_chars() - get_line_ending(&line).map(|le| le.len_chars()).unwrap_or(0)
+    };
 
     let new_col = std::cmp::min(horiz as usize, new_line_len);
 
@@ -104,64 +110,6 @@ fn word_move(slice: RopeSlice, mut range: Range, count: usize, target: WordMotio
 }
 
 // ---- util ------------
-#[inline]
-pub(crate) fn is_word(ch: char) -> bool {
-    ch.is_alphanumeric() || ch == '_'
-}
-
-#[inline]
-pub(crate) fn is_end_of_line(ch: char) -> bool {
-    ch == '\n'
-}
-
-#[inline]
-// Whitespace, but not end of line
-pub(crate) fn is_strict_whitespace(ch: char) -> bool {
-    ch.is_whitespace() && !is_end_of_line(ch)
-}
-
-#[inline]
-pub(crate) fn is_punctuation(ch: char) -> bool {
-    use unicode_general_category::{get_general_category, GeneralCategory};
-
-    matches!(
-        get_general_category(ch),
-        GeneralCategory::OtherPunctuation
-            | GeneralCategory::OpenPunctuation
-            | GeneralCategory::ClosePunctuation
-            | GeneralCategory::InitialPunctuation
-            | GeneralCategory::FinalPunctuation
-            | GeneralCategory::ConnectorPunctuation
-            | GeneralCategory::DashPunctuation
-            | GeneralCategory::MathSymbol
-            | GeneralCategory::CurrencySymbol
-            | GeneralCategory::ModifierSymbol
-    )
-}
-
-#[derive(Debug, Eq, PartialEq)]
-pub enum Category {
-    Whitespace,
-    Eol,
-    Word,
-    Punctuation,
-    Unknown,
-}
-
-#[inline]
-pub(crate) fn categorize(ch: char) -> Category {
-    if is_end_of_line(ch) {
-        Category::Eol
-    } else if ch.is_whitespace() {
-        Category::Whitespace
-    } else if is_word(ch) {
-        Category::Word
-    } else if is_punctuation(ch) {
-        Category::Punctuation
-    } else {
-        Category::Unknown
-    }
-}
 
 #[inline]
 /// Returns first index that doesn't satisfy a given predicate when
@@ -235,7 +183,8 @@ impl CharHelpers for Chars<'_> {
         let mut phase = WordMotionPhase::Start;
         let mut head = origin.head;
         let mut anchor: Option<usize> = None;
-        let is_boundary = |a: char, b: Option<char>| categorize(a) != categorize(b.unwrap_or(a));
+        let is_boundary =
+            |a: char, b: Option<char>| categorize_char(a) != categorize_char(b.unwrap_or(a));
         while let Some(peek) = characters.peek().copied() {
             phase = match phase {
                 WordMotionPhase::Start => {
@@ -244,7 +193,8 @@ impl CharHelpers for Chars<'_> {
                         break; // We're at the end, so there's nothing to do.
                     }
                     // Anchor may remain here if the head wasn't at a boundary
-                    if !is_boundary(peek, characters.peek().copied()) && !is_end_of_line(peek) {
+                    if !is_boundary(peek, characters.peek().copied()) && !char_is_line_ending(peek)
+                    {
                         anchor = Some(head);
                     }
                     // First character is always skipped by the head
@@ -252,7 +202,7 @@ impl CharHelpers for Chars<'_> {
                     WordMotionPhase::SkipNewlines
                 }
                 WordMotionPhase::SkipNewlines => {
-                    if is_end_of_line(peek) {
+                    if char_is_line_ending(peek) {
                         characters.next();
                         if characters.peek().is_some() {
                             advance(&mut head);
@@ -286,12 +236,12 @@ fn reached_target(target: WordMotionTarget, peek: char, next_peek: Option<&char>
 
     match target {
         WordMotionTarget::NextWordStart => {
-            ((categorize(peek) != categorize(*next_peek))
-                && (is_end_of_line(*next_peek) || !next_peek.is_whitespace()))
+            ((categorize_char(peek) != categorize_char(*next_peek))
+                && (char_is_line_ending(*next_peek) || !next_peek.is_whitespace()))
         }
         WordMotionTarget::NextWordEnd | WordMotionTarget::PrevWordStart => {
-            ((categorize(peek) != categorize(*next_peek))
-                && (!peek.is_whitespace() || is_end_of_line(*next_peek)))
+            ((categorize_char(peek) != categorize_char(*next_peek))
+                && (!peek.is_whitespace() || char_is_line_ending(*next_peek)))
         }
     }
 }
@@ -330,7 +280,7 @@ mod test {
                 slice,
                 move_vertically(slice, range, Direction::Forward, 1, Movement::Move).head
             ),
-            (1, 2).into()
+            (1, 3).into()
         );
     }
 
@@ -343,12 +293,12 @@ mod test {
         let mut range = Range::point(position);
 
         let moves_and_expected_coordinates = [
-            ((Direction::Forward, 1usize), (0, 1)),
-            ((Direction::Forward, 2usize), (0, 3)),
-            ((Direction::Forward, 0usize), (0, 3)),
-            ((Direction::Forward, 999usize), (0, 31)),
-            ((Direction::Forward, 999usize), (0, 31)),
-            ((Direction::Backward, 999usize), (0, 0)),
+            ((Direction::Forward, 1usize), (0, 1)), // T|his is a simple alphabetic line
+            ((Direction::Forward, 2usize), (0, 3)), // Thi|s is a simple alphabetic line
+            ((Direction::Forward, 0usize), (0, 3)), // Thi|s is a simple alphabetic line
+            ((Direction::Forward, 999usize), (0, 32)), // This is a simple alphabetic line|
+            ((Direction::Forward, 999usize), (0, 32)), // This is a simple alphabetic line|
+            ((Direction::Backward, 999usize), (0, 0)), // |This is a simple alphabetic line
         ];
 
         for ((direction, amount), coordinates) in IntoIter::new(moves_and_expected_coordinates) {
@@ -366,15 +316,15 @@ mod test {
         let mut range = Range::point(position);
 
         let moves_and_expected_coordinates = IntoIter::new([
-            ((Direction::Forward, 1usize), (0, 1)),    // M_ltiline
-            ((Direction::Forward, 2usize), (0, 3)),    // Mul_iline
-            ((Direction::Backward, 6usize), (0, 0)),   // _ultiline
-            ((Direction::Backward, 999usize), (0, 0)), // _ultiline
-            ((Direction::Forward, 3usize), (0, 3)),    // Mul_iline
-            ((Direction::Forward, 0usize), (0, 3)),    // Mul_iline
-            ((Direction::Backward, 0usize), (0, 3)),   // Mul_iline
-            ((Direction::Forward, 999usize), (0, 9)),  // Multilin_
-            ((Direction::Forward, 999usize), (0, 9)),  // Multilin_
+            ((Direction::Forward, 1usize), (0, 1)),    // M|ultiline\n
+            ((Direction::Forward, 2usize), (0, 3)),    // Mul|tiline\n
+            ((Direction::Backward, 6usize), (0, 0)),   // |Multiline\n
+            ((Direction::Backward, 999usize), (0, 0)), // |Multiline\n
+            ((Direction::Forward, 3usize), (0, 3)),    // Mul|tiline\n
+            ((Direction::Forward, 0usize), (0, 3)),    // Mul|tiline\n
+            ((Direction::Backward, 0usize), (0, 3)),   // Mul|tiline\n
+            ((Direction::Forward, 999usize), (0, 9)),  // Multiline|\n
+            ((Direction::Forward, 999usize), (0, 9)),  // Multiline|\n
         ]);
 
         for ((direction, amount), coordinates) in moves_and_expected_coordinates {
@@ -446,7 +396,7 @@ mod test {
             // First descent preserves column as the target line is wider
             ((Axis::V, Direction::Forward, 1usize), (1, 8)),
             // Second descent clamps column as the target line is shorter
-            ((Axis::V, Direction::Forward, 1usize), (2, 4)),
+            ((Axis::V, Direction::Forward, 1usize), (2, 5)),
             // Third descent restores the original column
             ((Axis::V, Direction::Forward, 1usize), (3, 8)),
             // Behaviour is preserved even through long jumps
@@ -760,45 +710,4 @@ mod test {
             }
         }
     }
-
-    #[test]
-    fn test_categorize() {
-        const WORD_TEST_CASE: &'static str =
-            "_hello_world_あいうえおー1234567890１２３４５６７８９０";
-        const PUNCTUATION_TEST_CASE: &'static str =
-            "!\"#$%&\'()*+,-./:;<=>?@[\\]^`{|}~！”＃＄％＆’（）＊＋、。：；＜＝＞？＠「」＾｀｛｜｝～";
-        const WHITESPACE_TEST_CASE: &'static str = "  　   ";
-
-        assert_eq!(Category::Eol, categorize('\n'));
-
-        for ch in WHITESPACE_TEST_CASE.chars() {
-            assert_eq!(
-                Category::Whitespace,
-                categorize(ch),
-                "Testing '{}', but got `{:?}` instead of `Category::Whitespace`",
-                ch,
-                categorize(ch)
-            );
-        }
-
-        for ch in WORD_TEST_CASE.chars() {
-            assert_eq!(
-                Category::Word,
-                categorize(ch),
-                "Testing '{}', but got `{:?}` instead of `Category::Word`",
-                ch,
-                categorize(ch)
-            );
-        }
-
-        for ch in PUNCTUATION_TEST_CASE.chars() {
-            assert_eq!(
-                Category::Punctuation,
-                categorize(ch),
-                "Testing '{}', but got `{:?}` instead of `Category::Punctuation`",
-                ch,
-                categorize(ch)
-            );
-        }
-    }
 }
diff --git a/helix-core/src/position.rs b/helix-core/src/position.rs
index 3d85ff2f..392eee9c 100644
--- a/helix-core/src/position.rs
+++ b/helix-core/src/position.rs
@@ -1,4 +1,5 @@
 use crate::{
+    chars::char_is_line_ending,
     graphemes::{nth_next_grapheme_boundary, RopeGraphemes},
     Rope, RopeSlice,
 };
@@ -23,8 +24,9 @@ impl Position {
     pub fn traverse(self, text: &crate::Tendril) -> Self {
         let Self { mut row, mut col } = self;
         // TODO: there should be a better way here
-        for ch in text.chars() {
-            if ch == '\n' {
+        let mut chars = text.chars().peekable();
+        while let Some(ch) = chars.next() {
+            if char_is_line_ending(ch) && !(ch == '\r' && chars.peek() == Some(&'\n')) {
                 row += 1;
                 col = 0;
             } else {
diff --git a/helix-core/src/syntax.rs b/helix-core/src/syntax.rs
index ae058eb1..92e52d73 100644
--- a/helix-core/src/syntax.rs
+++ b/helix-core/src/syntax.rs
@@ -1,4 +1,4 @@
-use crate::{regex::Regex, Change, Rope, RopeSlice, Transaction};
+use crate::{chars::char_is_line_ending, regex::Regex, Change, Rope, RopeSlice, Transaction};
 pub use helix_syntax::{get_language, get_language_name, Lang};
 
 use std::{
@@ -579,9 +579,10 @@ impl LanguageLayer {
                 mut column,
             } = point;
 
-            // TODO: there should be a better way here
-            for ch in text.bytes() {
-                if ch == b'\n' {
+            // TODO: there should be a better way here.
+            let mut chars = text.chars().peekable();
+            while let Some(ch) = chars.next() {
+                if char_is_line_ending(ch) && !(ch == '\r' && chars.peek() == Some(&'\n')) {
                     row += 1;
                     column = 0;
                 } else {
diff --git a/helix-lsp/src/client.rs b/helix-lsp/src/client.rs
index 101d2f9b..7f136fe8 100644
--- a/helix-lsp/src/client.rs
+++ b/helix-lsp/src/client.rs
@@ -3,7 +3,7 @@ use crate::{
     Call, Error, OffsetEncoding, Result,
 };
 
-use helix_core::{find_root, ChangeSet, Rope};
+use helix_core::{chars::char_is_line_ending, find_root, ChangeSet, Rope};
 use jsonrpc_core as jsonrpc;
 use lsp_types as lsp;
 use serde_json::Value;
@@ -337,8 +337,9 @@ impl Client {
                 mut character,
             } = pos;
 
-            for ch in text.chars() {
-                if ch == '\n' {
+            let mut chars = text.chars().peekable();
+            while let Some(ch) = chars.next() {
+                if char_is_line_ending(ch) && !(ch == '\r' && chars.peek() == Some(&'\n')) {
                     line += 1;
                     character = 0;
                 } else {
diff --git a/helix-term/src/commands.rs b/helix-term/src/commands.rs
index 8124c17a..b006504b 100644
--- a/helix-term/src/commands.rs
+++ b/helix-term/src/commands.rs
@@ -1,6 +1,6 @@
 use helix_core::{
     comment, coords_at_pos, find_first_non_whitespace_char, find_root, get_line_ending, graphemes,
-    indent, line_end, match_brackets,
+    indent, line_end_char_index, match_brackets,
     movement::{self, Direction},
     object, pos_at_coords,
     regex::{self, Regex},
@@ -342,7 +342,7 @@ fn move_line_end(cx: &mut Context) {
         let text = doc.text();
         let line = text.char_to_line(range.head);
 
-        let pos = line_end(&text.slice(..), line);
+        let pos = line_end_char_index(&text.slice(..), line);
 
         Range::new(pos, pos)
     });
@@ -490,6 +490,8 @@ where
     let count = cx.count();
 
     // need to wait for next key
+    // TODO: should this be done by grapheme rather than char?  For example,
+    // we can't properly handle the line-ending case here in terms of char.
     cx.on_next_key(move |cx, event| {
         let ch = match event {
             KeyEvent {
@@ -623,7 +625,7 @@ fn replace(cx: &mut Context) {
             KeyEvent {
                 code: KeyCode::Enter,
                 ..
-            } => Some('\n'), // TODO: replace this with DEFAULT_LINE_ENDING
+            } => Some('\n'), // TODO: use the document's default line ending.
             _ => None,
         };
 
@@ -763,7 +765,7 @@ fn extend_line_end(cx: &mut Context) {
         let text = doc.text();
         let line = text.char_to_line(range.head);
 
-        let pos = line_end(&text.slice(..), line);
+        let pos = line_end_char_index(&text.slice(..), line);
 
         Range::new(range.anchor, pos)
     });
@@ -1642,7 +1644,7 @@ fn append_to_line(cx: &mut Context) {
     let selection = doc.selection(view.id).transform(|range| {
         let text = doc.text();
         let line = text.char_to_line(range.head);
-        let pos = line_end(&text.slice(..), line);
+        let pos = line_end_char_index(&text.slice(..), line);
         Range::new(pos, pos)
     });
     doc.set_selection(view.id, selection);
diff --git a/helix-term/src/ui/markdown.rs b/helix-term/src/ui/markdown.rs
index be113747..3ce3a5b8 100644
--- a/helix-term/src/ui/markdown.rs
+++ b/helix-term/src/ui/markdown.rs
@@ -110,6 +110,8 @@ fn parse<'a>(contents: &'a str, theme: Option<&Theme>) -> tui::text::Text<'a> {
                                         // TODO: replace tabs with indentation
 
                                         let mut slice = &text[start..end];
+                                        // TODO: do we need to handle all unicode line endings
+                                        // here, or is just '\n' okay?
                                         while let Some(end) = slice.find('\n') {
                                             // emit span up to newline
                                             let text = &slice[..end];
diff --git a/helix-tui/Cargo.toml b/helix-tui/Cargo.toml
index 89fa755d..30e2374d 100644
--- a/helix-tui/Cargo.toml
+++ b/helix-tui/Cargo.toml
@@ -22,3 +22,4 @@ unicode-segmentation = "1.2"
 unicode-width = "0.1"
 crossterm = { version = "0.20", optional = true }
 serde = { version = "1", "optional" = true, features = ["derive"]}
+helix-core = { version = "0.2", path = "../helix-core" }
diff --git a/helix-tui/src/text.rs b/helix-tui/src/text.rs
index c671e918..b23bfd81 100644
--- a/helix-tui/src/text.rs
+++ b/helix-tui/src/text.rs
@@ -47,6 +47,7 @@
 //! ]);
 //! ```
 use crate::style::Style;
+use helix_core::line_ending::str_is_line_ending;
 use std::borrow::Cow;
 use unicode_segmentation::UnicodeSegmentation;
 use unicode_width::UnicodeWidthStr;
@@ -177,7 +178,7 @@ impl<'a> Span<'a> {
                 symbol: g,
                 style: base_style.patch(self.style),
             })
-            .filter(|s| s.symbol != "\n")
+            .filter(|s| !str_is_line_ending(s.symbol))
     }
 }
 
diff --git a/helix-tui/src/widgets/reflow.rs b/helix-tui/src/widgets/reflow.rs
index 94ff7330..ae561a4f 100644
--- a/helix-tui/src/widgets/reflow.rs
+++ b/helix-tui/src/widgets/reflow.rs
@@ -1,4 +1,5 @@
 use crate::text::StyledGrapheme;
+use helix_core::line_ending::str_is_line_ending;
 use unicode_segmentation::UnicodeSegmentation;
 use unicode_width::UnicodeWidthStr;
 
@@ -62,13 +63,13 @@ impl<'a, 'b> LineComposer<'a> for WordWrapper<'a, 'b> {
             // Ignore characters wider that the total max width.
             if symbol.width() as u16 > self.max_line_width
                 // Skip leading whitespace when trim is enabled.
-                || self.trim && symbol_whitespace && symbol != "\n" && current_line_width == 0
+                || self.trim && symbol_whitespace && !str_is_line_ending(symbol) && current_line_width == 0
             {
                 continue;
             }
 
             // Break on newline and discard it.
-            if symbol == "\n" {
+            if str_is_line_ending(symbol) {
                 if prev_whitespace {
                     current_line_width = width_to_last_word_end;
                     self.current_line.truncate(symbols_to_last_word_end);
@@ -170,7 +171,7 @@ impl<'a, 'b> LineComposer<'a> for LineTruncator<'a, 'b> {
             }
 
             // Break on newline and discard it.
-            if symbol == "\n" {
+            if str_is_line_ending(symbol) {
                 break;
             }
 
@@ -199,7 +200,7 @@ impl<'a, 'b> LineComposer<'a> for LineTruncator<'a, 'b> {
 
         if skip_rest {
             for StyledGrapheme { symbol, .. } in &mut self.symbols {
-                if symbol == "\n" {
+                if str_is_line_ending(symbol) {
                     break;
                 }
             }
diff --git a/helix-view/src/document.rs b/helix-view/src/document.rs
index 80be1ed2..3e38c24d 100644
--- a/helix-view/src/document.rs
+++ b/helix-view/src/document.rs
@@ -10,7 +10,7 @@ use std::sync::Arc;
 
 use helix_core::{
     auto_detect_line_ending,
-    chars::{char_is_linebreak, char_is_whitespace},
+    chars::{char_is_line_ending, char_is_whitespace},
     history::History,
     syntax::{LanguageConfiguration, LOADER},
     ChangeSet, Diagnostic, LineEnding, Rope, Selection, State, Syntax, Transaction,
@@ -81,6 +81,9 @@ pub struct Document {
     /// Current indent style.
     pub indent_style: IndentStyle,
 
+    /// The document's default line ending.
+    pub line_ending: LineEnding,
+
     syntax: Option<Syntax>,
     // /// Corresponding language scope name. Usually `source.<lang>`.
     pub(crate) language: Option<Arc<LanguageConfiguration>>,
@@ -99,7 +102,6 @@ pub struct Document {
 
     diagnostics: Vec<Diagnostic>,
     language_server: Option<Arc<helix_lsp::Client>>,
-    line_ending: LineEnding,
 }
 
 use std::fmt;
@@ -254,21 +256,21 @@ impl Document {
     pub fn load(path: PathBuf) -> Result<Self, Error> {
         use std::{fs::File, io::BufReader};
 
-        let doc = if !path.exists() {
+        let mut doc = if !path.exists() {
             Rope::from(DEFAULT_LINE_ENDING.as_str())
         } else {
             let file = File::open(&path).context(format!("unable to open {:?}", path))?;
-            let mut doc = Rope::from_reader(BufReader::new(file))?;
-            // add missing newline at the end of file
-            if doc.len_bytes() == 0 || doc.byte(doc.len_bytes() - 1) != b'\n' {
-                doc.insert_char(doc.len_chars(), '\n');
-            }
-            doc
+            Rope::from_reader(BufReader::new(file))?
         };
 
         // search for line endings
         let line_ending = auto_detect_line_ending(&doc).unwrap_or(DEFAULT_LINE_ENDING);
 
+        // add a line ending at the end of the file only when it is empty or lacks one
+        if doc.len_bytes() == 0 || !char_is_line_ending(doc.char(doc.len_chars() - 1)) {
+            doc.insert(doc.len_chars(), line_ending.as_str());
+        }
+
         let mut doc = Self::new(doc);
         // set the path and try detecting the language
         doc.set_path(&path)?;
@@ -379,7 +381,7 @@ impl Document {
                     Some(' ') => false,
 
                     // Ignore blank lines.
-                    Some(c) if char_is_linebreak(c) => continue,
+                    Some(c) if char_is_line_ending(c) => continue,
 
                     _ => {
                         prev_line_is_tabs = false;
@@ -403,7 +405,7 @@ impl Document {
                         c if char_is_whitespace(c) => count_is_done = true,
 
                         // Ignore blank lines.
-                        c if char_is_linebreak(c) => continue 'outer,
+                        c if char_is_line_ending(c) => continue 'outer,
 
                         _ => break,
                     }
diff --git a/helix-view/src/editor.rs b/helix-view/src/editor.rs
index db8ae87a..fb2eb36d 100644
--- a/helix-view/src/editor.rs
+++ b/helix-view/src/editor.rs
@@ -12,7 +12,7 @@ use anyhow::Error;
 
 pub use helix_core::diagnostic::Severity;
 pub use helix_core::register::Registers;
-use helix_core::Position;
+use helix_core::{Position, DEFAULT_LINE_ENDING};
 
 #[derive(Debug)]
 pub struct Editor {
@@ -150,7 +150,7 @@ impl Editor {
 
     pub fn new_file(&mut self, action: Action) -> DocumentId {
         use helix_core::Rope;
-        let doc = Document::new(Rope::from("\n"));
+        let doc = Document::new(Rope::from(DEFAULT_LINE_ENDING.as_str()));
         let id = self.documents.insert(doc);
         self.documents[id].id = id;
         self.switch(id, action);
author	Nathan Vegdahl	2021-06-20 22:09:10 +0000
committer	Nathan Vegdahl	2021-06-20 22:33:02 +0000
commit	4efd6713c5b30b33c497a1f85b77a7b0a7fd17e0 (patch)
tree	7661f09f2279a3f9ae6a8f76770a69fd08f95981
parent	5d22e3c4e574eb24260966de7f20f582e6184e24 (diff)