about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- Cargo.lock 1
-rw-r--r-- helix-core/src/auto_pairs.rs 2
-rw-r--r-- helix-core/src/chars.rs 122
-rw-r--r-- helix-core/src/lib.rs 5
-rw-r--r-- helix-core/src/line_ending.rs 184
-rw-r--r-- helix-core/src/movement.rs 169
-rw-r--r-- helix-core/src/position.rs 6
-rw-r--r-- helix-core/src/syntax.rs 9
-rw-r--r-- helix-lsp/src/client.rs 7
-rw-r--r-- helix-term/src/commands.rs 43
-rw-r--r-- helix-term/src/ui/editor.rs 4
-rw-r--r-- helix-term/src/ui/markdown.rs 2
-rw-r--r-- helix-tui/Cargo.toml 1
-rw-r--r-- helix-tui/src/text.rs 3
-rw-r--r-- helix-tui/src/widgets/reflow.rs 9
-rw-r--r-- helix-view/src/document.rs 42
-rw-r--r-- helix-view/src/editor.rs 5
17 files changed, 419 insertions, 195 deletions
diff --git a/Cargo.lock b/Cargo.lock
index f360117b..e90f5482 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -344,6 +344,7 @@ dependencies = [
"bitflags",
"cassowary",
"crossterm",
+ "helix-core",
"serde",
"unicode-segmentation",
"unicode-width",
diff --git a/helix-core/src/auto_pairs.rs b/helix-core/src/auto_pairs.rs
index 74e25ac9..746f201a 100644
--- a/helix-core/src/auto_pairs.rs
+++ b/helix-core/src/auto_pairs.rs
@@ -12,7 +12,7 @@ pub const PAIRS: &[(char, char)] = &[
('`', '`'),
];
-const CLOSE_BEFORE: &str = ")]}'\":;> \n"; // includes space and newline
+const CLOSE_BEFORE: &str = ")]}'\":;> \n\r\u{000B}\u{000C}\u{0085}\u{2028}\u{2029}"; // includes space and newlines
// insert hook:
// Fn(doc, selection, char) => Option<Transaction>
diff --git a/helix-core/src/chars.rs b/helix-core/src/chars.rs
index 243a1374..24133dd3 100644
--- a/helix-core/src/chars.rs
+++ b/helix-core/src/chars.rs
@@ -1,25 +1,44 @@
-/// Determine whether a character is a line break.
-pub fn char_is_linebreak(c: char) -> bool {
- matches!(
- c,
- '\u{000A}' | // LineFeed
- '\u{000B}' | // VerticalTab
- '\u{000C}' | // FormFeed
- '\u{000D}' | // CarriageReturn
- '\u{0085}' | // NextLine
- '\u{2028}' | // Line Separator
- '\u{2029}' // ParagraphSeparator
- )
+use crate::LineEnding;
+
+#[derive(Debug, Eq, PartialEq)]
+pub enum CharCategory {
+ Whitespace,
+ Eol,
+ Word,
+ Punctuation,
+ Unknown,
+}
+
+#[inline]
+pub fn categorize_char(ch: char) -> CharCategory {
+ if char_is_line_ending(ch) {
+ CharCategory::Eol
+ } else if ch.is_whitespace() {
+ CharCategory::Whitespace
+ } else if char_is_word(ch) {
+ CharCategory::Word
+ } else if char_is_punctuation(ch) {
+ CharCategory::Punctuation
+ } else {
+ CharCategory::Unknown
+ }
+}
+
+/// Determine whether a character is a line ending.
+#[inline]
+pub fn char_is_line_ending(ch: char) -> bool {
+ LineEnding::from_char(ch).is_some()
}
/// Determine whether a character qualifies as (non-line-break)
/// whitespace.
-pub fn char_is_whitespace(c: char) -> bool {
+#[inline]
+pub fn char_is_whitespace(ch: char) -> bool {
// TODO: this is a naive binary categorization of whitespace
// characters. For display, word wrapping, etc. we'll need a better
// categorization based on e.g. breaking vs non-breaking spaces
// and whether they're zero-width or not.
- match c {
+ match ch {
//'\u{1680}' | // Ogham Space Mark (here for completeness, but usually displayed as a dash, not as whitespace)
'\u{0009}' | // Character Tabulation
'\u{0020}' | // Space
@@ -34,8 +53,81 @@ pub fn char_is_whitespace(c: char) -> bool {
// En Quad, Em Quad, En Space, Em Space, Three-per-em Space,
// Four-per-em Space, Six-per-em Space, Figure Space,
// Punctuation Space, Thin Space, Hair Space, Zero Width Space.
- c if ('\u{2000}' ..= '\u{200B}').contains(&c) => true,
+ ch if ('\u{2000}' ..= '\u{200B}').contains(&ch) => true,
_ => false,
}
}
+
+#[inline]
+pub fn char_is_punctuation(ch: char) -> bool {
+ use unicode_general_category::{get_general_category, GeneralCategory};
+
+ matches!(
+ get_general_category(ch),
+ GeneralCategory::OtherPunctuation
+ | GeneralCategory::OpenPunctuation
+ | GeneralCategory::ClosePunctuation
+ | GeneralCategory::InitialPunctuation
+ | GeneralCategory::FinalPunctuation
+ | GeneralCategory::ConnectorPunctuation
+ | GeneralCategory::DashPunctuation
+ | GeneralCategory::MathSymbol
+ | GeneralCategory::CurrencySymbol
+ | GeneralCategory::ModifierSymbol
+ )
+}
+
+#[inline]
+pub fn char_is_word(ch: char) -> bool {
+ ch.is_alphanumeric() || ch == '_'
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+
+ #[test]
+ fn test_categorize() {
+ const EOL_TEST_CASE: &'static str = "\n\r\u{000B}\u{000C}\u{0085}\u{2028}\u{2029}";
+ const WORD_TEST_CASE: &'static str =
+ "_hello_world_あいうえおー12345678901234567890";
+ const PUNCTUATION_TEST_CASE: &'static str =
+ "!\"#$%&\'()*+,-./:;<=>?@[\\]^`{|}~!”#$%&’()*+、。:;<=>?@「」^`{|}~";
+ const WHITESPACE_TEST_CASE: &'static str = "      ";
+
+ for ch in EOL_TEST_CASE.chars() {
+ assert_eq!(CharCategory::Eol, categorize_char(ch));
+ }
+
+ for ch in WHITESPACE_TEST_CASE.chars() {
+ assert_eq!(
+ CharCategory::Whitespace,
+ categorize_char(ch),
+ "Testing '{}', but got `{:?}` instead of `CharCategory::Whitespace`",
+ ch,
+ categorize_char(ch)
+ );
+ }
+
+ for ch in WORD_TEST_CASE.chars() {
+ assert_eq!(
+ CharCategory::Word,
+ categorize_char(ch),
+ "Testing '{}', but got `{:?}` instead of `CharCategory::Word`",
+ ch,
+ categorize_char(ch)
+ );
+ }
+
+ for ch in PUNCTUATION_TEST_CASE.chars() {
+ assert_eq!(
+ CharCategory::Punctuation,
+ categorize_char(ch),
+ "Testing '{}', but got `{:?}` instead of `CharCategory::Punctuation`",
+ ch,
+ categorize_char(ch)
+ );
+ }
+ }
+}
diff --git a/helix-core/src/lib.rs b/helix-core/src/lib.rs
index 4a9ac891..69294688 100644
--- a/helix-core/src/lib.rs
+++ b/helix-core/src/lib.rs
@@ -6,6 +6,7 @@ pub mod diagnostic;
pub mod graphemes;
pub mod history;
pub mod indent;
+pub mod line_ending;
pub mod macros;
pub mod match_brackets;
pub mod movement;
@@ -106,6 +107,7 @@ pub use tendril::StrTendril as Tendril;
#[doc(inline)]
pub use {regex, tree_sitter};
+pub use graphemes::RopeGraphemes;
pub use position::{coords_at_pos, pos_at_coords, Position};
pub use selection::{Range, Selection};
pub use smallvec::SmallVec;
@@ -114,4 +116,7 @@ pub use syntax::Syntax;
pub use diagnostic::Diagnostic;
pub use state::State;
+pub use line_ending::{
+ auto_detect_line_ending, get_line_ending, line_end_char_index, LineEnding, DEFAULT_LINE_ENDING,
+};
pub use transaction::{Assoc, Change, ChangeSet, Operation, Transaction};
diff --git a/helix-core/src/line_ending.rs b/helix-core/src/line_ending.rs
new file mode 100644
index 00000000..c4636c63
--- /dev/null
+++ b/helix-core/src/line_ending.rs
@@ -0,0 +1,184 @@
+use crate::{Rope, RopeGraphemes, RopeSlice};
+
+#[cfg(target_os = "windows")]
+pub const DEFAULT_LINE_ENDING: LineEnding = LineEnding::Crlf;
+#[cfg(not(target_os = "windows"))]
+pub const DEFAULT_LINE_ENDING: LineEnding = LineEnding::LF;
+
+/// Represents one of the valid Unicode line endings.
+#[derive(PartialEq, Copy, Clone, Debug)]
+pub enum LineEnding {
+ Crlf, // CarriageReturn followed by LineFeed
+ LF, // U+000A -- LineFeed
+ VT, // U+000B -- VerticalTab
+ FF, // U+000C -- FormFeed
+ CR, // U+000D -- CarriageReturn
+ Nel, // U+0085 -- NextLine
+ LS, // U+2028 -- Line Separator
+ PS, // U+2029 -- ParagraphSeparator
+}
+
+impl LineEnding {
+ #[inline]
+ pub fn len_chars(&self) -> usize {
+ match self {
+ Self::Crlf => 2,
+ _ => 1,
+ }
+ }
+
+ #[inline]
+ pub fn as_str(&self) -> &'static str {
+ match self {
+ Self::Crlf => "\u{000D}\u{000A}",
+ Self::LF => "\u{000A}",
+ Self::VT => "\u{000B}",
+ Self::FF => "\u{000C}",
+ Self::CR => "\u{000D}",
+ Self::Nel => "\u{0085}",
+ Self::LS => "\u{2028}",
+ Self::PS => "\u{2029}",
+ }
+ }
+
+ #[inline]
+ pub fn from_char(ch: char) -> Option<LineEnding> {
+ match ch {
+ '\u{000A}' => Some(LineEnding::LF),
+ '\u{000B}' => Some(LineEnding::VT),
+ '\u{000C}' => Some(LineEnding::FF),
+ '\u{000D}' => Some(LineEnding::CR),
+ '\u{0085}' => Some(LineEnding::Nel),
+ '\u{2028}' => Some(LineEnding::LS),
+ '\u{2029}' => Some(LineEnding::PS),
+ // Not a line ending
+ _ => None,
+ }
+ }
+
+ #[inline]
+ pub fn from_str(g: &str) -> Option<LineEnding> {
+ match g {
+ "\u{000D}\u{000A}" => Some(LineEnding::Crlf),
+ "\u{000A}" => Some(LineEnding::LF),
+ "\u{000B}" => Some(LineEnding::VT),
+ "\u{000C}" => Some(LineEnding::FF),
+ "\u{000D}" => Some(LineEnding::CR),
+ "\u{0085}" => Some(LineEnding::Nel),
+ "\u{2028}" => Some(LineEnding::LS),
+ "\u{2029}" => Some(LineEnding::PS),
+ // Not a line ending
+ _ => None,
+ }
+ }
+
+ #[inline]
+ pub fn from_rope_slice(g: &RopeSlice) -> Option<LineEnding> {
+ if let Some(text) = g.as_str() {
+ LineEnding::from_str(text)
+ } else {
+ // Non-contiguous, so it can't be a line ending.
+ // Specifically, Ropey guarantees that CRLF is always
+ // contiguous. And the remaining line endings are all
+ // single `char`s, and therefore trivially contiguous.
+ None
+ }
+ }
+}
+
+#[inline]
+pub fn str_is_line_ending(s: &str) -> bool {
+ LineEnding::from_str(s).is_some()
+}
+
+/// Attempts to detect what line ending the passed document uses.
+pub fn auto_detect_line_ending(doc: &Rope) -> Option<LineEnding> {
+ // Return first matched line ending. Not all possible line endings
+ // are being matched, as they might be special-use only
+ for line in doc.lines().take(100) {
+ match get_line_ending(&line) {
+ None | Some(LineEnding::VT) | Some(LineEnding::FF) | Some(LineEnding::PS) => {}
+ ending => return ending,
+ }
+ }
+ None
+}
+
+/// Returns the passed line's line ending, if any.
+pub fn get_line_ending(line: &RopeSlice) -> Option<LineEnding> {
+ // Last character as str.
+ let g1 = line
+ .slice(line.len_chars().saturating_sub(1)..)
+ .as_str()
+ .unwrap();
+
+ // Last two characters as str, or empty str if they're not contiguous.
+ // It's fine to punt on the non-contiguous case, because Ropey guarantees
+ // that CRLF is always contiguous.
+ let g2 = line
+ .slice(line.len_chars().saturating_sub(2)..)
+ .as_str()
+ .unwrap_or("");
+
+ // First check the two-character case for CRLF, then check the single-character case.
+ LineEnding::from_str(g2).or_else(|| LineEnding::from_str(g1))
+}
+
+/// Returns the char index of the end of the given line, not including its line ending.
+pub fn line_end_char_index(slice: &RopeSlice, line: usize) -> usize {
+ slice.line_to_char(line + 1)
+ - get_line_ending(&slice.line(line))
+ .map(|le| le.len_chars())
+ .unwrap_or(0)
+}
+
+#[cfg(test)]
+mod line_ending_tests {
+ use super::*;
+
+ #[test]
+ fn test_autodetect() {
+ assert_eq!(
+ auto_detect_line_ending(&Rope::from_str("\n")),
+ Some(LineEnding::LF)
+ );
+ assert_eq!(
+ auto_detect_line_ending(&Rope::from_str("\r\n")),
+ Some(LineEnding::Crlf)
+ );
+ assert_eq!(auto_detect_line_ending(&Rope::from_str("hello")), None);
+ assert_eq!(auto_detect_line_ending(&Rope::from_str("")), None);
+ assert_eq!(
+ auto_detect_line_ending(&Rope::from_str("hello\nhelix\r\n")),
+ Some(LineEnding::LF)
+ );
+ assert_eq!(
+ auto_detect_line_ending(&Rope::from_str("a formfeed\u{000C}")),
+ None
+ );
+ assert_eq!(
+ auto_detect_line_ending(&Rope::from_str("\n\u{000A}\n \u{000A}")),
+ Some(LineEnding::LF)
+ );
+ assert_eq!(
+ auto_detect_line_ending(&Rope::from_str(
+ "a formfeed\u{000C} with a\u{000C} linefeed\u{000A}"
+ )),
+ Some(LineEnding::LF)
+ );
+ assert_eq!(auto_detect_line_ending(&Rope::from_str("a formfeed\u{000C} with a\u{000C} carriage return linefeed\u{000D}\u{000A} and a linefeed\u{000A}")), Some(LineEnding::Crlf));
+ }
+
+ #[test]
+ fn test_rope_slice_to_line_ending() {
+ let r = Rope::from_str("\r\n");
+ assert_eq!(
+ LineEnding::from_rope_slice(&r.slice(1..2)),
+ Some(LineEnding::LF)
+ );
+ assert_eq!(
+ LineEnding::from_rope_slice(&r.slice(0..2)),
+ Some(LineEnding::Crlf)
+ );
+ }
+}
diff --git a/helix-core/src/movement.rs b/helix-core/src/movement.rs
index 8b1e802f..d0023e9f 100644
--- a/helix-core/src/movement.rs
+++ b/helix-core/src/movement.rs
@@ -3,9 +3,13 @@ use std::iter::{self, from_fn, Peekable, SkipWhile};
use ropey::iter::Chars;
use crate::{
- coords_at_pos,
+ chars::{
+ categorize_char, char_is_line_ending, char_is_punctuation, char_is_whitespace,
+ char_is_word, CharCategory,
+ },
+ coords_at_pos, get_line_ending,
graphemes::{nth_next_grapheme_boundary, nth_prev_grapheme_boundary},
- pos_at_coords, Position, Range, RopeSlice,
+ line_end_char_index, pos_at_coords, Position, Range, RopeSlice,
};
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
@@ -37,9 +41,8 @@ pub fn move_horizontally(
nth_prev_grapheme_boundary(slice, pos, count).max(start)
}
Direction::Forward => {
- // Line end is pos at the start of next line - 1
- let end = slice.line_to_char(line + 1).saturating_sub(1);
- nth_next_grapheme_boundary(slice, pos, count).min(end)
+ let end_char_idx = line_end_char_index(&slice, line);
+ nth_next_grapheme_boundary(slice, pos, count).min(end_char_idx)
}
};
let anchor = match behaviour {
@@ -68,8 +71,11 @@ pub fn move_vertically(
),
};
- // convert to 0-indexed, subtract another 1 because len_chars() counts \n
- let new_line_len = slice.line(new_line).len_chars().saturating_sub(2);
+ // Length of the line sans line-ending.
+ let new_line_len = {
+ let line = slice.line(new_line);
+ line.len_chars() - get_line_ending(&line).map(|le| le.len_chars()).unwrap_or(0)
+ };
let new_col = std::cmp::min(horiz as usize, new_line_len);
@@ -104,64 +110,6 @@ fn word_move(slice: RopeSlice, mut range: Range, count: usize, target: WordMotio
}
// ---- util ------------
-#[inline]
-pub(crate) fn is_word(ch: char) -> bool {
- ch.is_alphanumeric() || ch == '_'
-}
-
-#[inline]
-pub(crate) fn is_end_of_line(ch: char) -> bool {
- ch == '\n'
-}
-
-#[inline]
-// Whitespace, but not end of line
-pub(crate) fn is_strict_whitespace(ch: char) -> bool {
- ch.is_whitespace() && !is_end_of_line(ch)
-}
-
-#[inline]
-pub(crate) fn is_punctuation(ch: char) -> bool {
- use unicode_general_category::{get_general_category, GeneralCategory};
-
- matches!(
- get_general_category(ch),
- GeneralCategory::OtherPunctuation
- | GeneralCategory::OpenPunctuation
- | GeneralCategory::ClosePunctuation
- | GeneralCategory::InitialPunctuation
- | GeneralCategory::FinalPunctuation
- | GeneralCategory::ConnectorPunctuation
- | GeneralCategory::DashPunctuation
- | GeneralCategory::MathSymbol
- | GeneralCategory::CurrencySymbol
- | GeneralCategory::ModifierSymbol
- )
-}
-
-#[derive(Debug, Eq, PartialEq)]
-pub enum Category {
- Whitespace,
- Eol,
- Word,
- Punctuation,
- Unknown,
-}
-
-#[inline]
-pub(crate) fn categorize(ch: char) -> Category {
- if is_end_of_line(ch) {
- Category::Eol
- } else if ch.is_whitespace() {
- Category::Whitespace
- } else if is_word(ch) {
- Category::Word
- } else if is_punctuation(ch) {
- Category::Punctuation
- } else {
- Category::Unknown
- }
-}
#[inline]
/// Returns first index that doesn't satisfy a given predicate when
@@ -235,7 +183,8 @@ impl CharHelpers for Chars<'_> {
let mut phase = WordMotionPhase::Start;
let mut head = origin.head;
let mut anchor: Option<usize> = None;
- let is_boundary = |a: char, b: Option<char>| categorize(a) != categorize(b.unwrap_or(a));
+ let is_boundary =
+ |a: char, b: Option<char>| categorize_char(a) != categorize_char(b.unwrap_or(a));
while let Some(peek) = characters.peek().copied() {
phase = match phase {
WordMotionPhase::Start => {
@@ -244,7 +193,8 @@ impl CharHelpers for Chars<'_> {
break; // We're at the end, so there's nothing to do.
}
// Anchor may remain here if the head wasn't at a boundary
- if !is_boundary(peek, characters.peek().copied()) && !is_end_of_line(peek) {
+ if !is_boundary(peek, characters.peek().copied()) && !char_is_line_ending(peek)
+ {
anchor = Some(head);
}
// First character is always skipped by the head
@@ -252,7 +202,7 @@ impl CharHelpers for Chars<'_> {
WordMotionPhase::SkipNewlines
}
WordMotionPhase::SkipNewlines => {
- if is_end_of_line(peek) {
+ if char_is_line_ending(peek) {
characters.next();
if characters.peek().is_some() {
advance(&mut head);
@@ -286,12 +236,12 @@ fn reached_target(target: WordMotionTarget, peek: char, next_peek: Option<&char>
match target {
WordMotionTarget::NextWordStart => {
- ((categorize(peek) != categorize(*next_peek))
- && (is_end_of_line(*next_peek) || !next_peek.is_whitespace()))
+ ((categorize_char(peek) != categorize_char(*next_peek))
+ && (char_is_line_ending(*next_peek) || !next_peek.is_whitespace()))
}
WordMotionTarget::NextWordEnd | WordMotionTarget::PrevWordStart => {
- ((categorize(peek) != categorize(*next_peek))
- && (!peek.is_whitespace() || is_end_of_line(*next_peek)))
+ ((categorize_char(peek) != categorize_char(*next_peek))
+ && (!peek.is_whitespace() || char_is_line_ending(*next_peek)))
}
}
}
@@ -330,7 +280,7 @@ mod test {
slice,
move_vertically(slice, range, Direction::Forward, 1, Movement::Move).head
),
- (1, 2).into()
+ (1, 3).into()
);
}
@@ -343,12 +293,12 @@ mod test {
let mut range = Range::point(position);
let moves_and_expected_coordinates = [
- ((Direction::Forward, 1usize), (0, 1)),
- ((Direction::Forward, 2usize), (0, 3)),
- ((Direction::Forward, 0usize), (0, 3)),
- ((Direction::Forward, 999usize), (0, 31)),
- ((Direction::Forward, 999usize), (0, 31)),
- ((Direction::Backward, 999usize), (0, 0)),
+ ((Direction::Forward, 1usize), (0, 1)), // T|his is a simple alphabetic line
+ ((Direction::Forward, 2usize), (0, 3)), // Thi|s is a simple alphabetic line
+ ((Direction::Forward, 0usize), (0, 3)), // Thi|s is a simple alphabetic line
+ ((Direction::Forward, 999usize), (0, 32)), // This is a simple alphabetic line|
+ ((Direction::Forward, 999usize), (0, 32)), // This is a simple alphabetic line|
+ ((Direction::Backward, 999usize), (0, 0)), // |This is a simple alphabetic line
];
for ((direction, amount), coordinates) in IntoIter::new(moves_and_expected_coordinates) {
@@ -366,15 +316,15 @@ mod test {
let mut range = Range::point(position);
let moves_and_expected_coordinates = IntoIter::new([
- ((Direction::Forward, 1usize), (0, 1)), // M_ltiline
- ((Direction::Forward, 2usize), (0, 3)), // Mul_iline
- ((Direction::Backward, 6usize), (0, 0)), // _ultiline
- ((Direction::Backward, 999usize), (0, 0)), // _ultiline
- ((Direction::Forward, 3usize), (0, 3)), // Mul_iline
- ((Direction::Forward, 0usize), (0, 3)), // Mul_iline
- ((Direction::Backward, 0usize), (0, 3)), // Mul_iline
- ((Direction::Forward, 999usize), (0, 9)), // Multilin_
- ((Direction::Forward, 999usize), (0, 9)), // Multilin_
+ ((Direction::Forward, 1usize), (0, 1)), // M|ultiline\n
+ ((Direction::Forward, 2usize), (0, 3)), // Mul|tiline\n
+ ((Direction::Backward, 6usize), (0, 0)), // |Multiline\n
+ ((Direction::Backward, 999usize), (0, 0)), // |Multiline\n
+ ((Direction::Forward, 3usize), (0, 3)), // Mul|tiline\n
+ ((Direction::Forward, 0usize), (0, 3)), // Mul|tiline\n
+ ((Direction::Backward, 0usize), (0, 3)), // Mul|tiline\n
+ ((Direction::Forward, 999usize), (0, 9)), // Multiline|\n
+ ((Direction::Forward, 999usize), (0, 9)), // Multiline|\n
]);
for ((direction, amount), coordinates) in moves_and_expected_coordinates {
@@ -446,7 +396,7 @@ mod test {
// First descent preserves column as the target line is wider
((Axis::V, Direction::Forward, 1usize), (1, 8)),
// Second descent clamps column as the target line is shorter
- ((Axis::V, Direction::Forward, 1usize), (2, 4)),
+ ((Axis::V, Direction::Forward, 1usize), (2, 5)),
// Third descent restores the original column
((Axis::V, Direction::Forward, 1usize), (3, 8)),
// Behaviour is preserved even through long jumps
@@ -760,45 +710,4 @@ mod test {
}
}
}
-
- #[test]
- fn test_categorize() {
- const WORD_TEST_CASE: &'static str =
- "_hello_world_あいうえおー12345678901234567890";
- const PUNCTUATION_TEST_CASE: &'static str =
- "!\"#$%&\'()*+,-./:;<=>?@[\\]^`{|}~!”#$%&’()*+、。:;<=>?@「」^`{|}~";
- const WHITESPACE_TEST_CASE: &'static str = "      ";
-
- assert_eq!(Category::Eol, categorize('\n'));
-
- for ch in WHITESPACE_TEST_CASE.chars() {
- assert_eq!(
- Category::Whitespace,
- categorize(ch),
- "Testing '{}', but got `{:?}` instead of `Category::Whitespace`",
- ch,
- categorize(ch)
- );
- }
-
- for ch in WORD_TEST_CASE.chars() {
- assert_eq!(
- Category::Word,
- categorize(ch),
- "Testing '{}', but got `{:?}` instead of `Category::Word`",
- ch,
- categorize(ch)
- );
- }
-
- for ch in PUNCTUATION_TEST_CASE.chars() {
- assert_eq!(
- Category::Punctuation,
- categorize(ch),
- "Testing '{}', but got `{:?}` instead of `Category::Punctuation`",
- ch,
- categorize(ch)
- );
- }
- }
}
diff --git a/helix-core/src/position.rs b/helix-core/src/position.rs
index 3d85ff2f..392eee9c 100644
--- a/helix-core/src/position.rs
+++ b/helix-core/src/position.rs
@@ -1,4 +1,5 @@
use crate::{
+ chars::char_is_line_ending,
graphemes::{nth_next_grapheme_boundary, RopeGraphemes},
Rope, RopeSlice,
};
@@ -23,8 +24,9 @@ impl Position {
pub fn traverse(self, text: &crate::Tendril) -> Self {
let Self { mut row, mut col } = self;
// TODO: there should be a better way here
- for ch in text.chars() {
- if ch == '\n' {
+ let mut chars = text.chars().peekable();
+ while let Some(ch) = chars.next() {
+ if char_is_line_ending(ch) && !(ch == '\r' && chars.peek() == Some(&'\n')) {
row += 1;
col = 0;
} else {
diff --git a/helix-core/src/syntax.rs b/helix-core/src/syntax.rs
index 81b6d5a0..63ca424e 100644
--- a/helix-core/src/syntax.rs
+++ b/helix-core/src/syntax.rs
@@ -1,4 +1,4 @@
-use crate::{regex::Regex, Change, Rope, RopeSlice, Transaction};
+use crate::{chars::char_is_line_ending, regex::Regex, Change, Rope, RopeSlice, Transaction};
pub use helix_syntax::{get_language, get_language_name, Lang};
use arc_swap::ArcSwap;
@@ -589,9 +589,10 @@ impl LanguageLayer {
mut column,
} = point;
- // TODO: there should be a better way here
- for ch in text.bytes() {
- if ch == b'\n' {
+ // TODO: there should be a better way here.
+ let mut chars = text.chars().peekable();
+ while let Some(ch) = chars.next() {
+ if char_is_line_ending(ch) && !(ch == '\r' && chars.peek() == Some(&'\n')) {
row += 1;
column = 0;
} else {
diff --git a/helix-lsp/src/client.rs b/helix-lsp/src/client.rs
index 101d2f9b..7f136fe8 100644
--- a/helix-lsp/src/client.rs
+++ b/helix-lsp/src/client.rs
@@ -3,7 +3,7 @@ use crate::{
Call, Error, OffsetEncoding, Result,
};
-use helix_core::{find_root, ChangeSet, Rope};
+use helix_core::{chars::char_is_line_ending, find_root, ChangeSet, Rope};
use jsonrpc_core as jsonrpc;
use lsp_types as lsp;
use serde_json::Value;
@@ -337,8 +337,9 @@ impl Client {
mut character,
} = pos;
- for ch in text.chars() {
- if ch == '\n' {
+ let mut chars = text.chars().peekable();
+ while let Some(ch) = chars.next() {
+ if char_is_line_ending(ch) && !(ch == '\r' && chars.peek() == Some(&'\n')) {
line += 1;
character = 0;
} else {
diff --git a/helix-term/src/commands.rs b/helix-term/src/commands.rs
index bcf946b7..28c4fe3a 100644
--- a/helix-term/src/commands.rs
+++ b/helix-term/src/commands.rs
@@ -1,12 +1,12 @@
use helix_core::{
- comment, coords_at_pos, find_first_non_whitespace_char, find_root, graphemes, indent,
- match_brackets,
+ comment, coords_at_pos, find_first_non_whitespace_char, find_root, get_line_ending, graphemes,
+ indent, line_end_char_index, match_brackets,
movement::{self, Direction},
object, pos_at_coords,
regex::{self, Regex},
register::{self, Register, Registers},
- search, selection, Change, ChangeSet, Position, Range, Rope, RopeSlice, Selection, SmallVec,
- Tendril, Transaction,
+ search, selection, Change, ChangeSet, LineEnding, Position, Range, Rope, RopeSlice, Selection,
+ SmallVec, Tendril, Transaction, DEFAULT_LINE_ENDING,
};
use helix_view::{
@@ -303,9 +303,8 @@ fn move_line_end(cx: &mut Context) {
let text = doc.text();
let line = text.char_to_line(range.head);
- // Line end is pos at the start of next line - 1
- // subtract another 1 because the line ends with \n
- let pos = text.line_to_char(line + 1).saturating_sub(2);
+ let pos = line_end_char_index(&text.slice(..), line);
+
Range::new(pos, pos)
});
@@ -452,6 +451,8 @@ where
let count = cx.count();
// need to wait for next key
+ // TODO: should this be done by grapheme rather than char? For example,
+ // we can't properly handle the line-ending case here in terms of char.
cx.on_next_key(move |cx, event| {
let ch = match event {
KeyEvent {
@@ -585,7 +586,7 @@ fn replace(cx: &mut Context) {
KeyEvent {
code: KeyCode::Enter,
..
- } => Some('\n'),
+ } => Some('\n'), // TODO: use the document's default line ending.
_ => None,
};
@@ -725,9 +726,8 @@ fn extend_line_end(cx: &mut Context) {
let text = doc.text();
let line = text.char_to_line(range.head);
- // Line end is pos at the start of next line - 1
- // subtract another 1 because the line ends with \n
- let pos = text.line_to_char(line + 1).saturating_sub(2);
+ let pos = line_end_char_index(&text.slice(..), line);
+
Range::new(range.anchor, pos)
});
@@ -922,7 +922,13 @@ fn delete_selection_impl(reg: &mut Register, doc: &mut Document, view_id: ViewId
// then delete
let transaction =
Transaction::change_by_selection(doc.text(), doc.selection(view_id), |range| {
- let max_to = doc.text().len_chars().saturating_sub(1);
+ let alltext = doc.text();
+ let line = alltext.char_to_line(range.head);
+ let max_to = doc.text().len_chars().saturating_sub(
+ get_line_ending(&alltext.line(line))
+ .map(|le| le.len_chars())
+ .unwrap_or(0),
+ );
let to = std::cmp::min(max_to, range.to() + 1);
(range.from(), to, None)
});
@@ -1003,7 +1009,7 @@ fn append_mode(cx: &mut Context) {
if selection.iter().any(|range| range.head == end) {
let transaction = Transaction::change(
doc.text(),
- std::array::IntoIter::new([(end, end, Some(Tendril::from_char('\n')))]),
+ std::array::IntoIter::new([(end, end, Some(doc.line_ending().as_str().into()))]),
);
doc.apply(&transaction, view.id);
}
@@ -1683,8 +1689,7 @@ fn append_to_line(cx: &mut Context) {
let selection = doc.selection(view.id).transform(|range| {
let text = doc.text();
let line = text.char_to_line(range.head);
- // we can't use line_to_char(line + 1) - 2 because the last line might not contain \n
- let pos = (text.line_to_char(line) + text.line(line).len_chars()).saturating_sub(1);
+ let pos = line_end_char_index(&text.slice(..), line);
Range::new(pos, pos)
});
doc.set_selection(view.id, selection);
@@ -2344,7 +2349,7 @@ pub mod insert {
);
let indent = doc.indent_unit().repeat(indent_level);
let mut text = String::with_capacity(1 + indent.len());
- text.push('\n');
+ text.push_str(doc.line_ending().as_str());
text.push_str(&indent);
let head = pos + offs + text.chars().count();
@@ -2365,7 +2370,7 @@ pub mod insert {
if helix_core::auto_pairs::PAIRS.contains(&(prev, curr)) {
// another newline, indent the end bracket one level less
let indent = doc.indent_unit().repeat(indent_level.saturating_sub(1));
- text.push('\n');
+ text.push_str(doc.line_ending().as_str());
text.push_str(&indent);
}
@@ -2530,7 +2535,9 @@ fn paste_impl(
);
// if any of values ends \n it's linewise paste
- let linewise = values.iter().any(|value| value.ends_with('\n'));
+ let linewise = values
+ .iter()
+ .any(|value| value.ends_with(doc.line_ending().as_str()));
let mut values = values.iter().cloned().map(Tendril::from).chain(repeat);
diff --git a/helix-term/src/ui/editor.rs b/helix-term/src/ui/editor.rs
index 7f0d06e9..faede58c 100644
--- a/helix-term/src/ui/editor.rs
+++ b/helix-term/src/ui/editor.rs
@@ -9,7 +9,7 @@ use crate::{
use helix_core::{
coords_at_pos,
syntax::{self, HighlightEvent},
- Position, Range,
+ LineEnding, Position, Range,
};
use helix_view::{document::Mode, Document, Editor, Theme, View};
use std::borrow::Cow;
@@ -176,7 +176,7 @@ impl EditorView {
// iterate over range char by char
for grapheme in RopeGraphemes::new(text) {
- if grapheme == "\n" {
+ if LineEnding::from_rope_slice(&grapheme).is_some() {
visual_x = 0;
line += 1;
diff --git a/helix-term/src/ui/markdown.rs b/helix-term/src/ui/markdown.rs
index 75e2f4b4..72a3e4ff 100644
--- a/helix-term/src/ui/markdown.rs
+++ b/helix-term/src/ui/markdown.rs
@@ -115,6 +115,8 @@ fn parse<'a>(
// TODO: replace tabs with indentation
let mut slice = &text[start..end];
+ // TODO: do we need to handle all unicode line endings
+ // here, or is just '\n' okay?
while let Some(end) = slice.find('\n') {
// emit span up to newline
let text = &slice[..end];
diff --git a/helix-tui/Cargo.toml b/helix-tui/Cargo.toml
index 89fa755d..30e2374d 100644
--- a/helix-tui/Cargo.toml
+++ b/helix-tui/Cargo.toml
@@ -22,3 +22,4 @@ unicode-segmentation = "1.2"
unicode-width = "0.1"
crossterm = { version = "0.20", optional = true }
serde = { version = "1", "optional" = true, features = ["derive"]}
+helix-core = { version = "0.2", path = "../helix-core" }
diff --git a/helix-tui/src/text.rs b/helix-tui/src/text.rs
index c671e918..b23bfd81 100644
--- a/helix-tui/src/text.rs
+++ b/helix-tui/src/text.rs
@@ -47,6 +47,7 @@
//! ]);
//! ```
use crate::style::Style;
+use helix_core::line_ending::str_is_line_ending;
use std::borrow::Cow;
use unicode_segmentation::UnicodeSegmentation;
use unicode_width::UnicodeWidthStr;
@@ -177,7 +178,7 @@ impl<'a> Span<'a> {
symbol: g,
style: base_style.patch(self.style),
})
- .filter(|s| s.symbol != "\n")
+ .filter(|s| !str_is_line_ending(s.symbol))
}
}
diff --git a/helix-tui/src/widgets/reflow.rs b/helix-tui/src/widgets/reflow.rs
index 94ff7330..ae561a4f 100644
--- a/helix-tui/src/widgets/reflow.rs
+++ b/helix-tui/src/widgets/reflow.rs
@@ -1,4 +1,5 @@
use crate::text::StyledGrapheme;
+use helix_core::line_ending::str_is_line_ending;
use unicode_segmentation::UnicodeSegmentation;
use unicode_width::UnicodeWidthStr;
@@ -62,13 +63,13 @@ impl<'a, 'b> LineComposer<'a> for WordWrapper<'a, 'b> {
// Ignore characters wider that the total max width.
if symbol.width() as u16 > self.max_line_width
// Skip leading whitespace when trim is enabled.
- || self.trim && symbol_whitespace && symbol != "\n" && current_line_width == 0
+ || self.trim && symbol_whitespace && !str_is_line_ending(symbol) && current_line_width == 0
{
continue;
}
// Break on newline and discard it.
- if symbol == "\n" {
+ if str_is_line_ending(symbol) {
if prev_whitespace {
current_line_width = width_to_last_word_end;
self.current_line.truncate(symbols_to_last_word_end);
@@ -170,7 +171,7 @@ impl<'a, 'b> LineComposer<'a> for LineTruncator<'a, 'b> {
}
// Break on newline and discard it.
- if symbol == "\n" {
+ if str_is_line_ending(symbol) {
break;
}
@@ -199,7 +200,7 @@ impl<'a, 'b> LineComposer<'a> for LineTruncator<'a, 'b> {
if skip_rest {
for StyledGrapheme { symbol, .. } in &mut self.symbols {
- if symbol == "\n" {
+ if str_is_line_ending(symbol) {
break;
}
}
diff --git a/helix-view/src/document.rs b/helix-view/src/document.rs
index fd127e1b..9326fb79 100644
--- a/helix-view/src/document.rs
+++ b/helix-view/src/document.rs
@@ -7,10 +7,12 @@ use std::str::FromStr;
use std::sync::Arc;
use helix_core::{
- chars::{char_is_linebreak, char_is_whitespace},
+ auto_detect_line_ending,
+ chars::{char_is_line_ending, char_is_whitespace},
history::History,
syntax::{self, LanguageConfiguration},
- ChangeSet, Diagnostic, Rope, Selection, State, Syntax, Transaction,
+ ChangeSet, Diagnostic, LineEnding, Rope, Selection, State, Syntax, Transaction,
+ DEFAULT_LINE_ENDING,
};
use crate::{DocumentId, Theme, ViewId};
@@ -45,6 +47,9 @@ pub struct Document {
/// Current indent style.
pub indent_style: IndentStyle,
+ /// The document's default line ending.
+ pub line_ending: LineEnding,
+
syntax: Option<Syntax>,
// /// Corresponding language scope name. Usually `source.<lang>`.
pub(crate) language: Option<Arc<LanguageConfiguration>>,
@@ -232,6 +237,7 @@ impl Document {
history: Cell::new(History::default()),
last_saved_revision: 0,
language_server: None,
+ line_ending: DEFAULT_LINE_ENDING,
}
}
@@ -243,22 +249,26 @@ impl Document {
) -> Result<Self, Error> {
use std::{fs::File, io::BufReader};
- let doc = if !path.exists() {
- Rope::from("\n")
+ let mut doc = if !path.exists() {
+ Rope::from(DEFAULT_LINE_ENDING.as_str())
} else {
let file = File::open(&path).context(format!("unable to open {:?}", path))?;
- let mut doc = Rope::from_reader(BufReader::new(file))?;
- // add missing newline at the end of file
- if doc.len_bytes() == 0 || doc.byte(doc.len_bytes() - 1) != b'\n' {
- doc.insert_char(doc.len_chars(), '\n');
- }
- doc
+ Rope::from_reader(BufReader::new(file))?
};
+ // search for line endings
+ let line_ending = auto_detect_line_ending(&doc).unwrap_or(DEFAULT_LINE_ENDING);
+
+ // Append a line ending when the text is empty or does not already end with one.
+ if doc.len_bytes() == 0 || !char_is_line_ending(doc.char(doc.len_chars() - 1)) {
+ doc.insert(doc.len_chars(), line_ending.as_str());
+ }
+
let mut doc = Self::new(doc);
// set the path and try detecting the language
doc.set_path(&path)?;
doc.detect_indent_style();
+ doc.set_line_ending(line_ending);
if let Some(loader) = config_loader {
doc.detect_language(theme, loader);
@@ -366,7 +376,7 @@ impl Document {
Some(' ') => false,
// Ignore blank lines.
- Some(c) if char_is_linebreak(c) => continue,
+ Some(c) if char_is_line_ending(c) => continue,
_ => {
prev_line_is_tabs = false;
@@ -390,7 +400,7 @@ impl Document {
c if char_is_whitespace(c) => count_is_done = true,
// Ignore blank lines.
- c if char_is_linebreak(c) => continue 'outer,
+ c if char_is_line_ending(c) => continue 'outer,
_ => break,
}
@@ -521,6 +531,10 @@ impl Document {
self.selections.insert(view_id, selection);
}
+ pub fn set_line_ending(&mut self, line_ending: LineEnding) {
+ self.line_ending = line_ending;
+ }
+
fn _apply(&mut self, transaction: &Transaction, view_id: ViewId) -> bool {
let old_doc = self.text().clone();
@@ -791,6 +805,10 @@ impl Document {
pub fn set_diagnostics(&mut self, diagnostics: Vec<Diagnostic>) {
self.diagnostics = diagnostics;
}
+
+ pub fn line_ending(&self) -> LineEnding {
+ self.line_ending
+ }
}
#[cfg(test)]
diff --git a/helix-view/src/editor.rs b/helix-view/src/editor.rs
index 5d18030a..839bcdcd 100644
--- a/helix-view/src/editor.rs
+++ b/helix-view/src/editor.rs
@@ -15,10 +15,9 @@ use slotmap::SlotMap;
use anyhow::Error;
-use helix_core::Position;
-
pub use helix_core::diagnostic::Severity;
pub use helix_core::register::Registers;
+use helix_core::{Position, DEFAULT_LINE_ENDING};
#[derive(Debug)]
pub struct Editor {
@@ -173,7 +172,7 @@ impl Editor {
pub fn new_file(&mut self, action: Action) -> DocumentId {
use helix_core::Rope;
- let doc = Document::new(Rope::from("\n"));
+ let doc = Document::new(Rope::from(DEFAULT_LINE_ENDING.as_str()));
let id = self.documents.insert(doc);
self.documents[id].id = id;
self.switch(id, action);