aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--helix-core/src/lib.rs6
-rw-r--r--helix-core/src/line_ending.rs169
-rw-r--r--helix-term/src/commands.rs50
-rw-r--r--helix-term/src/ui/editor.rs4
-rw-r--r--helix-view/src/document.rs18
5 files changed, 223 insertions, 24 deletions
diff --git a/helix-core/src/lib.rs b/helix-core/src/lib.rs
index 03741719..9ac506a6 100644
--- a/helix-core/src/lib.rs
+++ b/helix-core/src/lib.rs
@@ -6,6 +6,7 @@ pub mod diagnostic;
pub mod graphemes;
pub mod history;
pub mod indent;
+pub mod line_ending;
pub mod macros;
pub mod match_brackets;
pub mod movement;
@@ -102,6 +103,7 @@ pub use unicode_general_category::get_general_category;
#[doc(inline)]
pub use {regex, tree_sitter};
+pub use graphemes::RopeGraphemes;
pub use position::{coords_at_pos, pos_at_coords, Position};
pub use selection::{Range, Selection};
pub use smallvec::SmallVec;
@@ -110,4 +112,8 @@ pub use syntax::Syntax;
pub use diagnostic::Diagnostic;
pub use state::State;
+pub use line_ending::{
+ auto_detect_line_ending, get_line_ending, rope_slice_to_line_ending, LineEnding,
+ DEFAULT_LINE_ENDING,
+};
pub use transaction::{Assoc, Change, ChangeSet, Operation, Transaction};
diff --git a/helix-core/src/line_ending.rs b/helix-core/src/line_ending.rs
new file mode 100644
index 00000000..2cc5b5d8
--- /dev/null
+++ b/helix-core/src/line_ending.rs
@@ -0,0 +1,169 @@
+use crate::{Rope, RopeGraphemes, RopeSlice};
+
+/// A line ending recognised by this module: the two-character CRLF sequence or one of seven single-character Unicode line breaks.
+#[derive(PartialEq, Copy, Clone, Debug)]
+pub enum LineEnding {
+ Crlf, // U+000D U+000A -- CarriageReturn followed by LineFeed
+ LF, // U+000A -- LineFeed
+ CR, // U+000D -- CarriageReturn
+ Nel, // U+0085 -- NextLine
+ LS, // U+2028 -- LineSeparator
+ VT, // U+000B -- VerticalTab
+ FF, // U+000C -- FormFeed
+ PS, // U+2029 -- ParagraphSeparator
+}
+
+impl LineEnding {
+ pub fn len_chars(&self) -> usize { // Number of chars this line ending occupies.
+ match self {
+ Self::Crlf => 2, // CRLF is the only two-character ending
+ _ => 1, // every other variant is a single char
+ }
+ }
+
+ pub fn as_str(&self) -> &str { // The literal text of this line ending.
+ match self {
+ Self::Crlf => "\u{000D}\u{000A}",
+ Self::LF => "\u{000A}",
+ Self::Nel => "\u{0085}",
+ Self::LS => "\u{2028}",
+ Self::CR => "\u{000D}",
+ Self::VT => "\u{000B}",
+ Self::FF => "\u{000C}",
+ Self::PS => "\u{2029}",
+ }
+ }
+}
+
+pub fn rope_slice_to_line_ending(g: &RopeSlice) -> Option<LineEnding> { // Classifies a slice (typically one grapheme) as a line ending, or None.
+ if let Some(text) = g.as_str() { // slice is available as one &str: use the str-based lookup
+ str_to_line_ending(text)
+ } else if g == "\u{000D}\u{000A}" { // as_str() gave None: compare the slice with CRLF directly
+ Some(LineEnding::Crlf)
+ } else {
+ // Anything else reaching this branch is treated as not a line ending
+ None
+ }
+}
+
+pub fn str_to_line_ending(g: &str) -> Option<LineEnding> { // Exact-match lookup: `g` must be the whole line-ending text, not merely end with it.
+ match g {
+ "\u{000D}\u{000A}" => Some(LineEnding::Crlf),
+ "\u{000A}" => Some(LineEnding::LF),
+ "\u{000D}" => Some(LineEnding::CR),
+ "\u{0085}" => Some(LineEnding::Nel),
+ "\u{2028}" => Some(LineEnding::LS),
+ "\u{000B}" => Some(LineEnding::VT),
+ "\u{000C}" => Some(LineEnding::FF),
+ "\u{2029}" => Some(LineEnding::PS),
+ // Any other string (including the empty string) is not a line ending
+ _ => None,
+ }
+}
+
+pub fn auto_detect_line_ending(doc: &Rope) -> Option<LineEnding> { // Guesses the document's line ending from its first 100 lines.
+ // Based on https://github.com/cessen/led/blob/27572c8838a1c664ee378a19358604063881cc1d/src/editor/mod.rs#L88-L162
+
+ let mut ending = None;
+ // Return the first line ending found that is not VT, FF or PS; those three are skipped because they might be special-use only.
+ for line in doc.lines().take(100) { // only the first 100 lines are inspected
+ ending = match line.len_chars() {
+ 1 => { // one-char line: classify its only grapheme
+ let g = RopeGraphemes::new(line.slice((line.len_chars() - 1)..))
+ .last()
+ .unwrap();
+ rope_slice_to_line_ending(&g)
+ }
+ n if n > 1 => { // longer line: classify the last grapheme of its final two chars (presumably so a trailing CRLF is seen as one unit)
+ let g = RopeGraphemes::new(line.slice((line.len_chars() - 2)..))
+ .last()
+ .unwrap();
+ rope_slice_to_line_ending(&g)
+ }
+ _ => None, // zero-length line: nothing to classify
+ };
+ if ending.is_some() {
+ match ending {
+ Some(LineEnding::VT) | Some(LineEnding::FF) | Some(LineEnding::PS) => {} // skipped: keep scanning
+ _ => return ending,
+ }
+ }
+ }
+ ending // no early return: this is the result for the last line examined (None, VT, FF or PS)
+}
+
+/// Returns the line ending that terminates the passed line, or `None` if its last one or two chars are not a line ending.
+pub fn get_line_ending(line: &RopeSlice) -> Option<LineEnding> {
+ // Last character as str (empty range when the line is empty, thanks to saturating_sub).
+ let g1 = line
+ .slice(line.len_chars().saturating_sub(1)..)
+ .as_str()
+ .unwrap(); // NOTE(review): panics if as_str() yields None; presumably a slice of at most one char is always contiguous -- confirm
+
+ // Last two characters as str, or empty str if they're not contiguous.
+ // It's fine to punt on the non-contiguous case, because Ropey guarantees
+ // that CRLF is always contiguous.
+ let g2 = line
+ .slice(line.len_chars().saturating_sub(2)..)
+ .as_str()
+ .unwrap_or("");
+
+ // Check the two-character case first so CRLF wins over a bare LF, then fall back to the single-character case.
+ str_to_line_ending(g2).or_else(|| str_to_line_ending(g1))
+}
+
+#[cfg(target_os = "windows")] // Windows targets default to CRLF
+pub const DEFAULT_LINE_ENDING: LineEnding = LineEnding::Crlf;
+#[cfg(not(target_os = "windows"))] // every other target defaults to LF
+pub const DEFAULT_LINE_ENDING: LineEnding = LineEnding::LF;
+
+#[cfg(test)]
+mod line_ending_tests { // Unit tests for line-ending detection and classification.
+ use super::*;
+
+ #[test]
+ fn test_autodetect() { // Expected results of auto_detect_line_ending for small sample documents.
+ assert_eq!(
+ auto_detect_line_ending(&Rope::from_str("\n")),
+ Some(LineEnding::LF)
+ );
+ assert_eq!(
+ auto_detect_line_ending(&Rope::from_str("\r\n")),
+ Some(LineEnding::Crlf)
+ );
+ assert_eq!(auto_detect_line_ending(&Rope::from_str("hello")), None); // no line break at all
+ assert_eq!(auto_detect_line_ending(&Rope::from_str("")), None); // empty document
+ assert_eq!(
+ auto_detect_line_ending(&Rope::from_str("hello\nhelix\r\n")), // mixed endings: the first one found is reported
+ Some(LineEnding::LF)
+ );
+ assert_eq!(
+ auto_detect_line_ending(&Rope::from_str("a formfeed\u{000C}")), // a lone form feed is expected to yield None
+ None
+ );
+ assert_eq!(
+ auto_detect_line_ending(&Rope::from_str("\n\u{000A}\n \u{000A}")), // "\n" and U+000A are the same character
+ Some(LineEnding::LF)
+ );
+ assert_eq!(
+ auto_detect_line_ending(&Rope::from_str(
+ "a formfeed\u{000C} with a\u{000C} linefeed\u{000A}" // form feeds are passed over in favour of the later LF
+ )),
+ Some(LineEnding::LF)
+ );
+ assert_eq!(auto_detect_line_ending(&Rope::from_str("a formfeed\u{000C} with a\u{000C} carriage return linefeed\u{000D}\u{000A} and a linefeed\u{000A}")), Some(LineEnding::Crlf));
+ }
+
+ #[test]
+ fn test_rope_slice_to_line_ending() { // Classifying sub-slices of a "\r\n" rope.
+ let r = Rope::from_str("\r\n");
+ assert_eq!(
+ rope_slice_to_line_ending(&r.slice(1..2)), // just the "\n" char
+ Some(LineEnding::LF)
+ );
+ assert_eq!(
+ rope_slice_to_line_ending(&r.slice(0..2)), // both chars together
+ Some(LineEnding::Crlf)
+ );
+ }
+}
diff --git a/helix-term/src/commands.rs b/helix-term/src/commands.rs
index 1243a86f..07d2999b 100644
--- a/helix-term/src/commands.rs
+++ b/helix-term/src/commands.rs
@@ -1,12 +1,12 @@
use helix_core::{
- comment, coords_at_pos, find_first_non_whitespace_char, find_root, graphemes, indent,
- match_brackets,
+ comment, coords_at_pos, find_first_non_whitespace_char, find_root, get_line_ending, graphemes,
+ indent, match_brackets,
movement::{self, Direction},
object, pos_at_coords,
regex::{self, Regex},
register::{self, Register, Registers},
- search, selection, Change, ChangeSet, Position, Range, Rope, RopeSlice, Selection, SmallVec,
- Tendril, Transaction,
+ search, selection, Change, ChangeSet, LineEnding, Position, Range, Rope, RopeSlice, Selection,
+ SmallVec, Tendril, Transaction,
};
use helix_view::{
@@ -342,9 +342,12 @@ fn move_line_end(cx: &mut Context) {
let text = doc.text();
let line = text.char_to_line(range.head);
- // Line end is pos at the start of next line - 1
- // subtract another 1 because the line ends with \n
- let pos = text.line_to_char(line + 1).saturating_sub(2);
+ let pos = text.line_to_char(line + 1).saturating_sub(
+ get_line_ending(&text.line(line))
+ .map(|le| le.len_chars())
+ .unwrap_or(0),
+ );
+
Range::new(pos, pos)
});
@@ -764,9 +767,12 @@ fn extend_line_end(cx: &mut Context) {
let text = doc.text();
let line = text.char_to_line(range.head);
- // Line end is pos at the start of next line - 1
- // subtract another 1 because the line ends with \n
- let pos = text.line_to_char(line + 1).saturating_sub(2);
+ let pos = text.line_to_char(line + 1).saturating_sub(
+ get_line_ending(&text.line(line))
+ .map(|le| le.len_chars())
+ .unwrap_or(0),
+ );
+
Range::new(range.anchor, pos)
});
@@ -1057,7 +1063,7 @@ fn append_mode(cx: &mut Context) {
if selection.iter().any(|range| range.head == end) {
let transaction = Transaction::change(
doc.text(),
- std::array::IntoIter::new([(end, end, Some(Tendril::from_char('\n')))]),
+ std::array::IntoIter::new([(end, end, Some(doc.line_ending().as_str().into()))]),
);
doc.apply(&transaction, view.id);
}
@@ -1662,16 +1668,16 @@ fn open(cx: &mut Context, open: Open) {
let mut ranges = SmallVec::with_capacity(selection.len());
let mut offs = 0;
+ let line = match open {
+ // adjust position to the end of the line (next line - 1)
+ Open::Below => line + 1,
+ // adjust position to the end of the previous line (current line - 1)
+ Open::Above => line,
+ };
+
let mut transaction = Transaction::change_by_selection(contents, selection, |range| {
let line = text.char_to_line(range.head);
- let line = match open {
- // adjust position to the end of the line (next line - 1)
- Open::Below => line + 1,
- // adjust position to the end of the previous line (current line - 1)
- Open::Above => line,
- };
-
// insert newlines after this index for both Above and Below variants
let linend_index = doc.text().line_to_char(line).saturating_sub(1);
@@ -2299,7 +2305,7 @@ pub mod insert {
);
let indent = doc.indent_unit().repeat(indent_level);
let mut text = String::with_capacity(1 + indent.len());
- text.push('\n');
+ text.push_str(doc.line_ending().as_str());
text.push_str(&indent);
let head = pos + offs + text.chars().count();
@@ -2320,7 +2326,7 @@ pub mod insert {
if helix_core::auto_pairs::PAIRS.contains(&(prev, curr)) {
// another newline, indent the end bracket one level less
let indent = doc.indent_unit().repeat(indent_level.saturating_sub(1));
- text.push('\n');
+ text.push_str(doc.line_ending().as_str());
text.push_str(&indent);
}
@@ -2439,7 +2445,9 @@ fn paste_impl(
);
// if any of values ends \n it's linewise paste
- let linewise = values.iter().any(|value| value.ends_with('\n'));
+ let linewise = values
+ .iter()
+ .any(|value| value.ends_with(doc.line_ending().as_str()));
let mut values = values.iter().cloned().map(Tendril::from).chain(repeat);
diff --git a/helix-term/src/ui/editor.rs b/helix-term/src/ui/editor.rs
index d0eedad6..42bb3ba8 100644
--- a/helix-term/src/ui/editor.rs
+++ b/helix-term/src/ui/editor.rs
@@ -7,7 +7,7 @@ use crate::{
};
use helix_core::{
- coords_at_pos,
+ coords_at_pos, rope_slice_to_line_ending,
syntax::{self, HighlightEvent},
Position, Range,
};
@@ -177,7 +177,7 @@ impl EditorView {
// iterate over range char by char
for grapheme in RopeGraphemes::new(text) {
- if grapheme == "\n" {
+ if rope_slice_to_line_ending(&grapheme).is_some() {
visual_x = 0;
line += 1;
diff --git a/helix-view/src/document.rs b/helix-view/src/document.rs
index 8875f70d..49d270e4 100644
--- a/helix-view/src/document.rs
+++ b/helix-view/src/document.rs
@@ -9,10 +9,12 @@ use std::str::FromStr;
use std::sync::Arc;
use helix_core::{
+ auto_detect_line_ending,
chars::{char_is_linebreak, char_is_whitespace},
history::History,
syntax::{LanguageConfiguration, LOADER},
- ChangeSet, Diagnostic, Rope, Selection, State, Syntax, Transaction,
+ ChangeSet, Diagnostic, LineEnding, Rope, Selection, State, Syntax, Transaction,
+ DEFAULT_LINE_ENDING,
};
use crate::{DocumentId, ViewId};
@@ -97,6 +99,7 @@ pub struct Document {
diagnostics: Vec<Diagnostic>,
language_server: Option<Arc<helix_lsp::Client>>,
+ line_ending: LineEnding,
}
use std::fmt;
@@ -243,6 +246,7 @@ impl Document {
history: Cell::new(History::default()),
last_saved_revision: 0,
language_server: None,
+ line_ending: DEFAULT_LINE_ENDING,
}
}
@@ -262,10 +266,14 @@ impl Document {
doc
};
+ // search for line endings
+ let line_ending = auto_detect_line_ending(&doc).unwrap_or(DEFAULT_LINE_ENDING);
+
let mut doc = Self::new(doc);
// set the path and try detecting the language
doc.set_path(&path)?;
doc.detect_indent_style();
+ doc.set_line_ending(line_ending);
Ok(doc)
}
@@ -525,6 +533,10 @@ impl Document {
self.selections.insert(view_id, selection);
}
+ pub fn set_line_ending(&mut self, line_ending: LineEnding) { // Replaces the line ending stored on this document.
+ self.line_ending = line_ending;
+ }
+
fn _apply(&mut self, transaction: &Transaction, view_id: ViewId) -> bool {
let old_doc = self.text().clone();
@@ -795,6 +807,10 @@ impl Document {
pub fn set_diagnostics(&mut self, diagnostics: Vec<Diagnostic>) {
self.diagnostics = diagnostics;
}
+
+ pub fn line_ending(&self) -> LineEnding { // Returns the line ending stored on this document, by value.
+ self.line_ending
+ }
}
#[cfg(test)]