summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--helix-core/src/line_ending.rs7
-rw-r--r--helix-core/src/transaction.rs5
-rw-r--r--helix-lsp/src/client.rs2
-rw-r--r--helix-lsp/src/lib.rs112
4 files changed, 107 insertions, 19 deletions
diff --git a/helix-core/src/line_ending.rs b/helix-core/src/line_ending.rs
index 09e92523..953d567d 100644
--- a/helix-core/src/line_ending.rs
+++ b/helix-core/src/line_ending.rs
@@ -203,6 +203,13 @@ pub fn line_end_char_index(slice: &RopeSlice, line: usize) -> usize {
.unwrap_or(0)
}
+pub fn line_end_byte_index(slice: &RopeSlice, line: usize) -> usize {
+ slice.line_to_byte(line + 1)
+ - get_line_ending(&slice.line(line))
+ .map(|le| le.as_str().len())
+ .unwrap_or(0)
+}
+
/// Fetches line `line_idx` from the passed rope slice, sans any line ending.
pub fn line_without_line_ending<'a>(slice: &'a RopeSlice, line_idx: usize) -> RopeSlice<'a> {
let start = slice.line_to_char(line_idx);
diff --git a/helix-core/src/transaction.rs b/helix-core/src/transaction.rs
index 482fd6d9..d2f4de07 100644
--- a/helix-core/src/transaction.rs
+++ b/helix-core/src/transaction.rs
@@ -481,6 +481,11 @@ impl Transaction {
for (from, to, tendril) in changes {
// Verify ranges are ordered and not overlapping
debug_assert!(last <= from);
+ // Verify ranges are correct
+ debug_assert!(
+ from <= to,
+ "Edit end must end before it starts (should {from} <= {to})"
+ );
// Retain from last "to" to current "from"
changeset.retain(from - last);
diff --git a/helix-lsp/src/client.rs b/helix-lsp/src/client.rs
index 6827f568..365c6990 100644
--- a/helix-lsp/src/client.rs
+++ b/helix-lsp/src/client.rs
@@ -104,7 +104,7 @@ impl Client {
server_tx,
request_counter: AtomicU64::new(0),
capabilities: OnceCell::new(),
- offset_encoding: OffsetEncoding::Utf8,
+ offset_encoding: OffsetEncoding::Utf16,
config,
req_timeout,
diff --git a/helix-lsp/src/lib.rs b/helix-lsp/src/lib.rs
index 8418896c..bcaff10a 100644
--- a/helix-lsp/src/lib.rs
+++ b/helix-lsp/src/lib.rs
@@ -57,6 +57,7 @@ pub enum OffsetEncoding {
pub mod util {
use super::*;
+ use helix_core::line_ending::{line_end_byte_index, line_end_char_index};
use helix_core::{diagnostic::NumberOrString, Range, Rope, Selection, Tendril, Transaction};
/// Converts a diagnostic in the document to [`lsp::Diagnostic`].
@@ -117,7 +118,7 @@ pub mod util {
/// Converts [`lsp::Position`] to a position in the document.
///
- /// Returns `None` if position exceeds document length or an operation overflows.
+ /// Returns `None` if position.line is out of bounds or an overflow occurs
pub fn lsp_pos_to_pos(
doc: &Rope,
pos: lsp::Position,
@@ -128,22 +129,58 @@ pub mod util {
return None;
}
- match offset_encoding {
+ // We need to be careful here to fully comply ith the LSP spec.
+ // Two relevant quotes from the spec:
+ //
+ // https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#position
+ // > If the character value is greater than the line length it defaults back
+ // > to the line length.
+ //
+ // https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#textDocuments
+ // > To ensure that both client and server split the string into the same
+ // > line representation the protocol specifies the following end-of-line sequences:
+ // > ‘\n’, ‘\r\n’ and ‘\r’. Positions are line end character agnostic.
+ // > So you can not specify a position that denotes \r|\n or \n| where | represents the character offset.
+ //
+ // This means that while the line must be in bounds the `charater`
+ // must be capped to the end of the line.
+ // Note that the end of the line here is **before** the line terminator
+ // so we must use `line_end_char_index` istead of `doc.line_to_char(pos_line + 1)`
+ //
+ // FIXME: Helix does not fully comply with the LSP spec for line terminators.
+ // The LSP standard requires that line terminators are ['\n', '\r\n', '\r'].
+ // Without the unicode-linebreak feature disabled, the `\r` terminator is not handled by helix.
+ // With the unicode-linebreak feature, helix recognizes multiple extra line break chars
+ // which means that positions will be decoded/encoded incorrectly in their presence
+
+ let line = match offset_encoding {
OffsetEncoding::Utf8 => {
- let line = doc.line_to_char(pos_line);
- let pos = line.checked_add(pos.character as usize)?;
- if pos <= doc.len_chars() {
- Some(pos)
- } else {
- None
- }
+ let line_start = doc.line_to_byte(pos_line);
+ let line_end = line_end_byte_index(&doc.slice(..), pos_line);
+ line_start..line_end
}
OffsetEncoding::Utf16 => {
- let line = doc.line_to_char(pos_line);
- let line_start = doc.char_to_utf16_cu(line);
- let pos = line_start.checked_add(pos.character as usize)?;
- doc.try_utf16_cu_to_char(pos).ok()
+ // TODO directly translate line index to char-idx
+ // ropey can do this just as easily as utf-8 byte translation
+ // but the functions are just missing.
+ // Translate to char first and then utf-16 as a workaround
+ let line_start = doc.line_to_char(pos_line);
+ let line_end = line_end_char_index(&doc.slice(..), pos_line);
+ doc.char_to_utf16_cu(line_start)..doc.char_to_utf16_cu(line_end)
}
+ };
+
+ // The LSP spec demands that the offset is capped to the end of the line
+ let pos = line
+ .start
+ .checked_add(pos.character as usize)
+ .unwrap_or(line.end)
+ .min(line.end);
+
+ // TODO prefer UTF32/char indices to avoid this step
+ match offset_encoding {
+ OffsetEncoding::Utf8 => doc.try_byte_to_char(pos).ok(),
+ OffsetEncoding::Utf16 => doc.try_utf16_cu_to_char(pos).ok(),
}
}
@@ -158,8 +195,8 @@ pub mod util {
match offset_encoding {
OffsetEncoding::Utf8 => {
let line = doc.char_to_line(pos);
- let line_start = doc.line_to_char(line);
- let col = pos - line_start;
+ let line_start = doc.line_to_byte(line);
+ let col = doc.char_to_byte(pos) - line_start;
lsp::Position::new(line as u32, col as u32)
}
@@ -606,16 +643,55 @@ mod tests {
}
test_case!("", (0, 0) => Some(0));
- test_case!("", (0, 1) => None);
+ test_case!("", (0, 1) => Some(0));
test_case!("", (1, 0) => None);
test_case!("\n\n", (0, 0) => Some(0));
test_case!("\n\n", (1, 0) => Some(1));
- test_case!("\n\n", (1, 1) => Some(2));
+ test_case!("\n\n", (1, 1) => Some(1));
test_case!("\n\n", (2, 0) => Some(2));
test_case!("\n\n", (3, 0) => None);
test_case!("test\n\n\n\ncase", (4, 3) => Some(11));
test_case!("test\n\n\n\ncase", (4, 4) => Some(12));
- test_case!("test\n\n\n\ncase", (4, 5) => None);
+ test_case!("test\n\n\n\ncase", (4, 5) => Some(12));
test_case!("", (u32::MAX, u32::MAX) => None);
}
+
+ #[test]
+ fn emoji_format_gh_4791() {
+ use lsp_types::{Position, Range, TextEdit};
+
+ let edits = vec![
+ TextEdit {
+ range: Range {
+ start: Position {
+ line: 0,
+ character: 1,
+ },
+ end: Position {
+ line: 1,
+ character: 0,
+ },
+ },
+ new_text: "\n ".to_string(),
+ },
+ TextEdit {
+ range: Range {
+ start: Position {
+ line: 1,
+ character: 7,
+ },
+ end: Position {
+ line: 2,
+ character: 0,
+ },
+ },
+ new_text: "\n ".to_string(),
+ },
+ ];
+
+ let mut source = Rope::from_str("[\n\"🇺🇸\",\n\"🎄\",\n]");
+
+ let transaction = generate_transaction_from_edits(&source, edits, OffsetEncoding::Utf8);
+ assert!(transaction.apply(&mut source));
+ }
}