diff options
Diffstat (limited to 'helix-core/src')
-rw-r--r-- | helix-core/src/auto_pairs.rs | 781 | ||||
-rw-r--r-- | helix-core/src/chars.rs | 9 | ||||
-rw-r--r-- | helix-core/src/diagnostic.rs | 15 | ||||
-rw-r--r-- | helix-core/src/diff.rs | 6 | ||||
-rw-r--r-- | helix-core/src/graphemes.rs | 86 | ||||
-rw-r--r-- | helix-core/src/history.rs | 4 | ||||
-rw-r--r-- | helix-core/src/increment/date_time.rs | 490 | ||||
-rw-r--r-- | helix-core/src/increment/mod.rs | 8 | ||||
-rw-r--r-- | helix-core/src/increment/number.rs (renamed from helix-core/src/numbers.rs) | 40 | ||||
-rw-r--r-- | helix-core/src/indent.rs | 147 | ||||
-rw-r--r-- | helix-core/src/lib.rs | 44 | ||||
-rw-r--r-- | helix-core/src/line_ending.rs | 2 | ||||
-rw-r--r-- | helix-core/src/match_brackets.rs | 2 | ||||
-rw-r--r-- | helix-core/src/movement.rs | 52 | ||||
-rw-r--r-- | helix-core/src/object.rs | 73 | ||||
-rw-r--r-- | helix-core/src/position.rs | 12 | ||||
-rw-r--r-- | helix-core/src/register.rs | 4 | ||||
-rw-r--r-- | helix-core/src/selection.rs | 109 | ||||
-rw-r--r-- | helix-core/src/shellwords.rs | 164 | ||||
-rw-r--r-- | helix-core/src/surround.rs | 1 | ||||
-rw-r--r-- | helix-core/src/syntax.rs | 1260 | ||||
-rw-r--r-- | helix-core/src/transaction.rs | 42 |
22 files changed, 2420 insertions, 931 deletions
diff --git a/helix-core/src/auto_pairs.rs b/helix-core/src/auto_pairs.rs index cc966852..f4359a34 100644 --- a/helix-core/src/auto_pairs.rs +++ b/helix-core/src/auto_pairs.rs @@ -1,7 +1,10 @@ //! When typing the opening character of one of the possible pairs defined below, //! this module provides the functionality to insert the paired closing character. -use crate::{Range, Rope, Selection, Tendril, Transaction}; +use crate::{ + graphemes, movement::Direction, Range, Rope, RopeGraphemes, Selection, Tendril, Transaction, +}; +use log::debug; use smallvec::SmallVec; // Heavily based on https://github.com/codemirror/closebrackets/ @@ -15,7 +18,9 @@ pub const PAIRS: &[(char, char)] = &[ ('`', '`'), ]; -const CLOSE_BEFORE: &str = ")]}'\":;> \n\r\u{000B}\u{000C}\u{0085}\u{2028}\u{2029}"; // includes space and newlines +// [TODO] build this dynamically in language config. see #992 +const OPEN_BEFORE: &str = "([{'\":;,> \n\r\u{000B}\u{000C}\u{0085}\u{2028}\u{2029}"; +const CLOSE_BEFORE: &str = ")]}'\":;,> \n\r\u{000B}\u{000C}\u{0085}\u{2028}\u{2029}"; // includes space and newlines // insert hook: // Fn(doc, selection, char) => Option<Transaction> @@ -25,14 +30,19 @@ const CLOSE_BEFORE: &str = ")]}'\":;> \n\r\u{000B}\u{000C}\u{0085}\u{2028}\u{202 // // to simplify, maybe return Option<Transaction> and just reimplement the default -// TODO: delete implementation where it erases the whole bracket (|) -> | +// [TODO] +// * delete implementation where it erases the whole bracket (|) -> | +// * change to multi character pairs to handle cases like placing the cursor in the +// middle of triple quotes, and more exotic pairs like Jinja's {% %} #[must_use] pub fn hook(doc: &Rope, selection: &Selection, ch: char) -> Option<Transaction> { + debug!("autopairs hook selection: {:#?}", selection); + for &(open, close) in PAIRS { if open == ch { if open == close { - return handle_same(doc, selection, open); + return Some(handle_same(doc, selection, open, CLOSE_BEFORE, OPEN_BEFORE)); } 
else { return Some(handle_open(doc, selection, open, close, CLOSE_BEFORE)); } @@ -47,18 +57,145 @@ pub fn hook(doc: &Rope, selection: &Selection, ch: char) -> Option<Transaction> None } -// TODO: special handling for lifetimes in rust: if preceeded by & or < don't auto close ' -// for example "&'a mut", or "fn<'a>" - -fn next_char(doc: &Rope, pos: usize) -> Option<char> { - if pos >= doc.len_chars() { +fn prev_char(doc: &Rope, pos: usize) -> Option<char> { + if pos == 0 { return None; } - Some(doc.char(pos)) + + doc.get_char(pos - 1) +} + +fn is_single_grapheme(doc: &Rope, range: &Range) -> bool { + let mut graphemes = RopeGraphemes::new(doc.slice(range.from()..range.to())); + let first = graphemes.next(); + let second = graphemes.next(); + debug!("first: {:#?}, second: {:#?}", first, second); + first.is_some() && second.is_none() +} + +/// calculate what the resulting range should be for an auto pair insertion +fn get_next_range( + doc: &Rope, + start_range: &Range, + offset: usize, + typed_char: char, + len_inserted: usize, +) -> Range { + // When the character under the cursor changes due to complete pair + // insertion, we must look backward a grapheme and then add the length + // of the insertion to put the resulting cursor in the right place, e.g. 
+ // + // foo[\r\n] - anchor: 3, head: 5 + // foo([)]\r\n - anchor: 4, head: 5 + // + // foo[\r\n] - anchor: 3, head: 5 + // foo'[\r\n] - anchor: 4, head: 6 + // + // foo([)]\r\n - anchor: 4, head: 5 + // foo()[\r\n] - anchor: 5, head: 7 + // + // [foo]\r\n - anchor: 0, head: 3 + // [foo(])\r\n - anchor: 0, head: 5 + + // inserting at the very end of the document after the last newline + if start_range.head == doc.len_chars() && start_range.anchor == doc.len_chars() { + return Range::new( + start_range.anchor + offset + typed_char.len_utf8(), + start_range.head + offset + typed_char.len_utf8(), + ); + } + + let single_grapheme = is_single_grapheme(doc, start_range); + let doc_slice = doc.slice(..); + + // just skip over graphemes + if len_inserted == 0 { + let end_anchor = if single_grapheme { + graphemes::next_grapheme_boundary(doc_slice, start_range.anchor) + offset + + // even for backward inserts with multiple grapheme selections, + // we want the anchor to stay where it is so that the relative + // selection does not change, e.g.: + // + // foo([) wor]d -> insert ) -> foo()[ wor]d + } else { + start_range.anchor + offset + }; + + return Range::new( + end_anchor, + graphemes::next_grapheme_boundary(doc_slice, start_range.head) + offset, + ); + } + + // trivial case: only inserted a single-char opener, just move the selection + if len_inserted == 1 { + let end_anchor = if single_grapheme || start_range.direction() == Direction::Backward { + start_range.anchor + offset + typed_char.len_utf8() + } else { + start_range.anchor + offset + }; + + return Range::new( + end_anchor, + start_range.head + offset + typed_char.len_utf8(), + ); + } + + // If the head = 0, then we must be in insert mode with a backward + // cursor, which implies the head will just move + let end_head = if start_range.head == 0 || start_range.direction() == Direction::Backward { + start_range.head + offset + typed_char.len_utf8() + } else { + // We must have a forward cursor, which means we must 
move to the + // other end of the grapheme to get to where the new characters + // are inserted, then move the head to where it should be + let prev_bound = graphemes::prev_grapheme_boundary(doc_slice, start_range.head); + debug!( + "prev_bound: {}, offset: {}, len_inserted: {}", + prev_bound, offset, len_inserted + ); + prev_bound + offset + len_inserted + }; + + let end_anchor = match (start_range.len(), start_range.direction()) { + // if we have a zero width cursor, it shifts to the same number + (0, _) => end_head, + + // If we are inserting for a regular one-width cursor, the anchor + // moves with the head. This is the fast path for ASCII. + (1, Direction::Forward) => end_head - 1, + (1, Direction::Backward) => end_head + 1, + + (_, Direction::Forward) => { + if single_grapheme { + graphemes::prev_grapheme_boundary(doc.slice(..), start_range.head) + + typed_char.len_utf8() + + // if we are appending, the anchor stays where it is; only offset + // for multiple range insertions + } else { + start_range.anchor + offset + } + } + + (_, Direction::Backward) => { + if single_grapheme { + // if we're backward, then the head is at the first char + // of the typed char, so we need to add the length of + // the closing char + graphemes::prev_grapheme_boundary(doc.slice(..), start_range.anchor) + len_inserted + } else { + // when we are inserting in front of a selection, we need to move + // the anchor over by however many characters were inserted overall + start_range.anchor + offset + len_inserted + } + } + }; + + Range::new(end_anchor, end_head) } -// TODO: selections should be extended if range, moved if point. 
-// TODO: if not cursor but selection, wrap on both sides of selection (surround) fn handle_open( doc: &Rope, selection: &Selection, @@ -66,98 +203,584 @@ fn handle_open( close: char, close_before: &str, ) -> Transaction { - let mut ranges = SmallVec::with_capacity(selection.len()); - + let mut end_ranges = SmallVec::with_capacity(selection.len()); let mut offs = 0; - let transaction = Transaction::change_by_selection(doc, selection, |range| { - let pos = range.head; - let next = next_char(doc, pos); - - let head = pos + offs + open.len_utf8(); - // if selection, retain anchor, if cursor, move over - ranges.push(Range::new( - if range.is_empty() { - head - } else { - range.anchor + offs - }, - head, - )); + let transaction = Transaction::change_by_selection(doc, selection, |start_range| { + let cursor = start_range.cursor(doc.slice(..)); + let next_char = doc.get_char(cursor); + let len_inserted; - match next { + let change = match next_char { Some(ch) if !close_before.contains(ch) => { - offs += 1; - // TODO: else return (use default handler that inserts open) - (pos, pos, Some(Tendril::from_char(open))) + len_inserted = open.len_utf8(); + let mut tendril = Tendril::new(); + tendril.push(open); + (cursor, cursor, Some(tendril)) } // None | Some(ch) if close_before.contains(ch) => {} _ => { // insert open & close - let mut pair = Tendril::with_capacity(2); - pair.push_char(open); - pair.push_char(close); + let pair = Tendril::from_iter([open, close]); + len_inserted = open.len_utf8() + close.len_utf8(); + (cursor, cursor, Some(pair)) + } + }; - offs += 2; + let next_range = get_next_range(doc, start_range, offs, open, len_inserted); + end_ranges.push(next_range); + offs += len_inserted; - (pos, pos, Some(pair)) - } - } + change }); - transaction.with_selection(Selection::new(ranges, selection.primary_index())) + let t = transaction.with_selection(Selection::new(end_ranges, selection.primary_index())); + debug!("auto pair transaction: {:#?}", t); + t } fn 
handle_close(doc: &Rope, selection: &Selection, _open: char, close: char) -> Transaction { - let mut ranges = SmallVec::with_capacity(selection.len()); + let mut end_ranges = SmallVec::with_capacity(selection.len()); let mut offs = 0; - let transaction = Transaction::change_by_selection(doc, selection, |range| { - let pos = range.head; - let next = next_char(doc, pos); + let transaction = Transaction::change_by_selection(doc, selection, |start_range| { + let cursor = start_range.cursor(doc.slice(..)); + let next_char = doc.get_char(cursor); + let mut len_inserted = 0; - let head = pos + offs + close.len_utf8(); - // if selection, retain anchor, if cursor, move over - ranges.push(Range::new( - if range.is_empty() { - head - } else { - range.anchor + offs - }, - head, - )); + let change = if next_char == Some(close) { + // return transaction that moves past close + (cursor, cursor, None) // no-op + } else { + len_inserted += close.len_utf8(); + let mut tendril = Tendril::new(); + tendril.push(close); + (cursor, cursor, Some(tendril)) + }; + + let next_range = get_next_range(doc, start_range, offs, close, len_inserted); + end_ranges.push(next_range); + offs += len_inserted; + + change + }); + + let t = transaction.with_selection(Selection::new(end_ranges, selection.primary_index())); + debug!("auto pair transaction: {:#?}", t); + t +} + +/// handle cases where open and close is the same, or in triples ("""docstring""") +fn handle_same( + doc: &Rope, + selection: &Selection, + token: char, + close_before: &str, + open_before: &str, +) -> Transaction { + let mut end_ranges = SmallVec::with_capacity(selection.len()); + + let mut offs = 0; - if next == Some(close) { + let transaction = Transaction::change_by_selection(doc, selection, |start_range| { + let cursor = start_range.cursor(doc.slice(..)); + let mut len_inserted = 0; + + let next_char = doc.get_char(cursor); + let prev_char = prev_char(doc, cursor); + + let change = if next_char == Some(token) { // return 
transaction that moves past close - (pos, pos, None) // no-op + (cursor, cursor, None) // no-op } else { - offs += close.len_utf8(); + let mut pair = Tendril::new(); + pair.push(token); - // TODO: else return (use default handler that inserts close) - (pos, pos, Some(Tendril::from_char(close))) - } + // for equal pairs, don't insert both open and close if either + // side has a non-pair char + if (next_char.is_none() || close_before.contains(next_char.unwrap())) + && (prev_char.is_none() || open_before.contains(prev_char.unwrap())) + { + pair.push(token); + } + + len_inserted += pair.len(); + (cursor, cursor, Some(pair)) + }; + + let next_range = get_next_range(doc, start_range, offs, token, len_inserted); + end_ranges.push(next_range); + offs += len_inserted; + + change }); - transaction.with_selection(Selection::new(ranges, selection.primary_index())) + let t = transaction.with_selection(Selection::new(end_ranges, selection.primary_index())); + debug!("auto pair transaction: {:#?}", t); + t } -// handle cases where open and close is the same, or in triples ("""docstring""") -fn handle_same(_doc: &Rope, _selection: &Selection, _token: char) -> Option<Transaction> { - // if not cursor but selection, wrap - // let next = next char - - // if next == bracket { - // // if start of syntax node, insert token twice (new pair because node is complete) - // // elseif colsedBracketAt - // // is_triple == allow triple && next 3 is equal - // // cursor jump over - // } - //} else if allow_triple && followed by triple { - //} - //} else if next != word char && prev != bracket && prev != word char { - // // condition checks for cases like I' where you don't want I'' (or I'm) - // insert pair ("") - //} - None +#[cfg(test)] +mod test { + use super::*; + use smallvec::smallvec; + + const LINE_END: &str = crate::DEFAULT_LINE_ENDING.as_str(); + + fn differing_pairs() -> impl Iterator<Item = &'static (char, char)> { + PAIRS.iter().filter(|(open, close)| open != close) + } + + fn 
matching_pairs() -> impl Iterator<Item = &'static (char, char)> { + PAIRS.iter().filter(|(open, close)| open == close) + } + + fn test_hooks( + in_doc: &Rope, + in_sel: &Selection, + ch: char, + expected_doc: &Rope, + expected_sel: &Selection, + ) { + let trans = hook(in_doc, in_sel, ch).unwrap(); + let mut actual_doc = in_doc.clone(); + assert!(trans.apply(&mut actual_doc)); + assert_eq!(expected_doc, &actual_doc); + assert_eq!(expected_sel, trans.selection().unwrap()); + } + + fn test_hooks_with_pairs<I, F, R>( + in_doc: &Rope, + in_sel: &Selection, + pairs: I, + get_expected_doc: F, + actual_sel: &Selection, + ) where + I: IntoIterator<Item = &'static (char, char)>, + F: Fn(char, char) -> R, + R: Into<Rope>, + Rope: From<R>, + { + pairs.into_iter().for_each(|(open, close)| { + test_hooks( + in_doc, + in_sel, + *open, + &Rope::from(get_expected_doc(*open, *close)), + actual_sel, + ) + }); + } + + // [] indicates range + + /// [] -> insert ( -> ([]) + #[test] + fn test_insert_blank() { + test_hooks_with_pairs( + &Rope::from(LINE_END), + &Selection::single(1, 0), + PAIRS, + |open, close| format!("{}{}{}", open, close, LINE_END), + &Selection::single(2, 1), + ); + + let empty_doc = Rope::from(format!("{line_end}{line_end}", line_end = LINE_END)); + + test_hooks_with_pairs( + &empty_doc, + &Selection::single(empty_doc.len_chars(), LINE_END.len()), + PAIRS, + |open, close| { + format!( + "{line_end}{open}{close}{line_end}", + open = open, + close = close, + line_end = LINE_END + ) + }, + &Selection::single(LINE_END.len() + 2, LINE_END.len() + 1), + ); + } + + #[test] + fn test_insert_before_multi_code_point_graphemes() { + test_hooks_with_pairs( + &Rope::from(format!("hello 👨‍👩‍👧‍👦 goodbye{}", LINE_END)), + &Selection::single(13, 6), + PAIRS, + |open, _| format!("hello {}👨‍👩‍👧‍👦 goodbye{}", open, LINE_END), + &Selection::single(14, 7), + ); + } + + #[test] + fn test_insert_at_end_of_document() { + test_hooks_with_pairs( + &Rope::from(LINE_END),
&Selection::single(LINE_END.len(), LINE_END.len()), + PAIRS, + |open, close| format!("{}{}{}", LINE_END, open, close), + &Selection::single(LINE_END.len() + 1, LINE_END.len() + 1), + ); + + test_hooks_with_pairs( + &Rope::from(format!("foo{}", LINE_END)), + &Selection::single(3 + LINE_END.len(), 3 + LINE_END.len()), + PAIRS, + |open, close| format!("foo{}{}{}", LINE_END, open, close), + &Selection::single(LINE_END.len() + 4, LINE_END.len() + 4), + ); + } + + /// [] -> append ( -> ([]) + #[test] + fn test_append_blank() { + test_hooks_with_pairs( + // this is what happens when you have a totally blank document and then append + &Rope::from(format!("{line_end}{line_end}", line_end = LINE_END)), + // before inserting the pair, the cursor covers all of both empty lines + &Selection::single(0, LINE_END.len() * 2), + PAIRS, + |open, close| { + format!( + "{line_end}{open}{close}{line_end}", + line_end = LINE_END, + open = open, + close = close + ) + }, + // after inserting pair, the cursor covers the first new line and the open char + &Selection::single(0, LINE_END.len() + 2), + ); + } + + /// [] ([]) + /// [] -> insert -> ([]) + /// [] ([]) + #[test] + fn test_insert_blank_multi_cursor() { + test_hooks_with_pairs( + &Rope::from("\n\n\n"), + &Selection::new( + smallvec!(Range::new(1, 0), Range::new(2, 1), Range::new(3, 2),), + 0, + ), + PAIRS, + |open, close| { + format!( + "{open}{close}\n{open}{close}\n{open}{close}\n", + open = open, + close = close + ) + }, + &Selection::new( + smallvec!(Range::new(2, 1), Range::new(5, 4), Range::new(8, 7),), + 0, + ), + ); + } + + /// fo[o] -> append ( -> fo[o(]) + #[test] + fn test_append() { + test_hooks_with_pairs( + &Rope::from("foo\n"), + &Selection::single(2, 4), + differing_pairs(), + |open, close| format!("foo{}{}\n", open, close), + &Selection::single(2, 5), + ); + } + + /// foo[] -> append to end of line ( -> foo([]) + #[test] + fn test_append_single_cursor() { + test_hooks_with_pairs( + &Rope::from(format!("foo{}", 
LINE_END)), + &Selection::single(3, 3 + LINE_END.len()), + differing_pairs(), + |open, close| format!("foo{}{}{}", open, close, LINE_END), + &Selection::single(4, 5), + ); + } + + /// fo[o] fo[o(]) + /// fo[o] -> append ( -> fo[o(]) + /// fo[o] fo[o(]) + #[test] + fn test_append_multi() { + test_hooks_with_pairs( + &Rope::from("foo\nfoo\nfoo\n"), + &Selection::new( + smallvec!(Range::new(2, 4), Range::new(6, 8), Range::new(10, 12)), + 0, + ), + differing_pairs(), + |open, close| { + format!( + "foo{open}{close}\nfoo{open}{close}\nfoo{open}{close}\n", + open = open, + close = close + ) + }, + &Selection::new( + smallvec!(Range::new(2, 5), Range::new(8, 11), Range::new(14, 17)), + 0, + ), + ); + } + + /// ([)] -> insert ) -> ()[] + #[test] + fn test_insert_close_inside_pair() { + for (open, close) in PAIRS { + let doc = Rope::from(format!("{}{}{}", open, close, LINE_END)); + + test_hooks( + &doc, + &Selection::single(2, 1), + *close, + &doc, + &Selection::single(2 + LINE_END.len(), 2), + ); + } + } + + /// [(]) -> append ) -> [()] + #[test] + fn test_append_close_inside_pair() { + for (open, close) in PAIRS { + let doc = Rope::from(format!("{}{}{}", open, close, LINE_END)); + + test_hooks( + &doc, + &Selection::single(0, 2), + *close, + &doc, + &Selection::single(0, 2 + LINE_END.len()), + ); + } + } + + /// ([]) ()[] + /// ([]) -> insert ) -> ()[] + /// ([]) ()[] + #[test] + fn test_insert_close_inside_pair_multi_cursor() { + let sel = Selection::new( + smallvec!(Range::new(2, 1), Range::new(5, 4), Range::new(8, 7),), + 0, + ); + + let expected_sel = Selection::new( + smallvec!(Range::new(3, 2), Range::new(6, 5), Range::new(9, 8),), + 0, + ); + + for (open, close) in PAIRS { + let doc = Rope::from(format!( + "{open}{close}\n{open}{close}\n{open}{close}\n", + open = open, + close = close + )); + + test_hooks(&doc, &sel, *close, &doc, &expected_sel); + } + } + + /// [(]) [()] + /// [(]) -> append ) -> [()] + /// [(]) [()] + #[test] + fn 
test_append_close_inside_pair_multi_cursor() { + let sel = Selection::new( + smallvec!(Range::new(0, 2), Range::new(3, 5), Range::new(6, 8),), + 0, + ); + + let expected_sel = Selection::new( + smallvec!(Range::new(0, 3), Range::new(3, 6), Range::new(6, 9),), + 0, + ); + + for (open, close) in PAIRS { + let doc = Rope::from(format!( + "{open}{close}\n{open}{close}\n{open}{close}\n", + open = open, + close = close + )); + + test_hooks(&doc, &sel, *close, &doc, &expected_sel); + } + } + + /// ([]) -> insert ( -> (([])) + #[test] + fn test_insert_open_inside_pair() { + let sel = Selection::single(2, 1); + let expected_sel = Selection::single(3, 2); + + for (open, close) in differing_pairs() { + let doc = Rope::from(format!("{}{}", open, close)); + let expected_doc = Rope::from(format!( + "{open}{open}{close}{close}", + open = open, + close = close + )); + + test_hooks(&doc, &sel, *open, &expected_doc, &expected_sel); + } + } + + /// [word(]) -> append ( -> [word((])) + #[test] + fn test_append_open_inside_pair() { + let sel = Selection::single(0, 6); + let expected_sel = Selection::single(0, 7); + + for (open, close) in differing_pairs() { + let doc = Rope::from(format!("word{}{}", open, close)); + let expected_doc = Rope::from(format!( + "word{open}{open}{close}{close}", + open = open, + close = close + )); + + test_hooks(&doc, &sel, *open, &expected_doc, &expected_sel); + } + } + + /// ([]) -> insert " -> ("[]") + #[test] + fn test_insert_nested_open_inside_pair() { + let sel = Selection::single(2, 1); + let expected_sel = Selection::single(3, 2); + + for (outer_open, outer_close) in differing_pairs() { + let doc = Rope::from(format!("{}{}", outer_open, outer_close,)); + + for (inner_open, inner_close) in matching_pairs() { + let expected_doc = Rope::from(format!( + "{}{}{}{}", + outer_open, inner_open, inner_close, outer_close + )); + + test_hooks(&doc, &sel, *inner_open, &expected_doc, &expected_sel); + } + } + } + + /// [(]) -> append " -> [("]") + #[test] + fn 
test_append_nested_open_inside_pair() { + let sel = Selection::single(0, 2); + let expected_sel = Selection::single(0, 3); + + for (outer_open, outer_close) in differing_pairs() { + let doc = Rope::from(format!("{}{}", outer_open, outer_close,)); + + for (inner_open, inner_close) in matching_pairs() { + let expected_doc = Rope::from(format!( + "{}{}{}{}", + outer_open, inner_open, inner_close, outer_close + )); + + test_hooks(&doc, &sel, *inner_open, &expected_doc, &expected_sel); + } + } + } + + /// []word -> insert ( -> ([]word + #[test] + fn test_insert_open_before_non_pair() { + test_hooks_with_pairs( + &Rope::from("word"), + &Selection::single(1, 0), + PAIRS, + |open, _| format!("{}word", open), + &Selection::single(2, 1), + ) + } + + /// [wor]d -> insert ( -> ([wor]d + #[test] + fn test_insert_open_with_selection() { + test_hooks_with_pairs( + &Rope::from("word"), + &Selection::single(3, 0), + PAIRS, + |open, _| format!("{}word", open), + &Selection::single(4, 1), + ) + } + + /// [wor]d -> append ) -> [wor)]d + #[test] + fn test_append_close_inside_non_pair_with_selection() { + let sel = Selection::single(0, 4); + let expected_sel = Selection::single(0, 5); + + for (_, close) in PAIRS { + let doc = Rope::from("word"); + let expected_doc = Rope::from(format!("wor{}d", close)); + test_hooks(&doc, &sel, *close, &expected_doc, &expected_sel); + } + } + + /// foo[ wor]d -> insert ( -> foo([) wor]d + #[test] + fn test_insert_open_trailing_word_with_selection() { + test_hooks_with_pairs( + &Rope::from("foo word"), + &Selection::single(7, 3), + differing_pairs(), + |open, close| format!("foo{}{} word", open, close), + &Selection::single(9, 4), + ) + } + + /// foo([) wor]d -> insert ) -> foo()[ wor]d + #[test] + fn test_insert_close_inside_pair_trailing_word_with_selection() { + for (open, close) in differing_pairs() { + test_hooks( + &Rope::from(format!("foo{}{} word{}", open, close, LINE_END)), + &Selection::single(9, 4), + *close, + &Rope::from(format!("foo{}{} 
word{}", open, close, LINE_END)), + &Selection::single(9, 5), + ) + } + } + + /// we want pairs that are *not* the same char to be inserted after + /// a non-pair char, for cases like functions, but for pairs that are + /// the same char, we want to *not* insert a pair to handle cases like "I'm" + /// + /// word[] -> insert ( -> word([]) + /// word[] -> insert ' -> word'[] + #[test] + fn test_insert_open_after_non_pair() { + let doc = Rope::from(format!("word{}", LINE_END)); + let sel = Selection::single(5, 4); + let expected_sel = Selection::single(6, 5); + + test_hooks_with_pairs( + &doc, + &sel, + differing_pairs(), + |open, close| format!("word{}{}{}", open, close, LINE_END), + &expected_sel, + ); + + test_hooks_with_pairs( + &doc, + &sel, + matching_pairs(), + |open, _| format!("word{}{}", open, LINE_END), + &expected_sel, + ); + } } diff --git a/helix-core/src/chars.rs b/helix-core/src/chars.rs index c8e5efbd..54991574 100644 --- a/helix-core/src/chars.rs +++ b/helix-core/src/chars.rs @@ -91,12 +91,11 @@ mod test { #[test] fn test_categorize() { - const EOL_TEST_CASE: &'static str = "\n\r\u{000B}\u{000C}\u{0085}\u{2028}\u{2029}"; - const WORD_TEST_CASE: &'static str = - "_hello_world_γγγγγγΌ1234567890οΌοΌοΌοΌοΌοΌοΌοΌοΌοΌ"; - const PUNCTUATION_TEST_CASE: &'static str = + const EOL_TEST_CASE: &str = "\n\r\u{000B}\u{000C}\u{0085}\u{2028}\u{2029}"; + const WORD_TEST_CASE: &str = "_hello_world_γγγγγγΌ1234567890οΌοΌοΌοΌοΌοΌοΌοΌοΌοΌ"; + const PUNCTUATION_TEST_CASE: &str = "!\"#$%&\'()*+,-./:;<=>?@[\\]^`{|}~οΌβοΌοΌοΌ
οΌβοΌοΌοΌοΌγγοΌοΌοΌοΌοΌοΌοΌ γγοΌΎο½ο½ο½ο½ο½"; - const WHITESPACE_TEST_CASE: &'static str = "α γβ―ββ"; + const WHITESPACE_TEST_CASE: &str = "α γβ―ββ"; for ch in EOL_TEST_CASE.chars() { assert_eq!(CharCategory::Eol, categorize_char(ch)); diff --git a/helix-core/src/diagnostic.rs b/helix-core/src/diagnostic.rs index 4fcf51c9..210ad639 100644 --- a/helix-core/src/diagnostic.rs +++ b/helix-core/src/diagnostic.rs @@ -1,12 +1,19 @@ //! LSP diagnostic utility types. +use serde::{Deserialize, Serialize}; /// Describes the severity level of a [`Diagnostic`]. -#[derive(Debug, Clone, Copy, Eq, PartialEq)] +#[derive(Debug, Clone, Copy, Eq, PartialEq, PartialOrd, Ord, Deserialize, Serialize)] pub enum Severity { - Error, - Warning, - Info, Hint, + Info, + Warning, + Error, +} + +impl Default for Severity { + fn default() -> Self { + Self::Hint + } } /// A range of `char`s within the text. diff --git a/helix-core/src/diff.rs b/helix-core/src/diff.rs index a83db333..6960c679 100644 --- a/helix-core/src/diff.rs +++ b/helix-core/src/diff.rs @@ -11,10 +11,6 @@ pub fn compare_ropes(old: &Rope, new: &Rope) -> Transaction { // A timeout is set so after 1 seconds, the algorithm will start // approximating. This is especially important for big `Rope`s or // `Rope`s that are extremely dissimilar to each other. - // - // Note: Ignore the clippy warning, as the trait bounds of - // `Transaction::change()` require an iterator implementing - // `ExactIterator`. 
let mut config = similar::TextDiff::configure(); config.timeout(std::time::Duration::from_secs(1)); @@ -62,7 +58,7 @@ mod tests { let mut old = Rope::from(a); let new = Rope::from(b); compare_ropes(&old, &new).apply(&mut old); - old.to_string() == new.to_string() + old == new } } } diff --git a/helix-core/src/graphemes.rs b/helix-core/src/graphemes.rs index c6398875..aa898684 100644 --- a/helix-core/src/graphemes.rs +++ b/helix-core/src/graphemes.rs @@ -120,6 +120,43 @@ pub fn nth_next_grapheme_boundary(slice: RopeSlice, char_idx: usize, n: usize) - chunk_char_idx + tmp } +#[must_use] +pub fn nth_next_grapheme_boundary_byte(slice: RopeSlice, mut byte_idx: usize, n: usize) -> usize { + // Bounds check + debug_assert!(byte_idx <= slice.len_bytes()); + + // Get the chunk with our byte index in it. + let (mut chunk, mut chunk_byte_idx, mut _chunk_char_idx, _) = slice.chunk_at_byte(byte_idx); + + // Set up the grapheme cursor. + let mut gc = GraphemeCursor::new(byte_idx, slice.len_bytes(), true); + + // Find the nth next grapheme cluster boundary. + for _ in 0..n { + loop { + match gc.next_boundary(chunk, chunk_byte_idx) { + Ok(None) => return slice.len_bytes(), + Ok(Some(n)) => { + byte_idx = n; + break; + } + Err(GraphemeIncomplete::NextChunk) => { + chunk_byte_idx += chunk.len(); + let (a, _, _c, _) = slice.chunk_at_byte(chunk_byte_idx); + chunk = a; + // chunk_char_idx = c; + } + Err(GraphemeIncomplete::PreContext(n)) => { + let ctx_chunk = slice.chunk_at_byte(n - 1).0; + gc.provide_context(ctx_chunk, n - ctx_chunk.len()); + } + _ => unreachable!(), + } + } + } + byte_idx +} + /// Finds the next grapheme boundary after the given char position. #[must_use] #[inline(always)] @@ -127,6 +164,13 @@ pub fn next_grapheme_boundary(slice: RopeSlice, char_idx: usize) -> usize { nth_next_grapheme_boundary(slice, char_idx, 1) } +/// Finds the next grapheme boundary after the given byte position. 
+#[must_use] +#[inline(always)] +pub fn next_grapheme_boundary_byte(slice: RopeSlice, byte_idx: usize) -> usize { + nth_next_grapheme_boundary_byte(slice, byte_idx, 1) +} + /// Returns the passed char index if it's already a grapheme boundary, /// or the next grapheme boundary char index if not. #[must_use] @@ -151,6 +195,23 @@ pub fn ensure_grapheme_boundary_prev(slice: RopeSlice, char_idx: usize) -> usize } } +/// Returns the passed byte index if it's already a grapheme boundary, +/// or the next grapheme boundary byte index if not. +#[must_use] +#[inline] +pub fn ensure_grapheme_boundary_next_byte(slice: RopeSlice, byte_idx: usize) -> usize { + if byte_idx == 0 { + byte_idx + } else { + // TODO: optimize so we're not constructing grapheme cursor twice + if is_grapheme_boundary_byte(slice, byte_idx) { + byte_idx + } else { + next_grapheme_boundary_byte(slice, byte_idx) + } + } +} + /// Returns whether the given char position is a grapheme boundary. #[must_use] pub fn is_grapheme_boundary(slice: RopeSlice, char_idx: usize) -> bool { @@ -179,6 +240,31 @@ pub fn is_grapheme_boundary(slice: RopeSlice, char_idx: usize) -> bool { } } +/// Returns whether the given byte position is a grapheme boundary. +#[must_use] +pub fn is_grapheme_boundary_byte(slice: RopeSlice, byte_idx: usize) -> bool { + // Bounds check + debug_assert!(byte_idx <= slice.len_bytes()); + + // Get the chunk with our byte index in it. + let (chunk, chunk_byte_idx, _, _) = slice.chunk_at_byte(byte_idx); + + // Set up the grapheme cursor. + let mut gc = GraphemeCursor::new(byte_idx, slice.len_bytes(), true); + + // Determine if the given position is a grapheme cluster boundary. 
+ loop { + match gc.is_boundary(chunk, chunk_byte_idx) { + Ok(n) => return n, + Err(GraphemeIncomplete::PreContext(n)) => { + let (ctx_chunk, ctx_byte_start, _, _) = slice.chunk_at_byte(n - 1); + gc.provide_context(ctx_chunk, ctx_byte_start); + } + Err(_) => unreachable!(), + } + } +} + /// An iterator over the graphemes of a `RopeSlice`. #[derive(Clone)] pub struct RopeGraphemes<'a> { diff --git a/helix-core/src/history.rs b/helix-core/src/history.rs index 4b1c8d3b..bb95213c 100644 --- a/helix-core/src/history.rs +++ b/helix-core/src/history.rs @@ -448,8 +448,8 @@ mod test { change: crate::transaction::Change, instant: Instant, ) { - let txn = Transaction::change(&state.doc, vec![change.clone()].into_iter()); - history.commit_revision_at_timestamp(&txn, &state, instant); + let txn = Transaction::change(&state.doc, vec![change].into_iter()); + history.commit_revision_at_timestamp(&txn, state, instant); txn.apply(&mut state.doc); } diff --git a/helix-core/src/increment/date_time.rs b/helix-core/src/increment/date_time.rs new file mode 100644 index 00000000..91fa5963 --- /dev/null +++ b/helix-core/src/increment/date_time.rs @@ -0,0 +1,490 @@ +use chrono::{Datelike, Duration, NaiveDate, NaiveDateTime, NaiveTime, Timelike}; +use once_cell::sync::Lazy; +use regex::Regex; +use ropey::RopeSlice; + +use std::borrow::Cow; +use std::cmp; + +use super::Increment; +use crate::{Range, Tendril}; + +#[derive(Debug, PartialEq, Eq)] +pub struct DateTimeIncrementor { + date_time: NaiveDateTime, + range: Range, + fmt: &'static str, + field: DateField, +} + +impl DateTimeIncrementor { + pub fn from_range(text: RopeSlice, range: Range) -> Option<DateTimeIncrementor> { + let range = if range.is_empty() { + if range.anchor < text.len_chars() { + // Treat empty range as a cursor range. + range.put_cursor(text, range.anchor + 1, true) + } else { + // The range is empty and at the end of the text. 
+ return None; + } + } else { + range + }; + + FORMATS.iter().find_map(|format| { + let from = range.from().saturating_sub(format.max_len); + let to = (range.from() + format.max_len).min(text.len_chars()); + + let (from_in_text, to_in_text) = (range.from() - from, range.to() - from); + let text: Cow<str> = text.slice(from..to).into(); + + let captures = format.regex.captures(&text)?; + if captures.len() - 1 != format.fields.len() { + return None; + } + + let date_time = captures.get(0)?; + let offset = range.from() - from_in_text; + let range = Range::new(date_time.start() + offset, date_time.end() + offset); + + let field = captures + .iter() + .skip(1) + .enumerate() + .find_map(|(i, capture)| { + let capture = capture?; + let capture_range = capture.range(); + + if capture_range.contains(&from_in_text) + && capture_range.contains(&(to_in_text - 1)) + { + Some(format.fields[i]) + } else { + None + } + })?; + + let has_date = format.fields.iter().any(|f| f.unit.is_date()); + let has_time = format.fields.iter().any(|f| f.unit.is_time()); + + let date_time = &text[date_time.start()..date_time.end()]; + let date_time = match (has_date, has_time) { + (true, true) => NaiveDateTime::parse_from_str(date_time, format.fmt).ok()?, + (true, false) => { + let date = NaiveDate::parse_from_str(date_time, format.fmt).ok()?; + + date.and_hms(0, 0, 0) + } + (false, true) => { + let time = NaiveTime::parse_from_str(date_time, format.fmt).ok()?; + + NaiveDate::from_ymd(0, 1, 1).and_time(time) + } + (false, false) => return None, + }; + + Some(DateTimeIncrementor { + date_time, + range, + fmt: format.fmt, + field, + }) + }) + } +} + +impl Increment for DateTimeIncrementor { + fn increment(&self, amount: i64) -> (Range, Tendril) { + let date_time = match self.field.unit { + DateUnit::Years => add_years(self.date_time, amount), + DateUnit::Months => add_months(self.date_time, amount), + DateUnit::Days => add_duration(self.date_time, Duration::days(amount)), + DateUnit::Hours => 
add_duration(self.date_time, Duration::hours(amount)), + DateUnit::Minutes => add_duration(self.date_time, Duration::minutes(amount)), + DateUnit::Seconds => add_duration(self.date_time, Duration::seconds(amount)), + DateUnit::AmPm => toggle_am_pm(self.date_time), + } + .unwrap_or(self.date_time); + + (self.range, date_time.format(self.fmt).to_string().into()) + } +} + +static FORMATS: Lazy<Vec<Format>> = Lazy::new(|| { + vec![ + Format::new("%Y-%m-%d %H:%M:%S"), // 2021-11-24 07:12:23 + Format::new("%Y/%m/%d %H:%M:%S"), // 2021/11/24 07:12:23 + Format::new("%Y-%m-%d %H:%M"), // 2021-11-24 07:12 + Format::new("%Y/%m/%d %H:%M"), // 2021/11/24 07:12 + Format::new("%Y-%m-%d"), // 2021-11-24 + Format::new("%Y/%m/%d"), // 2021/11/24 + Format::new("%a %b %d %Y"), // Wed Nov 24 2021 + Format::new("%d-%b-%Y"), // 24-Nov-2021 + Format::new("%Y %b %d"), // 2021 Nov 24 + Format::new("%b %d, %Y"), // Nov 24, 2021 + Format::new("%-I:%M:%S %P"), // 7:21:53 am + Format::new("%-I:%M %P"), // 7:21 am + Format::new("%-I:%M:%S %p"), // 7:21:53 AM + Format::new("%-I:%M %p"), // 7:21 AM + Format::new("%H:%M:%S"), // 23:24:23 + Format::new("%H:%M"), // 23:24 + ] +}); + +#[derive(Debug)] +struct Format { + fmt: &'static str, + fields: Vec<DateField>, + regex: Regex, + max_len: usize, +} + +impl Format { + fn new(fmt: &'static str) -> Self { + let mut remaining = fmt; + let mut fields = Vec::new(); + let mut regex = String::new(); + let mut max_len = 0; + + while let Some(i) = remaining.find('%') { + let after = &remaining[i + 1..]; + let mut chars = after.chars(); + let c = chars.next().unwrap(); + + let spec_len = if c == '-' { + 1 + chars.next().unwrap().len_utf8() + } else { + c.len_utf8() + }; + + let specifier = &after[..spec_len]; + let field = DateField::from_specifier(specifier).unwrap(); + fields.push(field); + max_len += field.max_len + remaining[..i].len(); + regex += &remaining[..i]; + regex += &format!("({})", field.regex); + remaining = &after[spec_len..]; + } + + let regex 
= Regex::new(&regex).unwrap(); + + Self { + fmt, + fields, + regex, + max_len, + } + } +} + +impl PartialEq for Format { + fn eq(&self, other: &Self) -> bool { + self.fmt == other.fmt && self.fields == other.fields && self.max_len == other.max_len + } +} + +impl Eq for Format {} + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +struct DateField { + regex: &'static str, + unit: DateUnit, + max_len: usize, +} + +impl DateField { + fn from_specifier(specifier: &str) -> Option<Self> { + match specifier { + "Y" => Some(Self { + regex: r"\d{4}", + unit: DateUnit::Years, + max_len: 5, + }), + "y" => Some(Self { + regex: r"\d\d", + unit: DateUnit::Years, + max_len: 2, + }), + "m" => Some(Self { + regex: r"[0-1]\d", + unit: DateUnit::Months, + max_len: 2, + }), + "d" => Some(Self { + regex: r"[0-3]\d", + unit: DateUnit::Days, + max_len: 2, + }), + "-d" => Some(Self { + regex: r"[1-3]?\d", + unit: DateUnit::Days, + max_len: 2, + }), + "a" => Some(Self { + regex: r"Sun|Mon|Tue|Wed|Thu|Fri|Sat", + unit: DateUnit::Days, + max_len: 3, + }), + "A" => Some(Self { + regex: r"Sunday|Monday|Tuesday|Wednesday|Thursday|Friday|Saturday", + unit: DateUnit::Days, + max_len: 9, + }), + "b" | "h" => Some(Self { + regex: r"Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec", + unit: DateUnit::Months, + max_len: 3, + }), + "B" => Some(Self { + regex: r"January|February|March|April|May|June|July|August|September|October|November|December", + unit: DateUnit::Months, + max_len: 9, + }), + "H" => Some(Self { + regex: r"[0-2]\d", + unit: DateUnit::Hours, + max_len: 2, + }), + "M" => Some(Self { + regex: r"[0-5]\d", + unit: DateUnit::Minutes, + max_len: 2, + }), + "S" => Some(Self { + regex: r"[0-5]\d", + unit: DateUnit::Seconds, + max_len: 2, + }), + "I" => Some(Self { + regex: r"[0-1]\d", + unit: DateUnit::Hours, + max_len: 2, + }), + "-I" => Some(Self { + regex: r"1?\d", + unit: DateUnit::Hours, + max_len: 2, + }), + "P" => Some(Self { + regex: r"am|pm", + unit: DateUnit::AmPm, + max_len: 2, + }), +
"p" => Some(Self { + regex: r"AM|PM", + unit: DateUnit::AmPm, + max_len: 2, + }), + _ => None, + } + } +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +enum DateUnit { + Years, + Months, + Days, + Hours, + Minutes, + Seconds, + AmPm, +} + +impl DateUnit { + fn is_date(self) -> bool { + matches!(self, DateUnit::Years | DateUnit::Months | DateUnit::Days) + } + + fn is_time(self) -> bool { + matches!( + self, + DateUnit::Hours | DateUnit::Minutes | DateUnit::Seconds + ) + } +} + +fn ndays_in_month(year: i32, month: u32) -> u32 { + // The first day of the next month... + let (y, m) = if month == 12 { + (year + 1, 1) + } else { + (year, month + 1) + }; + let d = NaiveDate::from_ymd(y, m, 1); + + // ...is preceded by the last day of the original month. + d.pred().day() +} + +fn add_months(date_time: NaiveDateTime, amount: i64) -> Option<NaiveDateTime> { + let month = (date_time.month0() as i64).checked_add(amount)?; + let year = date_time.year() + i32::try_from(month / 12).ok()?; + let year = if month.is_negative() { year - 1 } else { year }; + + // Normalize month + let month = month % 12; + let month = if month.is_negative() { + month + 12 + } else { + month + } as u32 + + 1; + + let day = cmp::min(date_time.day(), ndays_in_month(year, month)); + + Some(NaiveDate::from_ymd(year, month, day).and_time(date_time.time())) +} + +fn add_years(date_time: NaiveDateTime, amount: i64) -> Option<NaiveDateTime> { + let year = i32::try_from((date_time.year() as i64).checked_add(amount)?).ok()?; + let ndays = ndays_in_month(year, date_time.month()); + + if date_time.day() > ndays { + let d = NaiveDate::from_ymd(year, date_time.month(), ndays); + Some(d.succ().and_time(date_time.time())) + } else { + date_time.with_year(year) + } +} + +fn add_duration(date_time: NaiveDateTime, duration: Duration) -> Option<NaiveDateTime> { + date_time.checked_add_signed(duration) +} + +fn toggle_am_pm(date_time: NaiveDateTime) -> Option<NaiveDateTime> { + if date_time.hour() < 12 { + 
add_duration(date_time, Duration::hours(12)) + } else { + add_duration(date_time, Duration::hours(-12)) + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::Rope; + + #[test] + fn test_increment_date_times() { + let tests = [ + // (original, cursor, amount, expected) + ("2020-02-28", 0, 1, "2021-02-28"), + ("2020-02-29", 0, 1, "2021-03-01"), + ("2020-01-31", 5, 1, "2020-02-29"), + ("2020-01-20", 5, 1, "2020-02-20"), + ("2021-01-01", 5, -1, "2020-12-01"), + ("2021-01-31", 5, -2, "2020-11-30"), + ("2020-02-28", 8, 1, "2020-02-29"), + ("2021-02-28", 8, 1, "2021-03-01"), + ("2021-02-28", 0, -1, "2020-02-28"), + ("2021-03-01", 0, -1, "2020-03-01"), + ("2020-02-29", 5, -1, "2020-01-29"), + ("2020-02-20", 5, -1, "2020-01-20"), + ("2020-02-29", 8, -1, "2020-02-28"), + ("2021-03-01", 8, -1, "2021-02-28"), + ("1980/12/21", 8, 100, "1981/03/31"), + ("1980/12/21", 8, -100, "1980/09/12"), + ("1980/12/21", 8, 1000, "1983/09/17"), + ("1980/12/21", 8, -1000, "1978/03/27"), + ("2021-11-24 07:12:23", 0, 1, "2022-11-24 07:12:23"), + ("2021-11-24 07:12:23", 5, 1, "2021-12-24 07:12:23"), + ("2021-11-24 07:12:23", 8, 1, "2021-11-25 07:12:23"), + ("2021-11-24 07:12:23", 11, 1, "2021-11-24 08:12:23"), + ("2021-11-24 07:12:23", 14, 1, "2021-11-24 07:13:23"), + ("2021-11-24 07:12:23", 17, 1, "2021-11-24 07:12:24"), + ("2021/11/24 07:12:23", 0, 1, "2022/11/24 07:12:23"), + ("2021/11/24 07:12:23", 5, 1, "2021/12/24 07:12:23"), + ("2021/11/24 07:12:23", 8, 1, "2021/11/25 07:12:23"), + ("2021/11/24 07:12:23", 11, 1, "2021/11/24 08:12:23"), + ("2021/11/24 07:12:23", 14, 1, "2021/11/24 07:13:23"), + ("2021/11/24 07:12:23", 17, 1, "2021/11/24 07:12:24"), + ("2021-11-24 07:12", 0, 1, "2022-11-24 07:12"), + ("2021-11-24 07:12", 5, 1, "2021-12-24 07:12"), + ("2021-11-24 07:12", 8, 1, "2021-11-25 07:12"), + ("2021-11-24 07:12", 11, 1, "2021-11-24 08:12"), + ("2021-11-24 07:12", 14, 1, "2021-11-24 07:13"), + ("2021/11/24 07:12", 0, 1, "2022/11/24 07:12"), + ("2021/11/24 07:12", 5, 1, 
"2021/12/24 07:12"), + ("2021/11/24 07:12", 8, 1, "2021/11/25 07:12"), + ("2021/11/24 07:12", 11, 1, "2021/11/24 08:12"), + ("2021/11/24 07:12", 14, 1, "2021/11/24 07:13"), + ("Wed Nov 24 2021", 0, 1, "Thu Nov 25 2021"), + ("Wed Nov 24 2021", 4, 1, "Fri Dec 24 2021"), + ("Wed Nov 24 2021", 8, 1, "Thu Nov 25 2021"), + ("Wed Nov 24 2021", 11, 1, "Thu Nov 24 2022"), + ("24-Nov-2021", 0, 1, "25-Nov-2021"), + ("24-Nov-2021", 3, 1, "24-Dec-2021"), + ("24-Nov-2021", 7, 1, "24-Nov-2022"), + ("2021 Nov 24", 0, 1, "2022 Nov 24"), + ("2021 Nov 24", 5, 1, "2021 Dec 24"), + ("2021 Nov 24", 9, 1, "2021 Nov 25"), + ("Nov 24, 2021", 0, 1, "Dec 24, 2021"), + ("Nov 24, 2021", 4, 1, "Nov 25, 2021"), + ("Nov 24, 2021", 8, 1, "Nov 24, 2022"), + ("7:21:53 am", 0, 1, "8:21:53 am"), + ("7:21:53 am", 3, 1, "7:22:53 am"), + ("7:21:53 am", 5, 1, "7:21:54 am"), + ("7:21:53 am", 8, 1, "7:21:53 pm"), + ("7:21:53 AM", 0, 1, "8:21:53 AM"), + ("7:21:53 AM", 3, 1, "7:22:53 AM"), + ("7:21:53 AM", 5, 1, "7:21:54 AM"), + ("7:21:53 AM", 8, 1, "7:21:53 PM"), + ("7:21 am", 0, 1, "8:21 am"), + ("7:21 am", 3, 1, "7:22 am"), + ("7:21 am", 5, 1, "7:21 pm"), + ("7:21 AM", 0, 1, "8:21 AM"), + ("7:21 AM", 3, 1, "7:22 AM"), + ("7:21 AM", 5, 1, "7:21 PM"), + ("23:24:23", 1, 1, "00:24:23"), + ("23:24:23", 3, 1, "23:25:23"), + ("23:24:23", 6, 1, "23:24:24"), + ("23:24", 1, 1, "00:24"), + ("23:24", 3, 1, "23:25"), + ]; + + for (original, cursor, amount, expected) in tests { + let rope = Rope::from_str(original); + let range = Range::new(cursor, cursor + 1); + assert_eq!( + DateTimeIncrementor::from_range(rope.slice(..), range) + .unwrap() + .increment(amount) + .1, + Tendril::from(expected) + ); + } + } + + #[test] + fn test_invalid_date_times() { + let tests = [ + "0000-00-00", + "1980-2-21", + "1980-12-1", + "12345", + "2020-02-30", + "1999-12-32", + "19-12-32", + "1-2-3", + "0000/00/00", + "1980/2/21", + "1980/12/1", + "12345", + "2020/02/30", + "1999/12/32", + "19/12/32", + "1/2/3", + "123:456:789", + "11:61", + 
"2021-55-12 08:12:54", + ]; + + for invalid in tests { + let rope = Rope::from_str(invalid); + let range = Range::new(0, 1); + + assert_eq!(DateTimeIncrementor::from_range(rope.slice(..), range), None) + } + } +} diff --git a/helix-core/src/increment/mod.rs b/helix-core/src/increment/mod.rs new file mode 100644 index 00000000..f5945774 --- /dev/null +++ b/helix-core/src/increment/mod.rs @@ -0,0 +1,8 @@ +pub mod date_time; +pub mod number; + +use crate::{Range, Tendril}; + +pub trait Increment { + fn increment(&self, amount: i64) -> (Range, Tendril); +} diff --git a/helix-core/src/numbers.rs b/helix-core/src/increment/number.rs index e9f3c898..57171f67 100644 --- a/helix-core/src/numbers.rs +++ b/helix-core/src/increment/number.rs @@ -2,6 +2,8 @@ use std::borrow::Cow; use ropey::RopeSlice; +use super::Increment; + use crate::{ textobject::{textobject_word, TextObject}, Range, Tendril, @@ -9,9 +11,9 @@ use crate::{ #[derive(Debug, PartialEq, Eq)] pub struct NumberIncrementor<'a> { - pub range: Range, - pub value: i64, - pub radix: u32, + value: i64, + radix: u32, + range: Range, text: RopeSlice<'a>, } @@ -71,9 +73,10 @@ impl<'a> NumberIncrementor<'a> { text, }) } +} - /// Add `amount` to the number and return the formatted text. 
- pub fn incremented_text(&self, amount: i64) -> Tendril { +impl<'a> Increment for NumberIncrementor<'a> { + fn increment(&self, amount: i64) -> (Range, Tendril) { let old_text: Cow<str> = self.text.slice(self.range.from()..self.range.to()).into(); let old_length = old_text.len(); let new_value = self.value.wrapping_add(amount); @@ -144,7 +147,7 @@ impl<'a> NumberIncrementor<'a> { } } - new_text.into() + (self.range, new_text.into()) } } @@ -366,8 +369,9 @@ mod test { assert_eq!( NumberIncrementor::from_range(rope.slice(..), range) .unwrap() - .incremented_text(amount), - expected.into() + .increment(amount) + .1, + Tendril::from(expected) ); } } @@ -392,8 +396,9 @@ mod test { assert_eq!( NumberIncrementor::from_range(rope.slice(..), range) .unwrap() - .incremented_text(amount), - expected.into() + .increment(amount) + .1, + Tendril::from(expected) ); } } @@ -419,8 +424,9 @@ mod test { assert_eq!( NumberIncrementor::from_range(rope.slice(..), range) .unwrap() - .incremented_text(amount), - expected.into() + .increment(amount) + .1, + Tendril::from(expected) ); } } @@ -464,8 +470,9 @@ mod test { assert_eq!( NumberIncrementor::from_range(rope.slice(..), range) .unwrap() - .incremented_text(amount), - expected.into() + .increment(amount) + .1, + Tendril::from(expected) ); } } @@ -491,8 +498,9 @@ mod test { assert_eq!( NumberIncrementor::from_range(rope.slice(..), range) .unwrap() - .incremented_text(amount), - expected.into() + .increment(amount) + .1, + Tendril::from(expected) ); } } diff --git a/helix-core/src/indent.rs b/helix-core/src/indent.rs index 8ccc0120..5d20edc1 100644 --- a/helix-core/src/indent.rs +++ b/helix-core/src/indent.rs @@ -1,6 +1,5 @@ use crate::{ chars::{char_is_line_ending, char_is_whitespace}, - find_first_non_whitespace_char, syntax::{IndentQuery, LanguageConfiguration, Syntax}, tree_sitter::Node, Rope, RopeSlice, @@ -174,8 +173,7 @@ pub fn auto_detect_indent_style(document_text: &Rope) -> Option<IndentStyle> { /// To determine indentation of 
a newly inserted line, figure out the indentation at the last col /// of the previous line. -#[allow(dead_code)] -fn indent_level_for_line(line: RopeSlice, tab_width: usize) -> usize { +pub fn indent_level_for_line(line: RopeSlice, tab_width: usize) -> usize { let mut len = 0; for ch in line.chars() { match ch { @@ -207,10 +205,15 @@ fn get_highest_syntax_node_at_bytepos(syntax: &Syntax, pos: usize) -> Option<Nod Some(node) } -fn calculate_indentation(query: &IndentQuery, node: Option<Node>, newline: bool) -> usize { - // NOTE: can't use contains() on query because of comparing Vec<String> and &str - // https://doc.rust-lang.org/std/vec/struct.Vec.html#method.contains - +/// Calculate the indentation at a given treesitter node. +/// If newline is false, then any "indent" nodes on the line are ignored ("outdent" still applies). +/// This is because the indentation is only increased starting at the second line of the node. +fn calculate_indentation( + query: &IndentQuery, + node: Option<Node>, + line: usize, + newline: bool, +) -> usize { let mut increment: isize = 0; let mut node = match node { @@ -218,70 +221,45 @@ fn calculate_indentation(query: &IndentQuery, node: Option<Node>, newline: bool) None => return 0, }; - let mut prev_start = node.start_position().row; - - // if we're calculating indentation for a brand new line then the current node will become the - // parent node. We need to take it's indentation level into account too. 
- let node_kind = node.kind(); - if newline && query.indent.contains(node_kind) { - increment += 1; - } - - while let Some(parent) = node.parent() { - let parent_kind = parent.kind(); - let start = parent.start_position().row; - - // detect deeply nested indents in the same line - // .map(|a| { <-- ({ is two scopes - // let len = 1; <-- indents one level - // }) <-- }) is two scopes - let starts_same_line = start == prev_start; - - if query.outdent.contains(node.kind()) && !starts_same_line { - // we outdent by skipping the rules for the current level and jumping up - // node = parent; - increment -= 1; - // continue; + let mut current_line = line; + let mut consider_indent = newline; + let mut increment_from_line: isize = 0; + + loop { + let node_kind = node.kind(); + let start = node.start_position().row; + if current_line != start { + // Indent/dedent by at most one per line: + // .map(|a| { <-- ({ is two scopes + // let len = 1; <-- indents one level + // }) <-- }) is two scopes + if consider_indent || increment_from_line < 0 { + increment += increment_from_line.signum(); + } + increment_from_line = 0; + current_line = start; + consider_indent = true; } - if query.indent.contains(parent_kind) // && not_first_or_last_sibling - && !starts_same_line - { - // println!("is_scope {}", parent_kind); - prev_start = start; - increment += 1 + if query.outdent.contains(node_kind) { + increment_from_line -= 1; + } + if query.indent.contains(node_kind) { + increment_from_line += 1; } - // if last_scope && increment > 0 && ...{ ignore } - - node = parent; + if let Some(parent) = node.parent() { + node = parent; + } else { + break; + } + } + if consider_indent || increment_from_line < 0 { + increment += increment_from_line.signum(); } - increment.max(0) as usize } -#[allow(dead_code)] -fn suggested_indent_for_line( - language_config: &LanguageConfiguration, - syntax: Option<&Syntax>, - text: RopeSlice, - line_num: usize, - _tab_width: usize, -) -> usize { - if let Some(start) 
= find_first_non_whitespace_char(text.line(line_num)) { - return suggested_indent_for_pos( - Some(language_config), - syntax, - text, - start + text.line_to_char(line_num), - false, - ); - }; - - // if the line is blank, indent should be zero - 0 -} - // TODO: two usecases: if we are triggering this for a new, blank line: // - it should return 0 when mass indenting stuff // - it should look up the wrapper node and count it too when we press o/O @@ -290,23 +268,20 @@ pub fn suggested_indent_for_pos( syntax: Option<&Syntax>, text: RopeSlice, pos: usize, + line: usize, new_line: bool, -) -> usize { +) -> Option<usize> { if let (Some(query), Some(syntax)) = ( language_config.and_then(|config| config.indent_query()), syntax, ) { let byte_start = text.char_to_byte(pos); let node = get_highest_syntax_node_at_bytepos(syntax, byte_start); - - // let config = load indentation query config from Syntax(should contain language_config) - // TODO: special case for comments // TODO: if preserve_leading_whitespace - calculate_indentation(query, node, new_line) + Some(calculate_indentation(query, node, line, new_line)) } else { - // TODO: heuristics for non-tree sitter grammars - 0 + None } } @@ -438,7 +413,8 @@ where ", ); - let doc = Rope::from(doc); + let doc = doc; + use crate::diagnostic::Severity; use crate::syntax::{ Configuration, IndentationConfiguration, LanguageConfiguration, Loader, }; @@ -456,6 +432,8 @@ where roots: vec![], comment_token: None, auto_format: false, + diagnostic_severity: Severity::Warning, + tree_sitter_library: None, language_server: None, indent: Some(IndentationConfiguration { tab_width: 4, @@ -474,20 +452,29 @@ where let language_config = loader.language_config_for_scope("source.rust").unwrap(); let highlight_config = language_config.highlight_config(&[]).unwrap(); - let syntax = Syntax::new(&doc, highlight_config.clone()); + let syntax = Syntax::new(&doc, highlight_config, std::sync::Arc::new(loader)); let text = doc.slice(..); let tab_width = 4; 
for i in 0..doc.len_lines() { let line = text.line(i); - let indent = indent_level_for_line(line, tab_width); - assert_eq!( - suggested_indent_for_line(&language_config, Some(&syntax), text, i, tab_width), - indent, - "line {}: {}", - i, - line - ); + if let Some(pos) = crate::find_first_non_whitespace_char(line) { + let indent = indent_level_for_line(line, tab_width); + assert_eq!( + suggested_indent_for_pos( + Some(&language_config), + Some(&syntax), + text, + text.line_to_char(i) + pos, + i, + false + ), + Some(indent), + "line {}: \"{}\"", + i, + line + ); + } } } } diff --git a/helix-core/src/lib.rs b/helix-core/src/lib.rs index 7d790406..fa8566ab 100644 --- a/helix-core/src/lib.rs +++ b/helix-core/src/lib.rs @@ -1,3 +1,5 @@ +pub use encoding_rs as encoding; + pub mod auto_pairs; pub mod chars; pub mod comment; @@ -5,18 +7,19 @@ pub mod diagnostic; pub mod diff; pub mod graphemes; pub mod history; +pub mod increment; pub mod indent; pub mod line_ending; pub mod macros; pub mod match_brackets; pub mod movement; -pub mod numbers; pub mod object; pub mod path; mod position; pub mod register; pub mod search; pub mod selection; +pub mod shellwords; mod state; pub mod surround; pub mod syntax; @@ -36,8 +39,14 @@ pub fn find_first_non_whitespace_char(line: RopeSlice) -> Option<usize> { line.chars().position(|ch| !ch.is_whitespace()) } -/// Find `.git` root. -pub fn find_root(root: Option<&str>) -> Option<std::path::PathBuf> { +/// Find project root. 
+/// +/// Order of detection: +/// * Top-most folder containing a root marker in current git repository +/// * Git repostory root if no marker detected +/// * Top-most folder containing a root marker if not git repository detected +/// * Current working directory as fallback +pub fn find_root(root: Option<&str>, root_markers: &[String]) -> Option<std::path::PathBuf> { let current_dir = std::env::current_dir().expect("unable to determine current directory"); let root = match root { @@ -49,16 +58,30 @@ pub fn find_root(root: Option<&str>) -> Option<std::path::PathBuf> { current_dir.join(root) } } - None => current_dir, + None => current_dir.clone(), }; + let mut top_marker = None; for ancestor in root.ancestors() { - // TODO: also use defined roots if git isn't found + for marker in root_markers { + if ancestor.join(marker).exists() { + top_marker = Some(ancestor); + break; + } + } + // don't go higher than repo if ancestor.join(".git").is_dir() { - return Some(ancestor.to_path_buf()); + // Use workspace if detected from marker + return Some(top_marker.unwrap_or(ancestor).to_path_buf()); } } - None + + // In absence of git repo, use workspace if detected + if top_marker.is_some() { + top_marker.map(|a| a.to_path_buf()) + } else { + Some(current_dir) + } } pub fn runtime_dir() -> std::path::PathBuf { @@ -158,7 +181,7 @@ mod merge_toml_tests { "; let base: Value = toml::from_slice(include_bytes!("../../languages.toml")) - .expect("Couldn't parse built-in langauges config"); + .expect("Couldn't parse built-in languages config"); let user: Value = toml::from_str(USER).unwrap(); let merged = merge_toml_values(base, user); @@ -189,7 +212,10 @@ use etcetera::base_strategy::{choose_base_strategy, BaseStrategy}; pub use ropey::{Rope, RopeBuilder, RopeSlice}; -pub use tendril::StrTendril as Tendril; +// pub use tendril::StrTendril as Tendril; +pub use smartstring::SmartString; + +pub type Tendril = SmartString<smartstring::LazyCompact>; #[doc(inline)] pub use {regex, 
tree_sitter}; diff --git a/helix-core/src/line_ending.rs b/helix-core/src/line_ending.rs index 3541305c..8eb426e1 100644 --- a/helix-core/src/line_ending.rs +++ b/helix-core/src/line_ending.rs @@ -250,7 +250,7 @@ mod line_ending_tests { assert_eq!(get_line_ending_of_str(&text[..6]), Some(LineEnding::CR)); assert_eq!(get_line_ending_of_str(&text[..12]), Some(LineEnding::LF)); assert_eq!(get_line_ending_of_str(&text[..17]), Some(LineEnding::Crlf)); - assert_eq!(get_line_ending_of_str(&text[..]), None); + assert_eq!(get_line_ending_of_str(text), None); } #[test] diff --git a/helix-core/src/match_brackets.rs b/helix-core/src/match_brackets.rs index cd554005..0189dedd 100644 --- a/helix-core/src/match_brackets.rs +++ b/helix-core/src/match_brackets.rs @@ -11,7 +11,7 @@ const PAIRS: &[(char, char)] = &[ ('\"', '\"'), ]; -// limit matching pairs to only ( ) { } [ ] < > +// limit matching pairs to only ( ) { } [ ] < > ' ' " " // Returns the position of the matching bracket under cursor. // diff --git a/helix-core/src/movement.rs b/helix-core/src/movement.rs index 01a8f890..47fe6827 100644 --- a/helix-core/src/movement.rs +++ b/helix-core/src/movement.rs @@ -307,8 +307,6 @@ fn reached_target(target: WordMotionTarget, prev_ch: char, next_ch: char) -> boo #[cfg(test)] mod test { - use std::array::{self, IntoIter}; - use ropey::Rope; use super::*; @@ -360,7 +358,7 @@ mod test { ((Direction::Backward, 999usize), (0, 0)), // |This is a simple alphabetic line ]; - for ((direction, amount), coordinates) in IntoIter::new(moves_and_expected_coordinates) { + for ((direction, amount), coordinates) in moves_and_expected_coordinates { range = move_horizontally(slice, range, direction, amount, Movement::Move); assert_eq!(coords_at_pos(slice, range.head), coordinates.into()) } @@ -374,7 +372,7 @@ mod test { let mut range = Range::point(position); - let moves_and_expected_coordinates = IntoIter::new([ + let moves_and_expected_coordinates = [ ((Direction::Forward, 11usize), (1, 1)), // 
Multiline\nt|ext sample\n... ((Direction::Backward, 1usize), (1, 0)), // Multiline\n|text sample\n... ((Direction::Backward, 5usize), (0, 5)), // Multi|line\ntext sample\n... @@ -384,7 +382,7 @@ mod test { ((Direction::Backward, 0usize), (0, 3)), // Mul|tiline\ntext sample\n... ((Direction::Forward, 999usize), (5, 0)), // ...and whitespaced\n| ((Direction::Forward, 999usize), (5, 0)), // ...and whitespaced\n| - ]); + ]; for ((direction, amount), coordinates) in moves_and_expected_coordinates { range = move_horizontally(slice, range, direction, amount, Movement::Move); @@ -402,11 +400,11 @@ mod test { let mut range = Range::point(position); let original_anchor = range.anchor; - let moves = IntoIter::new([ + let moves = [ (Direction::Forward, 1usize), (Direction::Forward, 5usize), (Direction::Backward, 3usize), - ]); + ]; for (direction, amount) in moves { range = move_horizontally(slice, range, direction, amount, Movement::Extend); @@ -420,7 +418,7 @@ mod test { let slice = text.slice(..); let position = pos_at_coords(slice, (0, 0).into(), true); let mut range = Range::point(position); - let moves_and_expected_coordinates = IntoIter::new([ + let moves_and_expected_coordinates = [ ((Direction::Forward, 1usize), (1, 0)), ((Direction::Forward, 2usize), (3, 0)), ((Direction::Forward, 1usize), (4, 0)), @@ -430,7 +428,7 @@ mod test { ((Direction::Backward, 0usize), (4, 0)), ((Direction::Forward, 5), (5, 0)), ((Direction::Forward, 999usize), (5, 0)), - ]); + ]; for ((direction, amount), coordinates) in moves_and_expected_coordinates { range = move_vertically(slice, range, direction, amount, Movement::Move); @@ -450,7 +448,7 @@ mod test { H, V, } - let moves_and_expected_coordinates = IntoIter::new([ + let moves_and_expected_coordinates = [ // Places cursor at the end of line ((Axis::H, Direction::Forward, 8usize), (0, 8)), // First descent preserves column as the target line is wider @@ -463,7 +461,7 @@ mod test { ((Axis::V, Direction::Backward, 999usize), (0, 8)), 
((Axis::V, Direction::Forward, 4usize), (4, 8)), ((Axis::V, Direction::Forward, 999usize), (5, 0)), - ]); + ]; for ((axis, direction, amount), coordinates) in moves_and_expected_coordinates { range = match axis { @@ -489,7 +487,7 @@ mod test { H, V, } - let moves_and_expected_coordinates = IntoIter::new([ + let moves_and_expected_coordinates = [ // Places cursor at the fourth kana. ((Axis::H, Direction::Forward, 4), (0, 4)), // Descent places cursor at the 4th character. @@ -498,7 +496,7 @@ mod test { ((Axis::H, Direction::Backward, 1usize), (1, 3)), // Jumping back up 1 line. ((Axis::V, Direction::Backward, 1usize), (0, 3)), - ]); + ]; for ((axis, direction, amount), coordinates) in moves_and_expected_coordinates { range = match axis { @@ -530,7 +528,7 @@ mod test { #[test] fn test_behaviour_when_moving_to_start_of_next_words() { - let tests = array::IntoIter::new([ + let tests = [ ("Basic forward motion stops at the first space", vec![(1, Range::new(0, 0), Range::new(0, 6))]), (" Starting from a boundary advances the anchor", @@ -604,7 +602,7 @@ mod test { vec![ (1, Range::new(0, 0), Range::new(0, 6)), ]), - ]); + ]; for (sample, scenario) in tests { for (count, begin, expected_end) in scenario.into_iter() { @@ -616,7 +614,7 @@ mod test { #[test] fn test_behaviour_when_moving_to_start_of_next_long_words() { - let tests = array::IntoIter::new([ + let tests = [ ("Basic forward motion stops at the first space", vec![(1, Range::new(0, 0), Range::new(0, 6))]), (" Starting from a boundary advances the anchor", @@ -688,7 +686,7 @@ mod test { vec![ (1, Range::new(0, 0), Range::new(0, 8)), ]), - ]); + ]; for (sample, scenario) in tests { for (count, begin, expected_end) in scenario.into_iter() { @@ -700,7 +698,7 @@ mod test { #[test] fn test_behaviour_when_moving_to_start_of_previous_words() { - let tests = array::IntoIter::new([ + let tests = [ ("Basic backward motion from the middle of a word", vec![(1, Range::new(3, 3), Range::new(4, 0))]), @@ -773,7 +771,7 @@ mod test 
{ vec![ (1, Range::new(0, 6), Range::new(6, 0)), ]), - ]); + ]; for (sample, scenario) in tests { for (count, begin, expected_end) in scenario.into_iter() { @@ -785,7 +783,7 @@ mod test { #[test] fn test_behaviour_when_moving_to_start_of_previous_long_words() { - let tests = array::IntoIter::new([ + let tests = [ ( "Basic backward motion from the middle of a word", vec![(1, Range::new(3, 3), Range::new(4, 0))], @@ -870,7 +868,7 @@ mod test { vec![ (1, Range::new(0, 8), Range::new(8, 0)), ]), - ]); + ]; for (sample, scenario) in tests { for (count, begin, expected_end) in scenario.into_iter() { @@ -882,7 +880,7 @@ mod test { #[test] fn test_behaviour_when_moving_to_end_of_next_words() { - let tests = array::IntoIter::new([ + let tests = [ ("Basic forward motion from the start of a word to the end of it", vec![(1, Range::new(0, 0), Range::new(0, 5))]), ("Basic forward motion from the end of a word to the end of the next", @@ -954,7 +952,7 @@ mod test { vec![ (1, Range::new(0, 0), Range::new(0, 5)), ]), - ]); + ]; for (sample, scenario) in tests { for (count, begin, expected_end) in scenario.into_iter() { @@ -966,7 +964,7 @@ mod test { #[test] fn test_behaviour_when_moving_to_end_of_previous_words() { - let tests = array::IntoIter::new([ + let tests = [ ("Basic backward motion from the middle of a word", vec![(1, Range::new(9, 9), Range::new(10, 5))]), ("Starting from after boundary retreats the anchor", @@ -1036,7 +1034,7 @@ mod test { vec![ (1, Range::new(0, 10), Range::new(10, 4)), ]), - ]); + ]; for (sample, scenario) in tests { for (count, begin, expected_end) in scenario.into_iter() { @@ -1048,7 +1046,7 @@ mod test { #[test] fn test_behaviour_when_moving_to_end_of_next_long_words() { - let tests = array::IntoIter::new([ + let tests = [ ("Basic forward motion from the start of a word to the end of it", vec![(1, Range::new(0, 0), Range::new(0, 5))]), ("Basic forward motion from the end of a word to the end of the next", @@ -1118,7 +1116,7 @@ mod test { vec![ (1, 
Range::new(0, 0), Range::new(0, 7)), ]), - ]); + ]; for (sample, scenario) in tests { for (count, begin, expected_end) in scenario.into_iter() { diff --git a/helix-core/src/object.rs b/helix-core/src/object.rs index 717c5994..b06f4144 100644 --- a/helix-core/src/object.rs +++ b/helix-core/src/object.rs @@ -1,31 +1,72 @@ use crate::{Range, RopeSlice, Selection, Syntax}; +use tree_sitter::Node; -// TODO: to contract_selection we'd need to store the previous ranges before expand. -// Maybe just contract to the first child node? -pub fn expand_selection(syntax: &Syntax, text: RopeSlice, selection: &Selection) -> Selection { +pub fn expand_selection(syntax: &Syntax, text: RopeSlice, selection: Selection) -> Selection { + select_node_impl(syntax, text, selection, |descendant, from, to| { + if descendant.start_byte() == from && descendant.end_byte() == to { + descendant.parent() + } else { + Some(descendant) + } + }) +} + +pub fn shrink_selection(syntax: &Syntax, text: RopeSlice, selection: Selection) -> Selection { + select_node_impl(syntax, text, selection, |descendant, _from, _to| { + descendant.child(0).or(Some(descendant)) + }) +} + +pub fn select_sibling<F>( + syntax: &Syntax, + text: RopeSlice, + selection: Selection, + sibling_fn: &F, +) -> Selection +where + F: Fn(Node) -> Option<Node>, +{ + select_node_impl(syntax, text, selection, |descendant, _from, _to| { + find_sibling_recursive(descendant, sibling_fn) + }) +} + +fn find_sibling_recursive<F>(node: Node, sibling_fn: F) -> Option<Node> +where + F: Fn(Node) -> Option<Node>, +{ + sibling_fn(node).or_else(|| { + node.parent() + .and_then(|node| find_sibling_recursive(node, sibling_fn)) + }) +} + +fn select_node_impl<F>( + syntax: &Syntax, + text: RopeSlice, + selection: Selection, + select_fn: F, +) -> Selection +where + F: Fn(Node, usize, usize) -> Option<Node>, +{ let tree = syntax.tree(); - selection.clone().transform(|range| { + selection.transform(|range| { let from = text.char_to_byte(range.from()); let to 
= text.char_to_byte(range.to()); - // find parent of a descendant that matches the range - let parent = match tree + let node = match tree .root_node() .descendant_for_byte_range(from, to) - .and_then(|node| { - if node.child_count() == 0 || (node.start_byte() == from && node.end_byte() == to) { - node.parent() - } else { - Some(node) - } - }) { - Some(parent) => parent, + .and_then(|node| select_fn(node, from, to)) + { + Some(node) => node, None => return range, }; - let from = text.byte_to_char(parent.start_byte()); - let to = text.byte_to_char(parent.end_byte()); + let from = text.byte_to_char(node.start_byte()); + let to = text.byte_to_char(node.end_byte()); if range.head < range.anchor { Range::new(to, from) diff --git a/helix-core/src/position.rs b/helix-core/src/position.rs index c6018ce6..93362c77 100644 --- a/helix-core/src/position.rs +++ b/helix-core/src/position.rs @@ -109,7 +109,10 @@ pub fn visual_coords_at_pos(text: RopeSlice, pos: usize, tab_width: usize) -> Po /// TODO: this should be changed to work in terms of visual row/column, not /// graphemes. pub fn pos_at_coords(text: RopeSlice, coords: Position, limit_before_line_ending: bool) -> usize { - let Position { row, col } = coords; + let Position { mut row, col } = coords; + if limit_before_line_ending { + row = row.min(text.len_lines() - 1); + }; let line_start = text.line_to_char(row); let line_end = if limit_before_line_ending { line_end_char_index(&text, row) @@ -290,5 +293,12 @@ mod test { assert_eq!(pos_at_coords(slice, (0, 0).into(), false), 0); assert_eq!(pos_at_coords(slice, (0, 1).into(), false), 1); assert_eq!(pos_at_coords(slice, (0, 2).into(), false), 2); + + // Test out of bounds. 
+ let text = Rope::new(); + let slice = text.slice(..); + assert_eq!(pos_at_coords(slice, (10, 0).into(), true), 0); + assert_eq!(pos_at_coords(slice, (0, 10).into(), true), 0); + assert_eq!(pos_at_coords(slice, (10, 10).into(), true), 0); } } diff --git a/helix-core/src/register.rs b/helix-core/src/register.rs index b9eb497d..b39e4034 100644 --- a/helix-core/src/register.rs +++ b/helix-core/src/register.rs @@ -68,4 +68,8 @@ impl Registers { pub fn read(&self, name: char) -> Option<&[String]> { self.get(name).map(|reg| reg.read()) } + + pub fn inner(&self) -> &HashMap<char, Register> { + &self.inner + } } diff --git a/helix-core/src/selection.rs b/helix-core/src/selection.rs index 116a1c7c..c6eceb4b 100644 --- a/helix-core/src/selection.rs +++ b/helix-core/src/selection.rs @@ -7,6 +7,7 @@ use crate::{ ensure_grapheme_boundary_next, ensure_grapheme_boundary_prev, next_grapheme_boundary, prev_grapheme_boundary, }, + movement::Direction, Assoc, ChangeSet, RopeSlice, }; use smallvec::{smallvec, SmallVec}; @@ -82,6 +83,13 @@ impl Range { std::cmp::max(self.anchor, self.head) } + /// Total length of the range. + #[inline] + #[must_use] + pub fn len(&self) -> usize { + self.to() - self.from() + } + /// The (inclusive) range of lines that the range overlaps. #[inline] #[must_use] @@ -102,6 +110,27 @@ impl Range { self.anchor == self.head } + /// `Direction::Backward` when head < anchor. + /// `Direction::Forward` otherwise. + #[inline] + #[must_use] + pub fn direction(&self) -> Direction { + if self.head < self.anchor { + Direction::Backward + } else { + Direction::Forward + } + } + + // flips the direction of the selection + pub fn flip(&self) -> Self { + Self { + anchor: self.head, + head: self.anchor, + horiz: self.horiz, + } + } + /// Check two ranges for overlap.
#[must_use] pub fn overlaps(&self, other: &Self) -> bool { @@ -111,6 +140,11 @@ impl Range { self.from() == other.from() || (self.to() > other.from() && other.to() > self.from()) } + #[inline] + pub fn contains_range(&self, other: &Self) -> bool { + self.from() <= other.from() && self.to() >= other.to() + } + pub fn contains(&self, pos: usize) -> bool { self.from() <= pos && pos < self.to() } @@ -515,6 +549,39 @@ impl Selection { pub fn len(&self) -> usize { self.ranges.len() } + + // returns true if self ⊇ other + pub fn contains(&self, other: &Selection) -> bool { + // can't contain other if it is larger + if other.len() > self.len() { + return false; + } + + let (mut iter_self, mut iter_other) = (self.iter(), other.iter()); + let (mut ele_self, mut ele_other) = (iter_self.next(), iter_other.next()); + + loop { + match (ele_self, ele_other) { + (Some(ra), Some(rb)) => { + if !ra.contains_range(rb) { + // `self` doesn't contain next element from `other`, advance `self`, we need to match all from `other` + ele_self = iter_self.next(); + } else { + // matched element from `other`, advance `other` + ele_other = iter_other.next(); + }; + } + (None, Some(_)) => { + // exhausted `self`, we can't match the remainder of `other` + return false; + } + (_, None) => { + // no elements from `other` left to match, `self` contains `other` + return true; + } + } + } + } } impl<'a> IntoIterator for &'a Selection { @@ -699,16 +766,16 @@ mod test { fn test_contains() { let range = Range::new(10, 12); - assert_eq!(range.contains(9), false); - assert_eq!(range.contains(10), true); - assert_eq!(range.contains(11), true); - assert_eq!(range.contains(12), false); - assert_eq!(range.contains(13), false); + assert!(!range.contains(9)); + assert!(range.contains(10)); + assert!(range.contains(11)); + assert!(!range.contains(12)); + assert!(!range.contains(13)); let range = Range::new(9, 6); - assert_eq!(range.contains(9), false); - assert_eq!(range.contains(7), true); -
assert_eq!(range.contains(6), true); + assert!(!range.contains(9)); + assert!(range.contains(7)); + assert!(range.contains(6)); } #[test] @@ -953,4 +1020,30 @@ mod test { &["", "abcd", "efg", "rs", "xyz"] ); } + #[test] + fn test_selection_contains() { + fn contains(a: Vec<(usize, usize)>, b: Vec<(usize, usize)>) -> bool { + let sela = Selection::new(a.iter().map(|a| Range::new(a.0, a.1)).collect(), 0); + let selb = Selection::new(b.iter().map(|b| Range::new(b.0, b.1)).collect(), 0); + sela.contains(&selb) + } + + // exact match + assert!(contains(vec!((1, 1)), vec!((1, 1)))); + + // larger set contains smaller + assert!(contains(vec!((1, 1), (2, 2), (3, 3)), vec!((2, 2)))); + + // multiple matches + assert!(contains(vec!((1, 1), (2, 2)), vec!((1, 1), (2, 2)))); + + // smaller set can't contain bigger + assert!(!contains(vec!((1, 1)), vec!((1, 1), (2, 2)))); + + assert!(contains( + vec!((1, 1), (2, 4), (5, 6), (7, 9), (10, 13)), + vec!((3, 4), (7, 9)) + )); + assert!(!contains(vec!((1, 1), (5, 6)), vec!((1, 6)))); + } } diff --git a/helix-core/src/shellwords.rs b/helix-core/src/shellwords.rs new file mode 100644 index 00000000..13f6f3e9 --- /dev/null +++ b/helix-core/src/shellwords.rs @@ -0,0 +1,164 @@ +use std::borrow::Cow; + +/// Get the vec of escaped / quoted / doublequoted filenames from the input str +pub fn shellwords(input: &str) -> Vec<Cow<'_, str>> { + enum State { + Normal, + NormalEscaped, + Quoted, + QuoteEscaped, + Dquoted, + DquoteEscaped, + } + + use State::*; + + let mut state = Normal; + let mut args: Vec<Cow<str>> = Vec::new(); + let mut escaped = String::with_capacity(input.len()); + + let mut start = 0; + let mut end = 0; + + for (i, c) in input.char_indices() { + state = match state { + Normal => match c { + '\\' => { + escaped.push_str(&input[start..i]); + start = i + 1; + NormalEscaped + } + '"' => { + end = i; + Dquoted + } + '\'' => { + end = i; + Quoted + } + c if c.is_ascii_whitespace() => { + end = i; + Normal + } + _ => Normal, + }, + 
NormalEscaped => Normal, + Quoted => match c { + '\\' => { + escaped.push_str(&input[start..i]); + start = i + 1; + QuoteEscaped + } + '\'' => { + end = i; + Normal + } + _ => Quoted, + }, + QuoteEscaped => Quoted, + Dquoted => match c { + '\\' => { + escaped.push_str(&input[start..i]); + start = i + 1; + DquoteEscaped + } + '"' => { + end = i; + Normal + } + _ => Dquoted, + }, + DquoteEscaped => Dquoted, + }; + + if i >= input.len() - 1 && end == 0 { + end = i + 1; + } + + if end > 0 { + let esc_trim = escaped.trim(); + let inp = &input[start..end]; + + if !(esc_trim.is_empty() && inp.trim().is_empty()) { + if esc_trim.is_empty() { + args.push(inp.into()); + } else { + args.push([escaped, inp.into()].concat().into()); + escaped = "".to_string(); + } + } + start = i + 1; + end = 0; + } + } + args +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_normal() { + let input = r#":o single_word twó wörds \three\ \"with\ escaping\\"#; + let result = shellwords(input); + let expected = vec![ + Cow::from(":o"), + Cow::from("single_word"), + Cow::from("twó"), + Cow::from("wörds"), + Cow::from(r#"three "with escaping\"#), + ]; + // TODO test is_owned and is_borrowed, once they get stabilized.
+ assert_eq!(expected, result); + } + + #[test] + fn test_quoted() { + let quoted = + r#":o 'single_word' 'twó wörds' '' ' ''\three\' \"with\ escaping\\' 'quote incomplete"#; + let result = shellwords(quoted); + let expected = vec![ + Cow::from(":o"), + Cow::from("single_word"), + Cow::from("twó wörds"), + Cow::from(r#"three' "with escaping\"#), + Cow::from("quote incomplete"), + ]; + assert_eq!(expected, result); + } + + #[test] + fn test_dquoted() { + let dquoted = r#":o "single_word" "twó wörds" "" " ""\three\' \"with\ escaping\\" "dquote incomplete"#; + let result = shellwords(dquoted); + let expected = vec![ + Cow::from(":o"), + Cow::from("single_word"), + Cow::from("twó wörds"), + Cow::from(r#"three' "with escaping\"#), + Cow::from("dquote incomplete"), + ]; + assert_eq!(expected, result); + } + + #[test] + fn test_mixed() { + let dquoted = r#":o single_word 'twó wörds' "\three\' \"with\ escaping\\""no space before"'and after' $#%^@ "%^&(%^" ')(*&^%''a\\\\\b' '"#; + let result = shellwords(dquoted); + let expected = vec![ + Cow::from(":o"), + Cow::from("single_word"), + Cow::from("twó wörds"), + Cow::from("three' \"with escaping\\"), + Cow::from("no space before"), + Cow::from("and after"), + Cow::from("$#%^@"), + Cow::from("%^&(%^"), + Cow::from(")(*&^%"), + Cow::from(r#"a\\b"#), + //last ' just changes to quoted but since we dont have anything after it, it should be ignored + ]; + assert_eq!(expected, result); + } +} diff --git a/helix-core/src/surround.rs b/helix-core/src/surround.rs index b53b0a78..58eb23cf 100644 --- a/helix-core/src/surround.rs +++ b/helix-core/src/surround.rs @@ -172,6 +172,7 @@ mod test { use ropey::Rope; use smallvec::SmallVec; + #[allow(clippy::type_complexity)] fn check_find_nth_pair_pos( text: &str, cases: Vec<(usize, char, usize, Option<(usize, usize)>)>, diff --git a/helix-core/src/syntax.rs b/helix-core/src/syntax.rs index f1c399d2..a5c5e498 100644 --- a/helix-core/src/syntax.rs +++ b/helix-core/src/syntax.rs @@
-1,5 +1,6 @@ use crate::{ chars::char_is_line_ending, + diagnostic::Severity, regex::Regex, transaction::{ChangeSet, Operation}, Rope, RopeSlice, Tendril, @@ -7,12 +8,13 @@ use crate::{ pub use helix_syntax::get_language; -use arc_swap::ArcSwap; +use arc_swap::{ArcSwap, Guard}; +use slotmap::{DefaultKey as LayerId, HopSlotMap}; use std::{ borrow::Cow, cell::RefCell, - collections::{HashMap, HashSet}, + collections::{HashMap, HashSet, VecDeque}, fmt, path::Path, sync::Arc, @@ -50,7 +52,7 @@ pub struct Configuration { #[serde(rename_all = "kebab-case", deny_unknown_fields)] pub struct LanguageConfiguration { #[serde(rename = "name")] - pub language_id: String, + pub language_id: String, // c-sharp, rust pub scope: String, // source.rust pub file_types: Vec<String>, // filename ends_with? <Gemfile, rb, etc> #[serde(default)] @@ -63,6 +65,10 @@ pub struct LanguageConfiguration { #[serde(default)] pub auto_format: bool, + #[serde(default)] + pub diagnostic_severity: Severity, + + pub tree_sitter_library: Option<String>, // tree-sitter library name, defaults to language_id // content_regex #[serde(default, skip_serializing, deserialize_with = "deserialize_regex")] @@ -92,6 +98,7 @@ pub struct LanguageServerConfiguration { #[serde(default)] #[serde(skip_serializing_if = "Vec::is_empty")] pub args: Vec<String>, + pub language_id: Option<String>, } #[derive(Debug, PartialEq, Clone, Deserialize, Serialize)] @@ -245,20 +252,22 @@ impl LanguageConfiguration { if highlights_query.is_empty() { None } else { - let language = get_language(&crate::RUNTIME_DIR, &self.language_id) - .map_err(|e| log::info!("{}", e)) - .ok()?; + let language = get_language( + &crate::RUNTIME_DIR, + self.tree_sitter_library + .as_deref() + .unwrap_or(&self.language_id), + ) + .map_err(|e| log::info!("{}", e)) + .ok()?; let config = HighlightConfiguration::new( language, &highlights_query, &injections_query, &locals_query, - ); + ) + .unwrap(); // TODO: avoid panic - let config = match config { - 
Ok(config) => config, - Err(err) => panic!("{}", err), - }; // TODO: avoid panic config.configure(scopes); Some(Arc::new(config)) } @@ -308,12 +317,16 @@ impl LanguageConfiguration { } } +// Expose loader as Lazy<> global since it's always static? + #[derive(Debug)] pub struct Loader { // highlight_names ? language_configs: Vec<Arc<LanguageConfiguration>>, language_config_ids_by_file_type: HashMap<String, usize>, // Vec<usize> language_config_ids_by_shebang: HashMap<String, usize>, + + scopes: ArcSwap<Vec<String>>, } impl Loader { @@ -322,6 +335,7 @@ impl Loader { language_configs: Vec::new(), language_config_ids_by_file_type: HashMap::new(), language_config_ids_by_shebang: HashMap::new(), + scopes: ArcSwap::from_pointee(Vec::new()), }; for config in config.language { @@ -366,8 +380,9 @@ impl Loader { pub fn language_config_for_shebang(&self, source: &Rope) -> Option<Arc<LanguageConfiguration>> { let line = Cow::from(source.line(0)); - static SHEBANG_REGEX: Lazy<Regex> = - Lazy::new(|| Regex::new(r"^#!\s*(?:\S*[/\\](?:env\s+)?)?([^\s\.\d]+)").unwrap()); + static SHEBANG_REGEX: Lazy<Regex> = Lazy::new(|| { + Regex::new(r"^#!\s*(?:\S*[/\\](?:env\s+(?:\-\S+\s+)*)?)?([^\s\.\d]+)").unwrap() + }); let configuration_id = SHEBANG_REGEX .captures(&line) .and_then(|cap| self.language_config_ids_by_shebang.get(&cap[1])); @@ -406,8 +421,22 @@ impl Loader { } None } - pub fn language_configs_iter(&self) -> impl Iterator<Item = &Arc<LanguageConfiguration>> { - self.language_configs.iter() + + pub fn set_scopes(&self, scopes: Vec<String>) { + self.scopes.store(Arc::new(scopes)); + + // Reconfigure existing grammars + for config in self + .language_configs + .iter() + .filter(|cfg| cfg.is_highlight_initialized()) + { + config.reconfigure(&self.scopes()); + } + } + + pub fn scopes(&self) -> Guard<Arc<Vec<String>>> { + self.scopes.load() } } @@ -416,12 +445,6 @@ pub struct TsParser { cursors: Vec<QueryCursor>, } -impl fmt::Debug for TsParser { - fn fmt(&self, f: &mut 
fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("TsParser").finish() - } -} - // could also just use a pool, or a single instance? thread_local! { pub static PARSER: RefCell<TsParser> = RefCell::new(TsParser { @@ -432,9 +455,9 @@ thread_local! { #[derive(Debug)] pub struct Syntax { - config: Arc<HighlightConfiguration>, - - root_layer: LanguageLayer, + layers: HopSlotMap<LayerId, LanguageLayer>, + root: LayerId, + loader: Arc<Loader>, } fn byte_range_to_str(range: std::ops::Range<usize>, source: RopeSlice) -> Cow<str> { @@ -444,38 +467,34 @@ fn byte_range_to_str(range: std::ops::Range<usize>, source: RopeSlice) -> Cow<st } impl Syntax { - // buffer, grammar, config, grammars, sync_timeout? - pub fn new( - /*language: Lang,*/ source: &Rope, - config: Arc<HighlightConfiguration>, - ) -> Self { - let root_layer = LanguageLayer { tree: None }; + pub fn new(source: &Rope, config: Arc<HighlightConfiguration>, loader: Arc<Loader>) -> Self { + let root_layer = LanguageLayer { + tree: None, + config, + depth: 0, + ranges: vec![Range { + start_byte: 0, + end_byte: usize::MAX, + start_point: Point::new(0, 0), + end_point: Point::new(usize::MAX, usize::MAX), + }], + }; - // track markers of injections // track scope_descriptor: a Vec of scopes for item in tree + let mut layers = HopSlotMap::default(); + let root = layers.insert(root_layer); + let mut syntax = Self { - // grammar, - config, - root_layer, + root, + layers, + loader, }; - // update root layer - PARSER.with(|ts_parser| { - // TODO: handle the returned `Result` properly. 
- let _ = syntax.root_layer.parse( - &mut ts_parser.borrow_mut(), - &syntax.config, - source, - 0, - vec![Range { - start_byte: 0, - end_byte: usize::MAX, - start_point: Point::new(0, 0), - end_point: Point::new(usize::MAX, usize::MAX), - }], - ); - }); + syntax + .update(source, source, &ChangeSet::new(source)) + .unwrap(); + syntax } @@ -485,32 +504,255 @@ impl Syntax { source: &Rope, changeset: &ChangeSet, ) -> Result<(), Error> { + let mut queue = VecDeque::new(); + queue.push_back(self.root); + + let scopes = self.loader.scopes.load(); + let injection_callback = |language: &str| { + self.loader + .language_configuration_for_injection_string(language) + .and_then(|language_config| language_config.highlight_config(&scopes)) + }; + + // Convert the changeset into tree sitter edits. + let edits = generate_edits(old_source, changeset); + + // Use the edits to update all layers markers + if !edits.is_empty() { + fn point_add(a: Point, b: Point) -> Point { + if b.row > 0 { + Point::new(a.row.saturating_add(b.row), b.column) + } else { + Point::new(0, a.column.saturating_add(b.column)) + } + } + fn point_sub(a: Point, b: Point) -> Point { + if a.row > b.row { + Point::new(a.row.saturating_sub(b.row), a.column) + } else { + Point::new(0, a.column.saturating_sub(b.column)) + } + } + + for layer in &mut self.layers.values_mut() { + // The root layer always covers the whole range (0..usize::MAX) + if layer.depth == 0 { + continue; + } + + for range in &mut layer.ranges { + // Roughly based on https://github.com/tree-sitter/tree-sitter/blob/ddeaa0c7f534268b35b4f6cb39b52df082754413/lib/src/subtree.c#L691-L720 + for edit in edits.iter().rev() { + let is_pure_insertion = edit.old_end_byte == edit.start_byte; + + // if edit is after range, skip + if edit.start_byte > range.end_byte { + // TODO: || (is_noop && edit.start_byte == range.end_byte) + continue; + } + + // if edit is before range, shift entire range by len + if edit.old_end_byte < range.start_byte { + 
range.start_byte = + edit.new_end_byte + (range.start_byte - edit.old_end_byte); + range.start_point = point_add( + edit.new_end_position, + point_sub(range.start_point, edit.old_end_position), + ); + + range.end_byte = edit + .new_end_byte + .saturating_add(range.end_byte - edit.old_end_byte); + range.end_point = point_add( + edit.new_end_position, + point_sub(range.end_point, edit.old_end_position), + ); + } + // if the edit starts in the space before and extends into the range + else if edit.start_byte < range.start_byte { + range.start_byte = edit.new_end_byte; + range.start_point = edit.new_end_position; + + range.end_byte = range + .end_byte + .saturating_sub(edit.old_end_byte) + .saturating_add(edit.new_end_byte); + range.end_point = point_add( + edit.new_end_position, + point_sub(range.end_point, edit.old_end_position), + ); + } + // If the edit is an insertion at the start of the tree, shift + else if edit.start_byte == range.start_byte && is_pure_insertion { + range.start_byte = edit.new_end_byte; + range.start_point = edit.new_end_position; + } else { + range.end_byte = range + .end_byte + .saturating_sub(edit.old_end_byte) + .saturating_add(edit.new_end_byte); + range.end_point = point_add( + edit.new_end_position, + point_sub(range.end_point, edit.old_end_position), + ); + } + } + } + } + } + PARSER.with(|ts_parser| { - self.root_layer.update( - &mut ts_parser.borrow_mut(), - &self.config, - old_source, - source, - changeset, - ) - }) + let ts_parser = &mut ts_parser.borrow_mut(); + let mut cursor = ts_parser.cursors.pop().unwrap_or_else(QueryCursor::new); + // TODO: might need to set cursor range + cursor.set_byte_range(0..usize::MAX); - // TODO: deal with injections and update them too - } + let source_slice = source.slice(..); - // fn buffer_changed -> call layer.update(range, new_text) on root layer and then all marker layers + let mut touched = HashSet::new(); - // call this on transaction.apply() -> buffer_changed(changes) - // - // fn 
parse(language, old_tree, ranges) - // - pub fn tree(&self) -> &Tree { - self.root_layer.tree() + // TODO: we should be able to avoid editing & parsing layers with ranges earlier in the document before the edit + + while let Some(layer_id) = queue.pop_front() { + // Mark the layer as touched + touched.insert(layer_id); + + let layer = &mut self.layers[layer_id]; + + // If a tree already exists, notify it of changes. + if let Some(tree) = &mut layer.tree { + for edit in edits.iter().rev() { + // Apply the edits in reverse. + // If we applied them in order then edit 1 would disrupt the positioning of edit 2. + tree.edit(edit); + } + } + + // Re-parse the tree. + layer.parse(&mut ts_parser.parser, source)?; + + // Switch to an immutable borrow. + let layer = &self.layers[layer_id]; + + // Process injections. + let matches = cursor.matches( + &layer.config.injections_query, + layer.tree().root_node(), + RopeProvider(source_slice), + ); + let mut injections = Vec::new(); + for mat in matches { + let (language_name, content_node, include_children) = injection_for_match( + &layer.config, + &layer.config.injections_query, + &mat, + source_slice, + ); + + // Explicitly remove this match so that none of its other captures will remain + // in the stream of captures. + mat.remove(); + + // If a language is found with the given name, then add a new language layer + // to the highlighted document. + if let (Some(language_name), Some(content_node)) = (language_name, content_node) + { + if let Some(config) = (injection_callback)(&language_name) { + let ranges = + intersect_ranges(&layer.ranges, &[content_node], include_children); + + if !ranges.is_empty() { + injections.push((config, ranges)); + } + } + } + } + + // Process combined injections. 
+ if let Some(combined_injections_query) = &layer.config.combined_injections_query { + let mut injections_by_pattern_index = + vec![(None, Vec::new(), false); combined_injections_query.pattern_count()]; + let matches = cursor.matches( + combined_injections_query, + layer.tree().root_node(), + RopeProvider(source_slice), + ); + for mat in matches { + let entry = &mut injections_by_pattern_index[mat.pattern_index]; + let (language_name, content_node, include_children) = injection_for_match( + &layer.config, + combined_injections_query, + &mat, + source_slice, + ); + if language_name.is_some() { + entry.0 = language_name; + } + if let Some(content_node) = content_node { + entry.1.push(content_node); + } + entry.2 = include_children; + } + for (lang_name, content_nodes, includes_children) in injections_by_pattern_index + { + if let (Some(lang_name), false) = (lang_name, content_nodes.is_empty()) { + if let Some(config) = (injection_callback)(&lang_name) { + let ranges = intersect_ranges( + &layer.ranges, + &content_nodes, + includes_children, + ); + if !ranges.is_empty() { + injections.push((config, ranges)); + } + } + } + } + } + + let depth = layer.depth + 1; + // TODO: can't inline this since matches borrows self.layers + for (config, ranges) in injections { + // Find an existing layer + let layer = self + .layers + .iter_mut() + .find(|(_, layer)| { + layer.depth == depth && // TODO: track parent id instead + layer.config.language == config.language && layer.ranges == ranges + }) + .map(|(id, _layer)| id); + + // ...or insert a new one. + let layer_id = layer.unwrap_or_else(|| { + self.layers.insert(LanguageLayer { + tree: None, + config, + depth, + ranges, + }) + }); + + queue.push_back(layer_id); + } + + // TODO: pre-process local scopes at this time, rather than highlight? + // would solve problems with locals not working across boundaries + } + + // Return the cursor back in the pool. 
+ ts_parser.cursors.push(cursor); + + // Remove all untouched layers + self.layers.retain(|id, _| touched.contains(&id)); + + Ok(()) + }) } - // - // <!--update_for_injection(grammar)--> - // Highlighting + pub fn tree(&self) -> &Tree { + self.layers[self.root].tree() + } /// Iterate over the highlighted regions for a given slice of source code. pub fn highlight_iter<'a>( @@ -518,65 +760,76 @@ impl Syntax { source: RopeSlice<'a>, range: Option<std::ops::Range<usize>>, cancellation_flag: Option<&'a AtomicUsize>, - injection_callback: impl FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a, ) -> impl Iterator<Item = Result<HighlightEvent, Error>> + 'a { - // The `captures` iterator borrows the `Tree` and the `QueryCursor`, which - // prevents them from being moved. But both of these values are really just - // pointers, so it's actually ok to move them. - - // reuse a cursor from the pool if possible - let mut cursor = PARSER.with(|ts_parser| { - let highlighter = &mut ts_parser.borrow_mut(); - highlighter.cursors.pop().unwrap_or_else(QueryCursor::new) + let mut layers = self + .layers + .iter() + .filter_map(|(_, layer)| { + // TODO: if range doesn't overlap layer range, skip it + + // Reuse a cursor from the pool if available. + let mut cursor = PARSER.with(|ts_parser| { + let highlighter = &mut ts_parser.borrow_mut(); + highlighter.cursors.pop().unwrap_or_else(QueryCursor::new) + }); + + // The `captures` iterator borrows the `Tree` and the `QueryCursor`, which + // prevents them from being moved. But both of these values are really just + // pointers, so it's actually ok to move them. 
+ let cursor_ref = + unsafe { mem::transmute::<_, &'static mut QueryCursor>(&mut cursor) }; + + // if reusing cursors & no range this resets to whole range + cursor_ref.set_byte_range(range.clone().unwrap_or(0..usize::MAX)); + + let mut captures = cursor_ref + .captures( + &layer.config.query, + layer.tree().root_node(), + RopeProvider(source), + ) + .peekable(); + + // If there's no captures, skip the layer + captures.peek()?; + + Some(HighlightIterLayer { + highlight_end_stack: Vec::new(), + scope_stack: vec![LocalScope { + inherits: false, + range: 0..usize::MAX, + local_defs: Vec::new(), + }], + cursor, + _tree: None, + captures, + config: layer.config.as_ref(), // TODO: just reuse `layer` + depth: layer.depth, // TODO: just reuse `layer` + ranges: &layer.ranges, // TODO: temp + }) + }) + .collect::<Vec<_>>(); + + // HAXX: arrange layers by byte range, with deeper layers positioned first + layers.sort_by_key(|layer| { + ( + layer.ranges.first().cloned(), + std::cmp::Reverse(layer.depth), + ) }); - let tree_ref = self.tree(); - let cursor_ref = unsafe { mem::transmute::<_, &'static mut QueryCursor>(&mut cursor) }; - let query_ref = &self.config.query; - let config_ref = self.config.as_ref(); - - // if reusing cursors & no range this resets to whole range - cursor_ref.set_byte_range(range.clone().unwrap_or(0..usize::MAX)); - - let captures = cursor_ref - .captures(query_ref, tree_ref.root_node(), RopeProvider(source)) - .peekable(); - - // manually craft the root layer based on the existing tree - let layer = HighlightIterLayer { - highlight_end_stack: Vec::new(), - scope_stack: vec![LocalScope { - inherits: false, - range: 0..usize::MAX, - local_defs: Vec::new(), - }], - cursor, - depth: 0, - _tree: None, - captures, - config: config_ref, - ranges: vec![Range { - start_byte: 0, - end_byte: usize::MAX, - start_point: Point::new(0, 0), - end_point: Point::new(usize::MAX, usize::MAX), - }], - }; let mut result = HighlightIter { source, - byte_offset: 
range.map_or(0, |r| r.start), // TODO: simplify - injection_callback, + byte_offset: range.map_or(0, |r| r.start), cancellation_flag, iter_count: 0, - layers: vec![layer], + layers, next_event: None, last_highlight_range: None, }; result.sort_layers(); result } - // on_tokenize - // on_change_highlighting // Commenting // comment_strings_for_pos @@ -588,246 +841,157 @@ impl Syntax { // indent_level_for_line // TODO: Folding - - // Syntax APIs - // get_syntax_node_containing_range -> - // ... - // get_syntax_node_at_pos - // buffer_range_for_scope_at_pos } #[derive(Debug)] pub struct LanguageLayer { // mode // grammar - // depth + pub config: Arc<HighlightConfiguration>, pub(crate) tree: Option<Tree>, + pub ranges: Vec<Range>, + pub depth: usize, } impl LanguageLayer { - // pub fn new() -> Self { - // Self { tree: None } - // } - pub fn tree(&self) -> &Tree { // TODO: no unwrap self.tree.as_ref().unwrap() } - fn parse( - &mut self, - ts_parser: &mut TsParser, - config: &HighlightConfiguration, - source: &Rope, - _depth: usize, - ranges: Vec<Range>, - ) -> Result<(), Error> { - if ts_parser.parser.set_included_ranges(&ranges).is_ok() { - ts_parser - .parser - .set_language(config.language) - .map_err(|_| Error::InvalidLanguage)?; - - // unsafe { syntax.parser.set_cancellation_flag(cancellation_flag) }; - let tree = ts_parser - .parser - .parse_with( - &mut |byte, _| { - if byte <= source.len_bytes() { - let (chunk, start_byte, _, _) = source.chunk_at_byte(byte); - chunk[byte - start_byte..].as_bytes() - } else { - // out of range - &[] - } - }, - self.tree.as_ref(), - ) - .ok_or(Error::Cancelled)?; + fn parse(&mut self, parser: &mut Parser, source: &Rope) -> Result<(), Error> { + parser.set_included_ranges(&self.ranges).unwrap(); - self.tree = Some(tree) - } + parser + .set_language(self.config.language) + .map_err(|_| Error::InvalidLanguage)?; + + // unsafe { syntax.parser.set_cancellation_flag(cancellation_flag) }; + let tree = parser + .parse_with( + &mut |byte, 
_| { + if byte <= source.len_bytes() { + let (chunk, start_byte, _, _) = source.chunk_at_byte(byte); + chunk[byte - start_byte..].as_bytes() + } else { + // out of range + &[] + } + }, + self.tree.as_ref(), + ) + .ok_or(Error::Cancelled)?; + // unsafe { ts_parser.parser.set_cancellation_flag(None) }; + self.tree = Some(tree); Ok(()) } +} - pub(crate) fn generate_edits( - old_text: RopeSlice, - changeset: &ChangeSet, - ) -> Vec<tree_sitter::InputEdit> { - use Operation::*; - let mut old_pos = 0; +pub(crate) fn generate_edits( + old_text: &Rope, + changeset: &ChangeSet, +) -> Vec<tree_sitter::InputEdit> { + use Operation::*; + let mut old_pos = 0; - let mut edits = Vec::new(); + let mut edits = Vec::new(); - let mut iter = changeset.changes.iter().peekable(); + if changeset.changes.is_empty() { + return edits; + } - // TODO; this is a lot easier with Change instead of Operation. + let mut iter = changeset.changes.iter().peekable(); - fn point_at_pos(text: RopeSlice, pos: usize) -> (usize, Point) { - let byte = text.char_to_byte(pos); // <- attempted to index past end - let line = text.char_to_line(pos); - let line_start_byte = text.line_to_byte(line); - let col = byte - line_start_byte; + // TODO; this is a lot easier with Change instead of Operation. - (byte, Point::new(line, col)) - } + fn point_at_pos(text: &Rope, pos: usize) -> (usize, Point) { + let byte = text.char_to_byte(pos); // <- attempted to index past end + let line = text.char_to_line(pos); + let line_start_byte = text.line_to_byte(line); + let col = byte - line_start_byte; - fn traverse(point: Point, text: &Tendril) -> Point { - let Point { - mut row, - mut column, - } = point; - - // TODO: there should be a better way here. 
- let mut chars = text.chars().peekable(); - while let Some(ch) = chars.next() { - if char_is_line_ending(ch) && !(ch == '\r' && chars.peek() == Some(&'\n')) { - row += 1; - column = 0; - } else { - column += 1; - } + (byte, Point::new(line, col)) + } + + fn traverse(point: Point, text: &Tendril) -> Point { + let Point { + mut row, + mut column, + } = point; + + // TODO: there should be a better way here. + let mut chars = text.chars().peekable(); + while let Some(ch) = chars.next() { + if char_is_line_ending(ch) && !(ch == '\r' && chars.peek() == Some(&'\n')) { + row += 1; + column = 0; + } else { + column += 1; } - Point { row, column } } + Point { row, column } + } - while let Some(change) = iter.next() { - let len = match change { - Delete(i) | Retain(i) => *i, - Insert(_) => 0, - }; - let mut old_end = old_pos + len; + while let Some(change) = iter.next() { + let len = match change { + Delete(i) | Retain(i) => *i, + Insert(_) => 0, + }; + let mut old_end = old_pos + len; + + match change { + Retain(_) => {} + Delete(_) => { + let (start_byte, start_position) = point_at_pos(old_text, old_pos); + let (old_end_byte, old_end_position) = point_at_pos(old_text, old_end); + + // deletion + edits.push(tree_sitter::InputEdit { + start_byte, // old_pos to byte + old_end_byte, // old_end to byte + new_end_byte: start_byte, // old_pos to byte + start_position, // old pos to coords + old_end_position, // old_end to coords + new_end_position: start_position, // old pos to coords + }); + } + Insert(s) => { + let (start_byte, start_position) = point_at_pos(old_text, old_pos); - match change { - Retain(_) => {} - Delete(_) => { - let (start_byte, start_position) = point_at_pos(old_text, old_pos); + // a subsequent delete means a replace, consume it + if let Some(Delete(len)) = iter.peek() { + old_end = old_pos + len; let (old_end_byte, old_end_position) = point_at_pos(old_text, old_end); - // TODO: Position also needs to be byte based... 
- // let byte = char_to_byte(old_pos) - // let line = char_to_line(old_pos) - // let line_start_byte = line_to_byte() - // Position::new(line, line_start_byte - byte) + iter.next(); - // deletion + // replacement edits.push(tree_sitter::InputEdit { - start_byte, // old_pos to byte - old_end_byte, // old_end to byte - new_end_byte: start_byte, // old_pos to byte - start_position, // old pos to coords - old_end_position, // old_end to coords - new_end_position: start_position, // old pos to coords + start_byte, // old_pos to byte + old_end_byte, // old_end to byte + new_end_byte: start_byte + s.len(), // old_pos to byte + s.len() + start_position, // old pos to coords + old_end_position, // old_end to coords + new_end_position: traverse(start_position, s), // old pos + chars, newlines matter too (iter over) + }); + } else { + // insert + edits.push(tree_sitter::InputEdit { + start_byte, // old_pos to byte + old_end_byte: start_byte, // same + new_end_byte: start_byte + s.len(), // old_pos + s.len() + start_position, // old pos to coords + old_end_position: start_position, // same + new_end_position: traverse(start_position, s), // old pos + chars, newlines matter too (iter over) }); - } - Insert(s) => { - let (start_byte, start_position) = point_at_pos(old_text, old_pos); - - // a subsequent delete means a replace, consume it - if let Some(Delete(len)) = iter.peek() { - old_end = old_pos + len; - let (old_end_byte, old_end_position) = point_at_pos(old_text, old_end); - - iter.next(); - - // replacement - edits.push(tree_sitter::InputEdit { - start_byte, // old_pos to byte - old_end_byte, // old_end to byte - new_end_byte: start_byte + s.len(), // old_pos to byte + s.len() - start_position, // old pos to coords - old_end_position, // old_end to coords - new_end_position: traverse(start_position, s), // old pos + chars, newlines matter too (iter over) - }); - } else { - // insert - edits.push(tree_sitter::InputEdit { - start_byte, // old_pos to byte - old_end_byte: 
start_byte, // same - new_end_byte: start_byte + s.len(), // old_pos + s.len() - start_position, // old pos to coords - old_end_position: start_position, // same - new_end_position: traverse(start_position, s), // old pos + chars, newlines matter too (iter over) - }); - } } } - old_pos = old_end; } - edits - } - - fn update( - &mut self, - ts_parser: &mut TsParser, - config: &HighlightConfiguration, - old_source: &Rope, - source: &Rope, - changeset: &ChangeSet, - ) -> Result<(), Error> { - if changeset.is_empty() { - return Ok(()); - } - - let edits = Self::generate_edits(old_source.slice(..), changeset); - - // Notify the tree about all the changes - for edit in edits.iter().rev() { - // apply the edits in reverse. If we applied them in order then edit 1 would disrupt - // the positioning of edit 2 - self.tree.as_mut().unwrap().edit(edit); - } - - self.parse( - ts_parser, - config, - source, - 0, - // TODO: what to do about this range on update - vec![Range { - start_byte: 0, - end_byte: usize::MAX, - start_point: Point::new(0, 0), - end_point: Point::new(usize::MAX, usize::MAX), - }], - ) + old_pos = old_end; } - - // fn highlight_iter() -> same as Mode but for this layer. Mode composits these - // fn buffer_changed - // fn update(range) - // fn update_injections() + edits } -// -- refactored from tree-sitter-highlight to be able to retain state -// TODO: add seek() to iter - -// problem: any time a layer is updated it must update it's injections on the parent (potentially -// removing some from use) -// can't modify to vec and exist in it at the same time since that would violate borrows -// maybe we can do with an arena -// maybe just caching on the top layer and nevermind the injections for now? 
-// -// Grammar { -// layers: Vec<Box<Layer>> to prevent memory moves when vec is modified -// } -// injections tracked by marker: -// if marker areas match it's fine and update -// if not found add new layer -// if length 0 then area got removed, clean up the layer -// -// layer update: -// if range.len = 0 then remove the layer -// for change in changes { tree.edit(change) } -// tree = parser.parse(.., tree, ..) -// calculate affected range and update injections -// injection update: -// look for existing injections -// if present, range = (first injection start, last injection end) -// -// For now cheat and just throw out non-root layers if they exist. This should still improve -// parsing in majority of cases. - use std::sync::atomic::{AtomicUsize, Ordering}; use std::{iter, mem, ops, str, usize}; use tree_sitter::{ @@ -864,8 +1028,8 @@ pub enum HighlightEvent { pub struct HighlightConfiguration { pub language: Grammar, pub query: Query, + injections_query: Query, combined_injections_query: Option<Query>, - locals_pattern_index: usize, highlights_pattern_index: usize, highlight_indices: ArcSwap<Vec<Option<Highlight>>>, non_local_variable_patterns: Vec<bool>, @@ -892,13 +1056,9 @@ struct LocalScope<'a> { } #[derive(Debug)] -struct HighlightIter<'a, F> -where - F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a, -{ +struct HighlightIter<'a> { source: RopeSlice<'a>, byte_offset: usize, - injection_callback: F, cancellation_flag: Option<&'a AtomicUsize>, layers: Vec<HighlightIterLayer<'a>>, iter_count: usize, @@ -938,8 +1098,8 @@ struct HighlightIterLayer<'a> { config: &'a HighlightConfiguration, highlight_end_stack: Vec<usize>, scope_stack: Vec<LocalScope<'a>>, - ranges: Vec<Range>, depth: usize, + ranges: &'a [Range], } impl<'a> fmt::Debug for HighlightIterLayer<'a> { @@ -971,38 +1131,32 @@ impl HighlightConfiguration { ) -> Result<Self, QueryError> { // Concatenate the query strings, keeping track of the start offset of each section. 
let mut query_source = String::new(); - query_source.push_str(injection_query); - let locals_query_offset = query_source.len(); query_source.push_str(locals_query); let highlights_query_offset = query_source.len(); query_source.push_str(highlights_query); // Construct a single query by concatenating the three query strings, but record the // range of pattern indices that belong to each individual string. - let mut query = Query::new(language, &query_source)?; - let mut locals_pattern_index = 0; + let query = Query::new(language, &query_source)?; let mut highlights_pattern_index = 0; for i in 0..(query.pattern_count()) { let pattern_offset = query.start_byte_for_pattern(i); if pattern_offset < highlights_query_offset { - if pattern_offset < highlights_query_offset { - highlights_pattern_index += 1; - } - if pattern_offset < locals_query_offset { - locals_pattern_index += 1; - } + highlights_pattern_index += 1; } } + let mut injections_query = Query::new(language, injection_query)?; + // Construct a separate query just for dealing with the 'combined injections'. // Disable the combined injection patterns in the main query. 
let mut combined_injections_query = Query::new(language, injection_query)?; let mut has_combined_queries = false; - for pattern_index in 0..locals_pattern_index { - let settings = query.property_settings(pattern_index); + for pattern_index in 0..injections_query.pattern_count() { + let settings = injections_query.property_settings(pattern_index); if settings.iter().any(|s| &*s.key == "injection.combined") { has_combined_queries = true; - query.disable_pattern(pattern_index); + injections_query.disable_pattern(pattern_index); } else { combined_injections_query.disable_pattern(pattern_index); } @@ -1034,8 +1188,6 @@ impl HighlightConfiguration { for (i, name) in query.capture_names().iter().enumerate() { let i = Some(i as u32); match name.as_str() { - "injection.content" => injection_content_capture_index = i, - "injection.language" => injection_language_capture_index = i, "local.definition" => local_def_capture_index = i, "local.definition-value" => local_def_value_capture_index = i, "local.reference" => local_ref_capture_index = i, @@ -1044,12 +1196,21 @@ impl HighlightConfiguration { } } + for (i, name) in injections_query.capture_names().iter().enumerate() { + let i = Some(i as u32); + match name.as_str() { + "injection.content" => injection_content_capture_index = i, + "injection.language" => injection_language_capture_index = i, + _ => {} + } + } + let highlight_indices = ArcSwap::from_pointee(vec![None; query.capture_names().len()]); Ok(Self { language, query, + injections_query, combined_injections_query, - locals_pattern_index, highlights_pattern_index, highlight_indices, non_local_variable_patterns, @@ -1114,238 +1275,6 @@ impl HighlightConfiguration { } impl<'a> HighlightIterLayer<'a> { - /// Create a new 'layer' of highlighting for this document. 
- /// - /// In the even that the new layer contains "combined injections" (injections where multiple - /// disjoint ranges are parsed as one syntax tree), these will be eagerly processed and - /// added to the returned vector. - fn new<F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a>( - source: RopeSlice<'a>, - cancellation_flag: Option<&'a AtomicUsize>, - injection_callback: &mut F, - mut config: &'a HighlightConfiguration, - mut depth: usize, - mut ranges: Vec<Range>, - ) -> Result<Vec<Self>, Error> { - let mut result = Vec::with_capacity(1); - let mut queue = Vec::new(); - loop { - // --> Tree parsing part - - PARSER.with(|ts_parser| { - let highlighter = &mut ts_parser.borrow_mut(); - - if highlighter.parser.set_included_ranges(&ranges).is_ok() { - highlighter - .parser - .set_language(config.language) - .map_err(|_| Error::InvalidLanguage)?; - - unsafe { highlighter.parser.set_cancellation_flag(cancellation_flag) }; - let tree = highlighter - .parser - .parse_with( - &mut |byte, _| { - if byte <= source.len_bytes() { - let (chunk, start_byte, _, _) = source.chunk_at_byte(byte); - chunk[byte - start_byte..].as_bytes() - } else { - // out of range - &[] - } - }, - None, - ) - .ok_or(Error::Cancelled)?; - unsafe { highlighter.parser.set_cancellation_flag(None) }; - let mut cursor = highlighter.cursors.pop().unwrap_or_else(QueryCursor::new); - - // Process combined injections. 
- if let Some(combined_injections_query) = &config.combined_injections_query { - let mut injections_by_pattern_index = vec![ - (None, Vec::new(), false); - combined_injections_query - .pattern_count() - ]; - let matches = cursor.matches( - combined_injections_query, - tree.root_node(), - RopeProvider(source), - ); - for mat in matches { - let entry = &mut injections_by_pattern_index[mat.pattern_index]; - let (language_name, content_node, include_children) = - injection_for_match( - config, - combined_injections_query, - &mat, - source, - ); - if language_name.is_some() { - entry.0 = language_name; - } - if let Some(content_node) = content_node { - entry.1.push(content_node); - } - entry.2 = include_children; - } - for (lang_name, content_nodes, includes_children) in - injections_by_pattern_index - { - if let (Some(lang_name), false) = (lang_name, content_nodes.is_empty()) - { - if let Some(next_config) = (injection_callback)(&lang_name) { - let ranges = Self::intersect_ranges( - &ranges, - &content_nodes, - includes_children, - ); - if !ranges.is_empty() { - queue.push((next_config, depth + 1, ranges)); - } - } - } - } - } - - // --> Highlighting query part - - // The `captures` iterator borrows the `Tree` and the `QueryCursor`, which - // prevents them from being moved. But both of these values are really just - // pointers, so it's actually ok to move them. 
- let tree_ref = unsafe { mem::transmute::<_, &'static Tree>(&tree) }; - let cursor_ref = - unsafe { mem::transmute::<_, &'static mut QueryCursor>(&mut cursor) }; - let captures = cursor_ref - .captures(&config.query, tree_ref.root_node(), RopeProvider(source)) - .peekable(); - - result.push(HighlightIterLayer { - highlight_end_stack: Vec::new(), - scope_stack: vec![LocalScope { - inherits: false, - range: 0..usize::MAX, - local_defs: Vec::new(), - }], - cursor, - depth, - _tree: Some(tree), - captures, - config, - ranges, - }); - } - - Ok(()) // so we can use the try operator - })?; - - if queue.is_empty() { - break; - } - - let (next_config, next_depth, next_ranges) = queue.remove(0); - config = next_config; - depth = next_depth; - ranges = next_ranges; - } - - Ok(result) - } - - // Compute the ranges that should be included when parsing an injection. - // This takes into account three things: - // * `parent_ranges` - The ranges must all fall within the *current* layer's ranges. - // * `nodes` - Every injection takes place within a set of nodes. The injection ranges - // are the ranges of those nodes. - // * `includes_children` - For some injections, the content nodes' children should be - // excluded from the nested document, so that only the content nodes' *own* content - // is reparsed. For other injections, the content nodes' entire ranges should be - // reparsed, including the ranges of their children. 
- fn intersect_ranges( - parent_ranges: &[Range], - nodes: &[Node], - includes_children: bool, - ) -> Vec<Range> { - let mut cursor = nodes[0].walk(); - let mut result = Vec::new(); - let mut parent_range_iter = parent_ranges.iter(); - let mut parent_range = parent_range_iter - .next() - .expect("Layers should only be constructed with non-empty ranges vectors"); - for node in nodes.iter() { - let mut preceding_range = Range { - start_byte: 0, - start_point: Point::new(0, 0), - end_byte: node.start_byte(), - end_point: node.start_position(), - }; - let following_range = Range { - start_byte: node.end_byte(), - start_point: node.end_position(), - end_byte: usize::MAX, - end_point: Point::new(usize::MAX, usize::MAX), - }; - - for excluded_range in node - .children(&mut cursor) - .filter_map(|child| { - if includes_children { - None - } else { - Some(child.range()) - } - }) - .chain([following_range].iter().cloned()) - { - let mut range = Range { - start_byte: preceding_range.end_byte, - start_point: preceding_range.end_point, - end_byte: excluded_range.start_byte, - end_point: excluded_range.start_point, - }; - preceding_range = excluded_range; - - if range.end_byte < parent_range.start_byte { - continue; - } - - while parent_range.start_byte <= range.end_byte { - if parent_range.end_byte > range.start_byte { - if range.start_byte < parent_range.start_byte { - range.start_byte = parent_range.start_byte; - range.start_point = parent_range.start_point; - } - - if parent_range.end_byte < range.end_byte { - if range.start_byte < parent_range.end_byte { - result.push(Range { - start_byte: range.start_byte, - start_point: range.start_point, - end_byte: parent_range.end_byte, - end_point: parent_range.end_point, - }); - } - range.start_byte = parent_range.end_byte; - range.start_point = parent_range.end_point; - } else { - if range.start_byte < range.end_byte { - result.push(range); - } - break; - } - } - - if let Some(next_range) = parent_range_iter.next() { - parent_range 
= next_range; - } else { - return result; - } - } - } - } - result - } - // First, sort scope boundaries by their byte offset in the document. At a // given position, emit scope endings before scope beginnings. Finally, emit // scope boundaries from deeper layers first. @@ -1371,10 +1300,101 @@ impl<'a> HighlightIterLayer<'a> { } } -impl<'a, F> HighlightIter<'a, F> -where - F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a, -{ +// Compute the ranges that should be included when parsing an injection. +// This takes into account three things: +// * `parent_ranges` - The ranges must all fall within the *current* layer's ranges. +// * `nodes` - Every injection takes place within a set of nodes. The injection ranges +// are the ranges of those nodes. +// * `includes_children` - For some injections, the content nodes' children should be +// excluded from the nested document, so that only the content nodes' *own* content +// is reparsed. For other injections, the content nodes' entire ranges should be +// reparsed, including the ranges of their children. 
+fn intersect_ranges( + parent_ranges: &[Range], + nodes: &[Node], + includes_children: bool, +) -> Vec<Range> { + let mut cursor = nodes[0].walk(); + let mut result = Vec::new(); + let mut parent_range_iter = parent_ranges.iter(); + let mut parent_range = parent_range_iter + .next() + .expect("Layers should only be constructed with non-empty ranges vectors"); + for node in nodes.iter() { + let mut preceding_range = Range { + start_byte: 0, + start_point: Point::new(0, 0), + end_byte: node.start_byte(), + end_point: node.start_position(), + }; + let following_range = Range { + start_byte: node.end_byte(), + start_point: node.end_position(), + end_byte: usize::MAX, + end_point: Point::new(usize::MAX, usize::MAX), + }; + + for excluded_range in node + .children(&mut cursor) + .filter_map(|child| { + if includes_children { + None + } else { + Some(child.range()) + } + }) + .chain([following_range].iter().cloned()) + { + let mut range = Range { + start_byte: preceding_range.end_byte, + start_point: preceding_range.end_point, + end_byte: excluded_range.start_byte, + end_point: excluded_range.start_point, + }; + preceding_range = excluded_range; + + if range.end_byte < parent_range.start_byte { + continue; + } + + while parent_range.start_byte <= range.end_byte { + if parent_range.end_byte > range.start_byte { + if range.start_byte < parent_range.start_byte { + range.start_byte = parent_range.start_byte; + range.start_point = parent_range.start_point; + } + + if parent_range.end_byte < range.end_byte { + if range.start_byte < parent_range.end_byte { + result.push(Range { + start_byte: range.start_byte, + start_point: range.start_point, + end_byte: parent_range.end_byte, + end_point: parent_range.end_point, + }); + } + range.start_byte = parent_range.end_byte; + range.start_point = parent_range.end_point; + } else { + if range.start_byte < range.end_byte { + result.push(range); + } + break; + } + } + + if let Some(next_range) = parent_range_iter.next() { + parent_range = 
next_range; + } else { + return result; + } + } + } + } + result +} + +impl<'a> HighlightIter<'a> { fn emit_event( &mut self, offset: usize, @@ -1405,6 +1425,12 @@ where i += 1; continue; } + } else { + let layer = self.layers.remove(i + 1); + PARSER.with(|ts_parser| { + let highlighter = &mut ts_parser.borrow_mut(); + highlighter.cursors.push(layer.cursor); + }); } break; } @@ -1421,30 +1447,9 @@ where } } } - - fn insert_layer(&mut self, mut layer: HighlightIterLayer<'a>) { - if let Some(sort_key) = layer.sort_key() { - let mut i = 1; - while i < self.layers.len() { - if let Some(sort_key_i) = self.layers[i].sort_key() { - if sort_key_i > sort_key { - self.layers.insert(i, layer); - return; - } - i += 1; - } else { - self.layers.remove(i); - } - } - self.layers.push(layer); - } - } } -impl<'a, F> Iterator for HighlightIter<'a, F> -where - F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a, -{ +impl<'a> Iterator for HighlightIter<'a> { type Item = Result<HighlightEvent, Error>; fn next(&mut self) -> Option<Self::Item> { @@ -1504,55 +1509,12 @@ where layer.highlight_end_stack.pop(); return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd)); } else { - // return self.emit_event(self.source.len(), None); - return None; + return self.emit_event(self.source.len_bytes(), None); }; let (mut match_, capture_index) = layer.captures.next().unwrap(); let mut capture = match_.captures[capture_index]; - // If this capture represents an injection, then process the injection. - if match_.pattern_index < layer.config.locals_pattern_index { - let (language_name, content_node, include_children) = - injection_for_match(layer.config, &layer.config.query, &match_, self.source); - - // Explicitly remove this match so that none of its other captures will remain - // in the stream of captures. - match_.remove(); - - // If a language is found with the given name, then add a new language layer - // to the highlighted document. 
- if let (Some(language_name), Some(content_node)) = (language_name, content_node) { - if let Some(config) = (self.injection_callback)(&language_name) { - let ranges = HighlightIterLayer::intersect_ranges( - &self.layers[0].ranges, - &[content_node], - include_children, - ); - if !ranges.is_empty() { - match HighlightIterLayer::new( - self.source, - self.cancellation_flag, - &mut self.injection_callback, - config, - self.layers[0].depth + 1, - ranges, - ) { - Ok(layers) => { - for layer in layers { - self.insert_layer(layer); - } - } - Err(e) => return Some(Err(e)), - } - } - } - } - - self.sort_layers(); - continue 'main; - } - // Remove from the local scope stack any local scopes that have already ended. while range.start > layer.scope_stack.last().unwrap().range.end { layer.scope_stack.pop(); @@ -1747,14 +1709,6 @@ fn injection_for_match<'a>( (language_name, content_node, include_children) } -// fn shrink_and_clear<T>(vec: &mut Vec<T>, capacity: usize) { -// if vec.len() > capacity { -// vec.truncate(capacity); -// vec.shrink_to_fit(); -// } -// vec.clear(); -// } - pub struct Merge<I> { iter: I, spans: Box<dyn Iterator<Item = (usize, std::ops::Range<usize>)>>, @@ -1921,6 +1875,8 @@ mod test { .map(String::from) .collect(); + let loader = Loader::new(Configuration { language: vec![] }); + let language = get_language(&crate::RUNTIME_DIR, "Rust").unwrap(); let config = HighlightConfiguration::new( language, @@ -1943,7 +1899,7 @@ mod test { fn main() {} ", ); - let syntax = Syntax::new(&source, Arc::new(config)); + let syntax = Syntax::new(&source, Arc::new(config), Arc::new(loader)); let tree = syntax.tree(); let root = tree.root_node(); assert_eq!(root.kind(), "source_file"); @@ -1970,7 +1926,7 @@ mod test { &doc, vec![(6, 11, Some("test".into())), (12, 17, None)].into_iter(), ); - let edits = LanguageLayer::generate_edits(doc.slice(..), transaction.changes()); + let edits = generate_edits(&doc, transaction.changes()); // transaction.apply(&mut state); 
assert_eq!( @@ -1999,7 +1955,7 @@ mod test { let mut doc = Rope::from("fn test() {}"); let transaction = Transaction::change(&doc, vec![(8, 8, Some("a: u32".into()))].into_iter()); - let edits = LanguageLayer::generate_edits(doc.slice(..), transaction.changes()); + let edits = generate_edits(&doc, transaction.changes()); transaction.apply(&mut doc); assert_eq!(doc, "fn test(a: u32) {}"); diff --git a/helix-core/src/transaction.rs b/helix-core/src/transaction.rs index dfc18fbe..2e34a986 100644 --- a/helix-core/src/transaction.rs +++ b/helix-core/src/transaction.rs @@ -22,7 +22,7 @@ pub enum Assoc { } // ChangeSpec = Change | ChangeSet | Vec<Change> -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Default, Clone, PartialEq, Eq)] pub struct ChangeSet { pub(crate) changes: Vec<Operation>, /// The required document length. Will refuse to apply changes unless it matches. @@ -30,16 +30,6 @@ pub struct ChangeSet { len_after: usize, } -impl Default for ChangeSet { - fn default() -> Self { - Self { - changes: Vec::new(), - len: 0, - len_after: 0, - } - } -} - impl ChangeSet { pub fn with_capacity(capacity: usize) -> Self { Self { @@ -95,7 +85,7 @@ impl ChangeSet { let new_last = match self.changes.as_mut_slice() { [.., Insert(prev)] | [.., Insert(prev), Delete(_)] => { - prev.push_tendril(&fragment); + prev.push_str(&fragment); return; } [.., last @ Delete(_)] => std::mem::replace(last, Insert(fragment)), @@ -199,7 +189,7 @@ impl ChangeSet { // TODO: cover this with a test // figure out the byte index of the truncated string end let (pos, _) = s.char_indices().nth(j).unwrap(); - s.pop_front(pos as u32); + s.replace_range(0..pos, ""); head_a = Some(Insert(s)); head_b = changes_b.next(); } @@ -221,9 +211,11 @@ impl ChangeSet { Ordering::Greater => { // figure out the byte index of the truncated string end let (pos, _) = s.char_indices().nth(j).unwrap(); - let pos = pos as u32; - changes.insert(s.subtendril(0, pos)); - head_a = Some(Insert(s.subtendril(pos, s.len() as 
u32 - pos))); + let mut before = s; + let after = before.split_off(pos); + + changes.insert(before); + head_a = Some(Insert(after)); head_b = changes_b.next(); } } @@ -287,7 +279,7 @@ impl ChangeSet { } Delete(n) => { let text = Cow::from(original_doc.slice(pos..pos + *n)); - changes.insert(Tendril::from_slice(&text)); + changes.insert(Tendril::from(text.as_ref())); pos += n; } Insert(s) => { @@ -330,7 +322,7 @@ impl ChangeSet { /// `true` when the set is empty. #[inline] pub fn is_empty(&self) -> bool { - self.changes.is_empty() + self.changes.is_empty() || self.changes == [Operation::Retain(self.len)] } /// Map a position through the changes. @@ -419,7 +411,7 @@ impl ChangeSet { /// Transaction represents a single undoable unit of changes. Several changes can be grouped into /// a single transaction. -#[derive(Debug, Default, Clone)] +#[derive(Debug, Default, Clone, PartialEq, Eq)] pub struct Transaction { changes: ChangeSet, selection: Option<Selection>, @@ -720,19 +712,19 @@ mod test { #[test] fn optimized_composition() { let mut state = State::new("".into()); - let t1 = Transaction::insert(&state.doc, &state.selection, Tendril::from_char('h')); + let t1 = Transaction::insert(&state.doc, &state.selection, Tendril::from("h")); t1.apply(&mut state.doc); state.selection = state.selection.clone().map(t1.changes()); - let t2 = Transaction::insert(&state.doc, &state.selection, Tendril::from_char('e')); + let t2 = Transaction::insert(&state.doc, &state.selection, Tendril::from("e")); t2.apply(&mut state.doc); state.selection = state.selection.clone().map(t2.changes()); - let t3 = Transaction::insert(&state.doc, &state.selection, Tendril::from_char('l')); + let t3 = Transaction::insert(&state.doc, &state.selection, Tendril::from("l")); t3.apply(&mut state.doc); state.selection = state.selection.clone().map(t3.changes()); - let t4 = Transaction::insert(&state.doc, &state.selection, Tendril::from_char('l')); + let t4 = Transaction::insert(&state.doc, &state.selection, 
Tendril::from("l")); t4.apply(&mut state.doc); state.selection = state.selection.clone().map(t4.changes()); - let t5 = Transaction::insert(&state.doc, &state.selection, Tendril::from_char('o')); + let t5 = Transaction::insert(&state.doc, &state.selection, Tendril::from("o")); t5.apply(&mut state.doc); state.selection = state.selection.clone().map(t5.changes()); @@ -771,7 +763,7 @@ mod test { #[test] fn combine_with_utf8() { - const TEST_CASE: &'static str = "Hello, これはヘリックスエディターです！"; + const TEST_CASE: &str = "Hello, これはヘリックスエディターです！"; let empty = Rope::from(""); let a = ChangeSet::new(&empty); |