diff options
Diffstat (limited to 'helix-core/src')
-rw-r--r-- | helix-core/src/auto_pairs.rs | 255 | ||||
-rw-r--r-- | helix-core/src/diff.rs | 4 | ||||
-rw-r--r-- | helix-core/src/graphemes.rs | 86 | ||||
-rw-r--r-- | helix-core/src/increment/date_time.rs | 32 | ||||
-rw-r--r-- | helix-core/src/indent.rs | 2 | ||||
-rw-r--r-- | helix-core/src/object.rs | 90 | ||||
-rw-r--r-- | helix-core/src/syntax.rs | 1237 |
7 files changed, 961 insertions, 745 deletions
diff --git a/helix-core/src/auto_pairs.rs b/helix-core/src/auto_pairs.rs index 1b3de6ea..9d1152bc 100644 --- a/helix-core/src/auto_pairs.rs +++ b/helix-core/src/auto_pairs.rs @@ -1,7 +1,9 @@ //! When typing the opening character of one of the possible pairs defined below, //! this module provides the functionality to insert the paired closing character. -use crate::{movement::Direction, Range, Rope, Selection, Tendril, Transaction}; +use crate::{ + graphemes, movement::Direction, Range, Rope, RopeGraphemes, Selection, Tendril, Transaction, +}; use log::debug; use smallvec::SmallVec; @@ -63,31 +65,132 @@ fn prev_char(doc: &Rope, pos: usize) -> Option<char> { doc.get_char(pos - 1) } +fn is_single_grapheme(doc: &Rope, range: &Range) -> bool { + let mut graphemes = RopeGraphemes::new(doc.slice(range.from()..range.to())); + let first = graphemes.next(); + let second = graphemes.next(); + debug!("first: {:#?}, second: {:#?}", first, second); + first.is_some() && second.is_none() +} + /// calculate what the resulting range should be for an auto pair insertion fn get_next_range( + doc: &Rope, start_range: &Range, offset: usize, typed_char: char, len_inserted: usize, ) -> Range { - let end_head = start_range.head + offset + typed_char.len_utf8(); + // When the character under the cursor changes due to complete pair + // insertion, we must look backward a grapheme and then add the length + // of the insertion to put the resulting cursor in the right place, e.g. + // + // foo[\r\n] - anchor: 3, head: 5 + // foo([)]\r\n - anchor: 4, head: 5 + // + // foo[\r\n] - anchor: 3, head: 5 + // foo'[\r\n] - anchor: 4, head: 6 + // + // foo([)]\r\n - anchor: 4, head: 5 + // foo()[\r\n] - anchor: 5, head: 7 + // + // [foo]\r\n - anchor: 0, head: 3 + // [foo(])\r\n - anchor: 0, head: 5 + + // inserting at the very end of the document after the last newline + if start_range.head == doc.len_chars() && start_range.anchor == doc.len_chars() { + return Range::new( + start_range.anchor + offset + typed_char.len_utf8(), + start_range.head + offset + typed_char.len_utf8(), + ); + } + + let single_grapheme = is_single_grapheme(doc, start_range); + let doc_slice = doc.slice(..); + + // just skip over graphemes + if len_inserted == 0 { + let end_anchor = if single_grapheme { + graphemes::next_grapheme_boundary(doc_slice, start_range.anchor) + offset + + // even for backward inserts with multiple grapheme selections, + // we want the anchor to stay where it is so that the relative + // selection does not change, e.g.: + // + // foo([) wor]d -> insert ) -> foo()[ wor]d + } else { + start_range.anchor + offset + }; + + return Range::new( + end_anchor, + graphemes::next_grapheme_boundary(doc_slice, start_range.head) + offset, + ); + } + + // trivial case: only inserted a single-char opener, just move the selection + if len_inserted == 1 { + let end_anchor = if single_grapheme || start_range.direction() == Direction::Backward { + start_range.anchor + offset + typed_char.len_utf8() + } else { + start_range.anchor + offset + }; + + return Range::new( + end_anchor, + start_range.head + offset + typed_char.len_utf8(), + ); + } + + // If the head = 0, then we must be in insert mode with a backward + // cursor, which implies the head will just move + let end_head = if start_range.head == 0 || start_range.direction() == Direction::Backward { + start_range.head + offset + typed_char.len_utf8() + } else { + // We must have a forward cursor, which means we must move to the + // other end of the grapheme to get to where the new characters + // are inserted, then move the head to where it should be + let prev_bound = graphemes::prev_grapheme_boundary(doc_slice, start_range.head); + debug!( + "prev_bound: {}, offset: {}, len_inserted: {}", + prev_bound, offset, len_inserted + ); + prev_bound + offset + len_inserted + }; let end_anchor = match (start_range.len(), start_range.direction()) { // if we have a zero width cursor, it shifts to the same number (0, _) => end_head, - // if we are inserting for a regular one-width cursor, the anchor - // moves with the head + // If we are inserting for a regular one-width cursor, the anchor + // moves with the head. This is the fast path for ASCII. (1, Direction::Forward) => end_head - 1, (1, Direction::Backward) => end_head + 1, - // if we are appending, the anchor stays where it is; only offset - // for multiple range insertions - (_, Direction::Forward) => start_range.anchor + offset, + (_, Direction::Forward) => { + if single_grapheme { + graphemes::prev_grapheme_boundary(doc.slice(..), start_range.head) + + typed_char.len_utf8() - // when we are inserting in front of a selection, we need to move - // the anchor over by however many characters were inserted overall - (_, Direction::Backward) => start_range.anchor + offset + len_inserted, + // if we are appending, the anchor stays where it is; only offset + // for multiple range insertions + } else { + start_range.anchor + offset + } + } + + (_, Direction::Backward) => { + if single_grapheme { + // if we're backward, then the head is at the first char + // of the typed char, so we need to add the length of + // the closing char + graphemes::prev_grapheme_boundary(doc.slice(..), start_range.anchor) + len_inserted + } else { + // when we are inserting in front of a selection, we need to move + // the anchor over by however many characters were inserted overall + start_range.anchor + offset + len_inserted + } + } }; Range::new(end_anchor, end_head) @@ -122,7 +225,7 @@ fn handle_open( } }; - let next_range = get_next_range(start_range, offs, open, len_inserted); + let next_range = get_next_range(doc, start_range, offs, open, len_inserted); end_ranges.push(next_range); offs += len_inserted; @@ -152,7 +255,7 @@ fn handle_close(doc: &Rope, selection: &Selection, _open: char, close: char) -> (cursor, cursor, Some(Tendril::from_char(close))) }; - let next_range = get_next_range(start_range, offs, close, len_inserted); + let next_range = get_next_range(doc, start_range, offs, close, len_inserted); end_ranges.push(next_range); offs += len_inserted; @@ -202,7 +305,7 @@ fn handle_same( (cursor, cursor, Some(pair)) }; - let next_range = get_next_range(start_range, offs, token, len_inserted); + let next_range = get_next_range(doc, start_range, offs, token, len_inserted); end_ranges.push(next_range); offs += len_inserted; @@ -219,6 +322,8 @@ mod test { use super::*; use smallvec::smallvec; + const LINE_END: &'static str = crate::DEFAULT_LINE_ENDING.as_str(); + fn differing_pairs() -> impl Iterator<Item = &'static (char, char)> { PAIRS.iter().filter(|(open, close)| open != close) } @@ -270,12 +375,59 @@ mod test { #[test] fn test_insert_blank() { test_hooks_with_pairs( - &Rope::new(), + &Rope::from(LINE_END), &Selection::single(1, 0), PAIRS, - |open, close| format!("{}{}", open, close), + |open, close| format!("{}{}{}", open, close, LINE_END), &Selection::single(2, 1), ); + + let empty_doc = Rope::from(format!("{line_end}{line_end}", line_end = LINE_END)); + + test_hooks_with_pairs( + &empty_doc, + &Selection::single(empty_doc.len_chars(), LINE_END.len()), + PAIRS, + |open, close| { + format!( + "{line_end}{open}{close}{line_end}", + open = open, + close = close, + line_end = LINE_END + ) + }, + &Selection::single(LINE_END.len() + 2, LINE_END.len() + 1), + ); + } + + #[test] + fn test_insert_before_multi_code_point_graphemes() { + test_hooks_with_pairs( + &Rope::from(format!("hello ๐จโ๐ฉโ๐งโ๐ฆ goodbye{}", LINE_END)), + &Selection::single(13, 6), + PAIRS, + |open, _| format!("hello {}๐จโ๐ฉโ๐งโ๐ฆ goodbye{}", open, LINE_END), + &Selection::single(14, 7), + ); + } + + #[test] + fn test_insert_at_end_of_document() { + test_hooks_with_pairs( + &Rope::from(LINE_END), + &Selection::single(LINE_END.len(), LINE_END.len()), + PAIRS, + |open, close| format!("{}{}{}", LINE_END, open, close), + &Selection::single(LINE_END.len() + 1, LINE_END.len() + 1), + ); + + test_hooks_with_pairs( + &Rope::from(format!("foo{}", LINE_END)), + &Selection::single(3 + LINE_END.len(), 3 + LINE_END.len()), + PAIRS, + |open, close| format!("foo{}{}{}", LINE_END, open, close), + &Selection::single(LINE_END.len() + 4, LINE_END.len() + 4), + ); } /// [] -> append ( -> ([]) @@ -283,11 +435,20 @@ mod test { fn test_append_blank() { test_hooks_with_pairs( // this is what happens when you have a totally blank document and then append - &Rope::from("\n\n"), - &Selection::single(0, 2), + &Rope::from(format!("{line_end}{line_end}", line_end = LINE_END)), + // before inserting the pair, the cursor covers all of both empty lines + &Selection::single(0, LINE_END.len() * 2), PAIRS, - |open, close| format!("\n{}{}\n", open, close), - &Selection::single(0, 3), + |open, close| { + format!( + "{line_end}{open}{close}{line_end}", + line_end = LINE_END, + open = open, + close = close + ) + }, + // after inserting pair, the cursor covers the first new line and the open char + &Selection::single(0, LINE_END.len() + 2), ); } @@ -329,6 +490,18 @@ mod test { ); } + /// foo[] -> append to end of line ( -> foo([]) + #[test] + fn test_append_single_cursor() { + test_hooks_with_pairs( + &Rope::from(format!("foo{}", LINE_END)), + &Selection::single(3, 3 + LINE_END.len()), + differing_pairs(), + |open, close| format!("foo{}{}{}", open, close, LINE_END), + &Selection::single(4, 5), + ); + } + /// fo[o] fo[o(]) /// fo[o] -> append ( -> fo[o(]) /// fo[o] fo[o(]) @@ -355,18 +528,18 @@ mod test { ); } - /// ([]) -> insert ) -> ()[] + /// ([)] -> insert ) -> ()[] #[test] fn test_insert_close_inside_pair() { for (open, close) in PAIRS { - let doc = Rope::from(format!("{}{}", open, close)); + let doc = Rope::from(format!("{}{}{}", open, close, LINE_END)); test_hooks( &doc, &Selection::single(2, 1), *close, &doc, - &Selection::single(3, 2), + &Selection::single(2 + LINE_END.len(), 2), ); } } @@ -375,14 +548,14 @@ mod test { #[test] fn test_append_close_inside_pair() { for (open, close) in PAIRS { - let doc = Rope::from(format!("{}{}\n", open, close)); + let doc = Rope::from(format!("{}{}{}", open, close, LINE_END)); test_hooks( &doc, &Selection::single(0, 2), *close, &doc, - &Selection::single(0, 3), + &Selection::single(0, 2 + LINE_END.len()), ); } } @@ -564,6 +737,20 @@ mod test { ) } + /// foo([) wor]d -> insert ) -> foo()[ wor]d + #[test] + fn test_insert_close_inside_pair_trailing_word_with_selection() { + for (open, close) in differing_pairs() { + test_hooks( + &Rope::from(format!("foo{}{} word{}", open, close, LINE_END)), + &Selection::single(9, 4), + *close, + &Rope::from(format!("foo{}{} word{}", open, close, LINE_END)), + &Selection::single(9, 5), + ) + } + } + /// we want pairs that are *not* the same char to be inserted after /// a non-pair char, for cases like functions, but for pairs that are /// the same char, we want to *not* insert a pair to handle cases like "I'm" @@ -572,7 +759,7 @@ mod test { /// word[] -> insert ' -> word'[] #[test] fn test_insert_open_after_non_pair() { - let doc = Rope::from("word"); + let doc = Rope::from(format!("word{}", LINE_END)); let sel = Selection::single(5, 4); let expected_sel = Selection::single(6, 5); @@ -580,7 +767,7 @@ mod test { &doc, &sel, differing_pairs(), - |open, close| format!("word{}{}", open, close), + |open, close| format!("word{}{}{}", open, close, LINE_END), &expected_sel, ); @@ -588,22 +775,8 @@ mod test { &doc, &sel, matching_pairs(), - |open, _| format!("word{}", open), + |open, _| format!("word{}{}", open, LINE_END), &expected_sel, ); } - - /// appending with only a cursor should stay a cursor - /// - /// [] -> append to end "foo -> "foo[]" - #[test] - fn test_append_single_cursor() { - test_hooks_with_pairs( - &Rope::from("\n"), - &Selection::single(0, 1), - PAIRS, - |open, close| format!("{}{}\n", open, close), - &Selection::single(1, 2), - ); - } } diff --git a/helix-core/src/diff.rs b/helix-core/src/diff.rs index a83db333..3349213d 100644 --- a/helix-core/src/diff.rs +++ b/helix-core/src/diff.rs @@ -11,10 +11,6 @@ pub fn compare_ropes(old: &Rope, new: &Rope) -> Transaction { // A timeout is set so after 1 seconds, the algorithm will start // approximating. This is especially important for big `Rope`s or // `Rope`s that are extremely dissimilar to each other. - // - // Note: Ignore the clippy warning, as the trait bounds of - // `Transaction::change()` require an iterator implementing - // `ExactIterator`. let mut config = similar::TextDiff::configure(); config.timeout(std::time::Duration::from_secs(1)); diff --git a/helix-core/src/graphemes.rs b/helix-core/src/graphemes.rs index c6398875..aa898684 100644 --- a/helix-core/src/graphemes.rs +++ b/helix-core/src/graphemes.rs @@ -120,6 +120,43 @@ pub fn nth_next_grapheme_boundary(slice: RopeSlice, char_idx: usize, n: usize) - chunk_char_idx + tmp } +#[must_use] +pub fn nth_next_grapheme_boundary_byte(slice: RopeSlice, mut byte_idx: usize, n: usize) -> usize { + // Bounds check + debug_assert!(byte_idx <= slice.len_bytes()); + + // Get the chunk with our byte index in it. + let (mut chunk, mut chunk_byte_idx, mut _chunk_char_idx, _) = slice.chunk_at_byte(byte_idx); + + // Set up the grapheme cursor. + let mut gc = GraphemeCursor::new(byte_idx, slice.len_bytes(), true); + + // Find the nth next grapheme cluster boundary. + for _ in 0..n { + loop { + match gc.next_boundary(chunk, chunk_byte_idx) { + Ok(None) => return slice.len_bytes(), + Ok(Some(n)) => { + byte_idx = n; + break; + } + Err(GraphemeIncomplete::NextChunk) => { + chunk_byte_idx += chunk.len(); + let (a, _, _c, _) = slice.chunk_at_byte(chunk_byte_idx); + chunk = a; + // chunk_char_idx = c; + } + Err(GraphemeIncomplete::PreContext(n)) => { + let ctx_chunk = slice.chunk_at_byte(n - 1).0; + gc.provide_context(ctx_chunk, n - ctx_chunk.len()); + } + _ => unreachable!(), + } + } + } + byte_idx +} + /// Finds the next grapheme boundary after the given char position. #[must_use] #[inline(always)] @@ -127,6 +164,13 @@ pub fn next_grapheme_boundary(slice: RopeSlice, char_idx: usize) -> usize { nth_next_grapheme_boundary(slice, char_idx, 1) } +/// Finds the next grapheme boundary after the given byte position. +#[must_use] +#[inline(always)] +pub fn next_grapheme_boundary_byte(slice: RopeSlice, byte_idx: usize) -> usize { + nth_next_grapheme_boundary_byte(slice, byte_idx, 1) +} + /// Returns the passed char index if it's already a grapheme boundary, /// or the next grapheme boundary char index if not. #[must_use] @@ -151,6 +195,23 @@ pub fn ensure_grapheme_boundary_prev(slice: RopeSlice, char_idx: usize) -> usize } } +/// Returns the passed byte index if it's already a grapheme boundary, +/// or the next grapheme boundary byte index if not. +#[must_use] +#[inline] +pub fn ensure_grapheme_boundary_next_byte(slice: RopeSlice, byte_idx: usize) -> usize { + if byte_idx == 0 { + byte_idx + } else { + // TODO: optimize so we're not constructing grapheme cursor twice + if is_grapheme_boundary_byte(slice, byte_idx) { + byte_idx + } else { + next_grapheme_boundary_byte(slice, byte_idx) + } + } +} + /// Returns whether the given char position is a grapheme boundary. #[must_use] pub fn is_grapheme_boundary(slice: RopeSlice, char_idx: usize) -> bool { @@ -179,6 +240,31 @@ pub fn is_grapheme_boundary(slice: RopeSlice, char_idx: usize) -> bool { } } +/// Returns whether the given byte position is a grapheme boundary. +#[must_use] +pub fn is_grapheme_boundary_byte(slice: RopeSlice, byte_idx: usize) -> bool { + // Bounds check + debug_assert!(byte_idx <= slice.len_bytes()); + + // Get the chunk with our byte index in it. + let (chunk, chunk_byte_idx, _, _) = slice.chunk_at_byte(byte_idx); + + // Set up the grapheme cursor. + let mut gc = GraphemeCursor::new(byte_idx, slice.len_bytes(), true); + + // Determine if the given position is a grapheme cluster boundary. + loop { + match gc.is_boundary(chunk, chunk_byte_idx) { + Ok(n) => return n, + Err(GraphemeIncomplete::PreContext(n)) => { + let (ctx_chunk, ctx_byte_start, _, _) = slice.chunk_at_byte(n - 1); + gc.provide_context(ctx_chunk, ctx_byte_start); + } + Err(_) => unreachable!(), + } + } +} + /// An iterator over the graphemes of a `RopeSlice`. #[derive(Clone)] pub struct RopeGraphemes<'a> { diff --git a/helix-core/src/increment/date_time.rs b/helix-core/src/increment/date_time.rs index e3cfe107..1703c3ba 100644 --- a/helix-core/src/increment/date_time.rs +++ b/helix-core/src/increment/date_time.rs @@ -195,82 +195,82 @@ struct DateField { impl DateField { fn from_specifier(specifier: &str) -> Option<Self> { match specifier { - "Y" => Some(DateField { + "Y" => Some(Self { regex: r"\d{4}", unit: DateUnit::Years, max_len: 5, }), - "y" => Some(DateField { + "y" => Some(Self { regex: r"\d\d", unit: DateUnit::Years, max_len: 2, }), - "m" => Some(DateField { + "m" => Some(Self { regex: r"[0-1]\d", unit: DateUnit::Months, max_len: 2, }), - "d" => Some(DateField { + "d" => Some(Self { regex: r"[0-3]\d", unit: DateUnit::Days, max_len: 2, }), - "-d" => Some(DateField { + "-d" => Some(Self { regex: r"[1-3]?\d", unit: DateUnit::Days, max_len: 2, }), - "a" => Some(DateField { + "a" => Some(Self { regex: r"Sun|Mon|Tue|Wed|Thu|Fri|Sat", unit: DateUnit::Days, max_len: 3, }), - "A" => Some(DateField { + "A" => Some(Self { regex: r"Sunday|Monday|Tuesday|Wednesday|Thursday|Friday|Saturday", unit: DateUnit::Days, max_len: 9, }), - "b" | "h" => Some(DateField { + "b" | "h" => Some(Self { regex: r"Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec", unit: DateUnit::Months, max_len: 3, }), - "B" => Some(DateField { + "B" => Some(Self { regex: r"January|February|March|April|May|June|July|August|September|October|November|December", unit: DateUnit::Months, max_len: 9, }), - "H" => Some(DateField { + "H" => Some(Self { regex: r"[0-2]\d", unit: DateUnit::Hours, max_len: 2, }), - "M" => Some(DateField { + "M" => Some(Self { regex: r"[0-5]\d", unit: DateUnit::Minutes, max_len: 2, }), - "S" => Some(DateField { + "S" => Some(Self { regex: r"[0-5]\d", unit: DateUnit::Seconds, max_len: 2, }), - "I" => Some(DateField { + "I" => Some(Self { regex: r"[0-1]\d", unit: DateUnit::Hours, max_len: 2, }), - "-I" => Some(DateField { + "-I" => Some(Self { regex: r"1?\d", unit: DateUnit::Hours, max_len: 2, }), - "P" => Some(DateField { + "P" => Some(Self { regex: r"am|pm", unit: DateUnit::AmPm, max_len: 2, }), - "p" => Some(DateField { + "p" => Some(Self { regex: r"AM|PM", unit: DateUnit::AmPm, max_len: 2, diff --git a/helix-core/src/indent.rs b/helix-core/src/indent.rs index 1fc2b8a5..ac2a1208 100644 --- a/helix-core/src/indent.rs +++ b/helix-core/src/indent.rs @@ -454,7 +454,7 @@ where let language_config = loader.language_config_for_scope("source.rust").unwrap(); let highlight_config = language_config.highlight_config(&[]).unwrap(); - let syntax = Syntax::new(&doc, highlight_config.clone()); + let syntax = Syntax::new(&doc, highlight_config.clone(), std::sync::Arc::new(loader)); let text = doc.slice(..); let tab_width = 4; diff --git a/helix-core/src/object.rs b/helix-core/src/object.rs index 3363e20b..b06f4144 100644 --- a/helix-core/src/object.rs +++ b/helix-core/src/object.rs @@ -1,56 +1,72 @@ use crate::{Range, RopeSlice, Selection, Syntax}; +use tree_sitter::Node; -pub fn expand_selection(syntax: &Syntax, text: RopeSlice, selection: &Selection) -> Selection { - let tree = syntax.tree(); - - selection.clone().transform(|range| { - let from = text.char_to_byte(range.from()); - let to = text.char_to_byte(range.to()); +pub fn expand_selection(syntax: &Syntax, text: RopeSlice, selection: Selection) -> Selection { + select_node_impl(syntax, text, selection, |descendant, from, to| { + if descendant.start_byte() == from && descendant.end_byte() == to { + descendant.parent() + } else { + Some(descendant) + } + }) +} - // find parent of a descendant that matches the range - let parent = match tree - .root_node() - .descendant_for_byte_range(from, to) - .and_then(|node| { - if node.start_byte() == from && node.end_byte() == to { - node.parent() - } else { - Some(node) - } - }) { - Some(parent) => parent, - None => return range, - }; +pub fn shrink_selection(syntax: &Syntax, text: RopeSlice, selection: Selection) -> Selection { + select_node_impl(syntax, text, selection, |descendant, _from, _to| { + descendant.child(0).or(Some(descendant)) + }) +} - let from = text.byte_to_char(parent.start_byte()); - let to = text.byte_to_char(parent.end_byte()); +pub fn select_sibling<F>( + syntax: &Syntax, + text: RopeSlice, + selection: Selection, + sibling_fn: &F, +) -> Selection +where + F: Fn(Node) -> Option<Node>, +{ + select_node_impl(syntax, text, selection, |descendant, _from, _to| { + find_sibling_recursive(descendant, sibling_fn) + }) +} - if range.head < range.anchor { - Range::new(to, from) - } else { - Range::new(from, to) - } +fn find_sibling_recursive<F>(node: Node, sibling_fn: F) -> Option<Node> +where + F: Fn(Node) -> Option<Node>, +{ + sibling_fn(node).or_else(|| { + node.parent() + .and_then(|node| find_sibling_recursive(node, sibling_fn)) }) } -pub fn shrink_selection(syntax: &Syntax, text: RopeSlice, selection: &Selection) -> Selection { +fn select_node_impl<F>( + syntax: &Syntax, + text: RopeSlice, + selection: Selection, + select_fn: F, +) -> Selection +where + F: Fn(Node, usize, usize) -> Option<Node>, +{ let tree = syntax.tree(); - selection.clone().transform(|range| { + selection.transform(|range| { let from = text.char_to_byte(range.from()); let to = text.char_to_byte(range.to()); - let descendant = match tree.root_node().descendant_for_byte_range(from, to) { - // find first child, if not possible, fallback to the node that contains selection - Some(descendant) => match descendant.child(0) { - Some(child) => child, - None => descendant, - }, + let node = match tree + .root_node() + .descendant_for_byte_range(from, to) + .and_then(|node| select_fn(node, from, to)) + { + Some(node) => node, None => return range, }; - let from = text.byte_to_char(descendant.start_byte()); - let to = text.byte_to_char(descendant.end_byte()); + let from = text.byte_to_char(node.start_byte()); + let to = text.byte_to_char(node.end_byte()); if range.head < range.anchor { Range::new(to, from) diff --git a/helix-core/src/syntax.rs b/helix-core/src/syntax.rs index 5d37c219..d6ec7610 100644 --- a/helix-core/src/syntax.rs +++ b/helix-core/src/syntax.rs @@ -8,12 +8,13 @@ use crate::{ pub use helix_syntax::get_language; -use arc_swap::ArcSwap; +use arc_swap::{ArcSwap, Guard}; +use slotmap::{DefaultKey as LayerId, HopSlotMap}; use std::{ borrow::Cow, cell::RefCell, - collections::{HashMap, HashSet}, + collections::{HashMap, HashSet, VecDeque}, fmt, path::Path, sync::Arc, @@ -95,6 +96,7 @@ pub struct LanguageServerConfiguration { #[serde(default)] #[serde(skip_serializing_if = "Vec::is_empty")] pub args: Vec<String>, + pub language_id: Option<String>, } #[derive(Debug, Serialize, Deserialize)] @@ -207,12 +209,9 @@ impl LanguageConfiguration { &highlights_query, &injections_query, &locals_query, - ); + ) + .unwrap(); // TODO: avoid panic - let config = match config { - Ok(config) => config, - Err(err) => panic!("{}", err), - }; // TODO: avoid panic config.configure(scopes); Some(Arc::new(config)) } @@ -262,12 +261,16 @@ impl LanguageConfiguration { } } +// Expose loader as Lazy<> global since it's always static? + #[derive(Debug)] pub struct Loader { // highlight_names ? language_configs: Vec<Arc<LanguageConfiguration>>, language_config_ids_by_file_type: HashMap<String, usize>, // Vec<usize> language_config_ids_by_shebang: HashMap<String, usize>, + + scopes: ArcSwap<Vec<String>>, } impl Loader { @@ -276,6 +279,7 @@ impl Loader { language_configs: Vec::new(), language_config_ids_by_file_type: HashMap::new(), language_config_ids_by_shebang: HashMap::new(), + scopes: ArcSwap::from_pointee(Vec::new()), }; for config in config.language { @@ -361,8 +365,22 @@ impl Loader { } None } - pub fn language_configs_iter(&self) -> impl Iterator<Item = &Arc<LanguageConfiguration>> { - self.language_configs.iter() + + pub fn set_scopes(&self, scopes: Vec<String>) { + self.scopes.store(Arc::new(scopes)); + + // Reconfigure existing grammars + for config in self + .language_configs + .iter() + .filter(|cfg| cfg.is_highlight_initialized()) + { + config.reconfigure(&self.scopes()); + } + } + + pub fn scopes(&self) -> Guard<Arc<Vec<String>>> { + self.scopes.load() } } @@ -371,12 +389,6 @@ pub struct TsParser { cursors: Vec<QueryCursor>, } -impl fmt::Debug for TsParser { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("TsParser").finish() - } -} - // could also just use a pool, or a single instance? thread_local! { pub static PARSER: RefCell<TsParser> = RefCell::new(TsParser { @@ -387,9 +399,9 @@ thread_local! { #[derive(Debug)] pub struct Syntax { - config: Arc<HighlightConfiguration>, - - root_layer: LanguageLayer, + layers: HopSlotMap<LayerId, LanguageLayer>, + root: LayerId, + loader: Arc<Loader>, } fn byte_range_to_str(range: std::ops::Range<usize>, source: RopeSlice) -> Cow<str> { @@ -399,38 +411,34 @@ fn byte_range_to_str(range: std::ops::Range<usize>, source: RopeSlice) -> Cow<st } impl Syntax { - // buffer, grammar, config, grammars, sync_timeout? - pub fn new( - /*language: Lang,*/ source: &Rope, - config: Arc<HighlightConfiguration>, - ) -> Self { - let root_layer = LanguageLayer { tree: None }; + pub fn new(source: &Rope, config: Arc<HighlightConfiguration>, loader: Arc<Loader>) -> Self { + let root_layer = LanguageLayer { + tree: None, + config, + depth: 0, + ranges: vec![Range { + start_byte: 0, + end_byte: usize::MAX, + start_point: Point::new(0, 0), + end_point: Point::new(usize::MAX, usize::MAX), + }], + }; - // track markers of injections // track scope_descriptor: a Vec of scopes for item in tree + let mut layers = HopSlotMap::default(); + let root = layers.insert(root_layer); + let mut syntax = Self { - // grammar, - config, - root_layer, + root, + layers, + loader, }; - // update root layer - PARSER.with(|ts_parser| { - // TODO: handle the returned `Result` properly. - let _ = syntax.root_layer.parse( - &mut ts_parser.borrow_mut(), - &syntax.config, - source, - 0, - vec![Range { - start_byte: 0, - end_byte: usize::MAX, - start_point: Point::new(0, 0), - end_point: Point::new(usize::MAX, usize::MAX), - }], - ); - }); + syntax + .update(source, source, &ChangeSet::new(source)) + .unwrap(); + syntax } @@ -440,32 +448,255 @@ impl Syntax { source: &Rope, changeset: &ChangeSet, ) -> Result<(), Error> { + let mut queue = VecDeque::new(); + queue.push_back(self.root); + + let scopes = self.loader.scopes.load(); + let injection_callback = |language: &str| { + self.loader + .language_configuration_for_injection_string(language) + .and_then(|language_config| language_config.highlight_config(&scopes)) + }; + + // Convert the changeset into tree sitter edits. + let edits = generate_edits(old_source, changeset); + + // Use the edits to update all layers markers + if !edits.is_empty() { + fn point_add(a: Point, b: Point) -> Point { + if b.row > 0 { + Point::new(a.row.saturating_add(b.row), b.column) + } else { + Point::new(0, a.column.saturating_add(b.column)) + } + } + fn point_sub(a: Point, b: Point) -> Point { + if a.row > b.row { + Point::new(a.row.saturating_sub(b.row), a.column) + } else { + Point::new(0, a.column.saturating_sub(b.column)) + } + } + + for layer in &mut self.layers.values_mut() { + // The root layer always covers the whole range (0..usize::MAX) + if layer.depth == 0 { + continue; + } + + for range in &mut layer.ranges { + // Roughly based on https://github.com/tree-sitter/tree-sitter/blob/ddeaa0c7f534268b35b4f6cb39b52df082754413/lib/src/subtree.c#L691-L720 + for edit in edits.iter().rev() { + let is_pure_insertion = edit.old_end_byte == edit.start_byte; + + // if edit is after range, skip + if edit.start_byte > range.end_byte { + // TODO: || (is_noop && edit.start_byte == range.end_byte) + continue; + } + + // if edit is before range, shift entire range by len + if edit.old_end_byte < range.start_byte { + range.start_byte = + edit.new_end_byte + (range.start_byte - edit.old_end_byte); + range.start_point = point_add( + edit.new_end_position, + point_sub(range.start_point, edit.old_end_position), + ); + + range.end_byte = edit + .new_end_byte + .saturating_add(range.end_byte - edit.old_end_byte); + range.end_point = point_add( + edit.new_end_position, + point_sub(range.end_point, edit.old_end_position), + ); + } + // if the edit starts in the space before and extends into the range + else if edit.start_byte < range.start_byte { + range.start_byte = edit.new_end_byte; + range.start_point = edit.new_end_position; + + range.end_byte = range + .end_byte + .saturating_sub(edit.old_end_byte) + .saturating_add(edit.new_end_byte); + range.end_point = point_add( + edit.new_end_position, + point_sub(range.end_point, edit.old_end_position), + ); + } + // If the edit is an insertion at the start of the tree, shift + else if edit.start_byte == range.start_byte && is_pure_insertion { + range.start_byte = edit.new_end_byte; + range.start_point = edit.new_end_position; + } else { + range.end_byte = range + .end_byte + .saturating_sub(edit.old_end_byte) + .saturating_add(edit.new_end_byte); + range.end_point = point_add( + edit.new_end_position, + point_sub(range.end_point, edit.old_end_position), + ); + } + } + } + } + } + PARSER.with(|ts_parser| { - self.root_layer.update( - &mut ts_parser.borrow_mut(), - &self.config, - old_source, - source, - changeset, - ) - }) + let ts_parser = &mut ts_parser.borrow_mut(); + let mut cursor = ts_parser.cursors.pop().unwrap_or_else(QueryCursor::new); + // TODO: might need to set cursor range + cursor.set_byte_range(0..usize::MAX); - // TODO: deal with injections and update them too - } + let source_slice = source.slice(..); - // fn buffer_changed -> call layer.update(range, new_text) on root layer and then all marker layers + let mut touched = HashSet::new(); - // call this on transaction.apply() -> buffer_changed(changes) - // - // fn parse(language, old_tree, ranges) - // - pub fn tree(&self) -> &Tree { - self.root_layer.tree() + // TODO: we should be able to avoid editing & parsing layers with ranges earlier in the document before the edit + + while let Some(layer_id) = queue.pop_front() { + // Mark the layer as touched + touched.insert(layer_id); + + let layer = &mut self.layers[layer_id]; + + // If a tree already exists, notify it of changes. + if let Some(tree) = &mut layer.tree { + for edit in edits.iter().rev() { + // Apply the edits in reverse. + // If we applied them in order then edit 1 would disrupt the positioning of edit 2. + tree.edit(edit); + } + } + + // Re-parse the tree. + layer.parse(&mut ts_parser.parser, source)?; + + // Switch to an immutable borrow. + let layer = &self.layers[layer_id]; + + // Process injections. + let matches = cursor.matches( + &layer.config.injections_query, + layer.tree().root_node(), + RopeProvider(source_slice), + ); + let mut injections = Vec::new(); + for mat in matches { + let (language_name, content_node, include_children) = injection_for_match( + &layer.config, + &layer.config.injections_query, + &mat, + source_slice, + ); + + // Explicitly remove this match so that none of its other captures will remain + // in the stream of captures. + mat.remove(); + + // If a language is found with the given name, then add a new language layer + // to the highlighted document. + if let (Some(language_name), Some(content_node)) = (language_name, content_node) + { + if let Some(config) = (injection_callback)(&language_name) { + let ranges = + intersect_ranges(&layer.ranges, &[content_node], include_children); + + if !ranges.is_empty() { + injections.push((config, ranges)); + } + } + } + } + + // Process combined injections. + if let Some(combined_injections_query) = &layer.config.combined_injections_query { + let mut injections_by_pattern_index = + vec![(None, Vec::new(), false); combined_injections_query.pattern_count()]; + let matches = cursor.matches( + combined_injections_query, + layer.tree().root_node(), + RopeProvider(source_slice), + ); + for mat in matches { + let entry = &mut injections_by_pattern_index[mat.pattern_index]; + let (language_name, content_node, include_children) = injection_for_match( + &layer.config, + combined_injections_query, + &mat, + source_slice, + ); + if language_name.is_some() { + entry.0 = language_name; + } + if let Some(content_node) = content_node { + entry.1.push(content_node); + } + entry.2 = include_children; + } + for (lang_name, content_nodes, includes_children) in injections_by_pattern_index + { + if let (Some(lang_name), false) = (lang_name, content_nodes.is_empty()) { + if let Some(config) = (injection_callback)(&lang_name) { + let ranges = intersect_ranges( + &layer.ranges, + &content_nodes, + includes_children, + ); + if !ranges.is_empty() { + injections.push((config, ranges)); + } + } + } + } + } + + let depth = layer.depth + 1; + // TODO: can't inline this since matches borrows self.layers + for (config, ranges) in injections { + // Find an existing layer + let layer = self + .layers + .iter_mut() + .find(|(_, layer)| { + layer.depth == depth && // TODO: track parent id instead + layer.config.language == config.language && layer.ranges == ranges + }) + .map(|(id, _layer)| id); + + // ...or insert a new one. + let layer_id = layer.unwrap_or_else(|| { + self.layers.insert(LanguageLayer { + tree: None, + config, + depth, + ranges, + }) + }); + + queue.push_back(layer_id); + } + + // TODO: pre-process local scopes at this time, rather than highlight? + // would solve problems with locals not working across boundaries + } + + // Return the cursor back in the pool. + ts_parser.cursors.push(cursor); + + // Remove all untouched layers + self.layers.retain(|id, _| touched.contains(&id)); + + Ok(()) + }) } - // - // <!--update_for_injection(grammar)--> - // Highlighting + pub fn tree(&self) -> &Tree { + self.layers[self.root].tree() + } /// Iterate over the highlighted regions for a given slice of source code. pub fn highlight_iter<'a>( @@ -473,65 +704,76 @@ impl Syntax { source: RopeSlice<'a>, range: Option<std::ops::Range<usize>>, cancellation_flag: Option<&'a AtomicUsize>, - injection_callback: impl FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a, ) -> impl Iterator<Item = Result<HighlightEvent, Error>> + 'a { - // The `captures` iterator borrows the `Tree` and the `QueryCursor`, which - // prevents them from being moved. But both of these values are really just - // pointers, so it's actually ok to move them. - - // reuse a cursor from the pool if possible - let mut cursor = PARSER.with(|ts_parser| { - let highlighter = &mut ts_parser.borrow_mut(); - highlighter.cursors.pop().unwrap_or_else(QueryCursor::new) + let mut layers = self + .layers + .iter() + .filter_map(|(_, layer)| { + // TODO: if range doesn't overlap layer range, skip it + + // Reuse a cursor from the pool if available. + let mut cursor = PARSER.with(|ts_parser| { + let highlighter = &mut ts_parser.borrow_mut(); + highlighter.cursors.pop().unwrap_or_else(QueryCursor::new) + }); + + // The `captures` iterator borrows the `Tree` and the `QueryCursor`, which + // prevents them from being moved. But both of these values are really just + // pointers, so it's actually ok to move them. + let cursor_ref = + unsafe { mem::transmute::<_, &'static mut QueryCursor>(&mut cursor) }; + + // if reusing cursors & no range this resets to whole range + cursor_ref.set_byte_range(range.clone().unwrap_or(0..usize::MAX)); + + let mut captures = cursor_ref + .captures( + &layer.config.query, + layer.tree().root_node(), + RopeProvider(source), + ) + .peekable(); + + // If there's no captures, skip the layer + captures.peek()?; + + Some(HighlightIterLayer { + highlight_end_stack: Vec::new(), + scope_stack: vec![LocalScope { + inherits: false, + range: 0..usize::MAX, + local_defs: Vec::new(), + }], + cursor, + _tree: None, + captures, + config: layer.config.as_ref(), // TODO: just reuse `layer` + depth: layer.depth, // TODO: just reuse `layer` + ranges: &layer.ranges, // TODO: temp + }) + }) + .collect::<Vec<_>>(); + + // HAXX: arrange layers by byte range, with deeper layers positioned first + layers.sort_by_key(|layer| { + ( + layer.ranges.first().cloned(), + std::cmp::Reverse(layer.depth), + ) }); - let tree_ref = self.tree(); - let cursor_ref = unsafe { mem::transmute::<_, &'static mut QueryCursor>(&mut cursor) }; - let query_ref = &self.config.query; - let config_ref = self.config.as_ref(); - - // if reusing cursors & no range this resets to whole range - cursor_ref.set_byte_range(range.clone().unwrap_or(0..usize::MAX)); - - let captures = cursor_ref - .captures(query_ref, tree_ref.root_node(), RopeProvider(source)) - .peekable(); - - // manually craft the root layer based on the existing tree - let layer = HighlightIterLayer { - highlight_end_stack: Vec::new(), - scope_stack: vec![LocalScope { - inherits: false, - range: 0..usize::MAX, - local_defs: Vec::new(), - }], - cursor, - depth: 0, - _tree: None, - captures, - config: config_ref, - ranges: vec![Range { - start_byte: 0, - end_byte: usize::MAX, - start_point: Point::new(0, 0), - end_point: Point::new(usize::MAX, usize::MAX), - }], - }; let mut result = HighlightIter { source, - byte_offset: range.map_or(0, |r| r.start), // TODO: simplify - injection_callback, + byte_offset: range.map_or(0, |r| r.start), cancellation_flag, iter_count: 0, - layers: vec![layer], + layers, next_event: None, last_highlight_range: None, }; result.sort_layers(); result } - // on_tokenize - // on_change_highlighting // Commenting // comment_strings_for_pos @@ -543,246 +785,157 @@ impl Syntax { // indent_level_for_line // TODO: Folding - - // Syntax APIs - // get_syntax_node_containing_range -> - // ... - // get_syntax_node_at_pos - // buffer_range_for_scope_at_pos } #[derive(Debug)] pub struct LanguageLayer { // mode // grammar - // depth + pub config: Arc<HighlightConfiguration>, pub(crate) tree: Option<Tree>, + pub ranges: Vec<Range>, + pub depth: usize, } impl LanguageLayer { - // pub fn new() -> Self { - // Self { tree: None } - // } - pub fn tree(&self) -> &Tree { // TODO: no unwrap self.tree.as_ref().unwrap() } - fn parse( - &mut self, - ts_parser: &mut TsParser, - config: &HighlightConfiguration, - source: &Rope, - _depth: usize, - ranges: Vec<Range>, - ) -> Result<(), Error> { - if ts_parser.parser.set_included_ranges(&ranges).is_ok() { - ts_parser - .parser - .set_language(config.language) - .map_err(|_| Error::InvalidLanguage)?; - - // unsafe { syntax.parser.set_cancellation_flag(cancellation_flag) }; - let tree = ts_parser - .parser - .parse_with( - &mut |byte, _| { - if byte <= source.len_bytes() { - let (chunk, start_byte, _, _) = source.chunk_at_byte(byte); - chunk[byte - start_byte..].as_bytes() - } else { - // out of range - &[] - } - }, - self.tree.as_ref(), - ) - .ok_or(Error::Cancelled)?; + fn parse(&mut self, parser: &mut Parser, source: &Rope) -> Result<(), Error> { + parser.set_included_ranges(&self.ranges).unwrap(); - self.tree = Some(tree) - } + parser + .set_language(self.config.language) + .map_err(|_| Error::InvalidLanguage)?; + + // unsafe { syntax.parser.set_cancellation_flag(cancellation_flag) }; + let tree = parser + .parse_with( + &mut |byte, _| { + if byte <= source.len_bytes() { + let (chunk, start_byte, _, _) = source.chunk_at_byte(byte); + chunk[byte - start_byte..].as_bytes() + } else { + // out of range + &[] + } + }, + self.tree.as_ref(), + ) + .ok_or(Error::Cancelled)?; + // unsafe { ts_parser.parser.set_cancellation_flag(None) }; + self.tree = Some(tree); Ok(()) } +} - pub(crate) fn generate_edits( - old_text: RopeSlice, - changeset: &ChangeSet, - ) -> Vec<tree_sitter::InputEdit> { - use Operation::*; - let mut old_pos = 0; +pub(crate) fn generate_edits( + old_text: &Rope, + changeset: &ChangeSet, +) -> Vec<tree_sitter::InputEdit> { + use Operation::*; + let mut old_pos = 0; - let mut edits = Vec::new(); + let mut edits = Vec::new(); - let mut iter = changeset.changes.iter().peekable(); + if changeset.changes.is_empty() { + return edits; + } - // TODO; this is a lot easier with Change instead of Operation. + let mut iter = changeset.changes.iter().peekable(); - fn point_at_pos(text: RopeSlice, pos: usize) -> (usize, Point) { - let byte = text.char_to_byte(pos); // <- attempted to index past end - let line = text.char_to_line(pos); - let line_start_byte = text.line_to_byte(line); - let col = byte - line_start_byte; + // TODO; this is a lot easier with Change instead of Operation. - (byte, Point::new(line, col)) - } + fn point_at_pos(text: &Rope, pos: usize) -> (usize, Point) { + let byte = text.char_to_byte(pos); // <- attempted to index past end + let line = text.char_to_line(pos); + let line_start_byte = text.line_to_byte(line); + let col = byte - line_start_byte; - fn traverse(point: Point, text: &Tendril) -> Point { - let Point { - mut row, - mut column, - } = point; - - // TODO: there should be a better way here. - let mut chars = text.chars().peekable(); - while let Some(ch) = chars.next() { - if char_is_line_ending(ch) && !(ch == '\r' && chars.peek() == Some(&'\n')) { - row += 1; - column = 0; - } else { - column += 1; - } + (byte, Point::new(line, col)) + } + + fn traverse(point: Point, text: &Tendril) -> Point { + let Point { + mut row, + mut column, + } = point; + + // TODO: there should be a better way here. + let mut chars = text.chars().peekable(); + while let Some(ch) = chars.next() { + if char_is_line_ending(ch) && !(ch == '\r' && chars.peek() == Some(&'\n')) { + row += 1; + column = 0; + } else { + column += 1; } - Point { row, column } } + Point { row, column } + } - while let Some(change) = iter.next() { - let len = match change { - Delete(i) | Retain(i) => *i, - Insert(_) => 0, - }; - let mut old_end = old_pos + len; + while let Some(change) = iter.next() { + let len = match change { + Delete(i) | Retain(i) => *i, + Insert(_) => 0, + }; + let mut old_end = old_pos + len; + + match change { + Retain(_) => {} + Delete(_) => { + let (start_byte, start_position) = point_at_pos(old_text, old_pos); + let (old_end_byte, old_end_position) = point_at_pos(old_text, old_end); + + // deletion + edits.push(tree_sitter::InputEdit { + start_byte, // old_pos to byte + old_end_byte, // old_end to byte + new_end_byte: start_byte, // old_pos to byte + start_position, // old pos to coords + old_end_position, // old_end to coords + new_end_position: start_position, // old pos to coords + }); + } + Insert(s) => { + let (start_byte, start_position) = point_at_pos(old_text, old_pos); - match change { - Retain(_) => {} - Delete(_) => { - let (start_byte, start_position) = point_at_pos(old_text, old_pos); + // a subsequent delete means a replace, consume it + if let Some(Delete(len)) = iter.peek() { + old_end = old_pos + len; let (old_end_byte, old_end_position) = point_at_pos(old_text, old_end); - // TODO: Position also needs to be byte based... - // let byte = char_to_byte(old_pos) - // let line = char_to_line(old_pos) - // let line_start_byte = line_to_byte() - // Position::new(line, line_start_byte - byte) + iter.next(); - // deletion + // replacement edits.push(tree_sitter::InputEdit { - start_byte, // old_pos to byte - old_end_byte, // old_end to byte - new_end_byte: start_byte, // old_pos to byte - start_position, // old pos to coords - old_end_position, // old_end to coords - new_end_position: start_position, // old pos to coords + start_byte, // old_pos to byte + old_end_byte, // old_end to byte + new_end_byte: start_byte + s.len(), // old_pos to byte + s.len() + start_position, // old pos to coords + old_end_position, // old_end to coords + new_end_position: traverse(start_position, s), // old pos + chars, newlines matter too (iter over) + }); + } else { + // insert + edits.push(tree_sitter::InputEdit { + start_byte, // old_pos to byte + old_end_byte: start_byte, // same + new_end_byte: start_byte + s.len(), // old_pos + s.len() + start_position, // old pos to coords + old_end_position: start_position, // same + new_end_position: traverse(start_position, s), // old pos + chars, newlines matter too (iter over) }); - } - Insert(s) => { - let (start_byte, start_position) = point_at_pos(old_text, old_pos); - - // a subsequent delete means a replace, consume it - if let Some(Delete(len)) = iter.peek() { - old_end = old_pos + len; - let (old_end_byte, old_end_position) = point_at_pos(old_text, old_end); - - iter.next(); - - // replacement - edits.push(tree_sitter::InputEdit { - start_byte, // old_pos to byte - old_end_byte, // old_end to byte - new_end_byte: start_byte + s.len(), // old_pos to byte + s.len() - start_position, // old pos to coords - old_end_position, // old_end to coords - new_end_position: traverse(start_position, s), // old pos + chars, newlines matter too (iter over) - }); - } else { - // insert - edits.push(tree_sitter::InputEdit { - start_byte, // old_pos to byte - old_end_byte: start_byte, // same - new_end_byte: start_byte + s.len(), // old_pos + s.len() - start_position, // old pos to coords - old_end_position: start_position, // same - new_end_position: traverse(start_position, s), // old pos + chars, newlines matter too (iter over) - }); - } } } - old_pos = old_end; - } - edits - } - - fn update( - &mut self, - ts_parser: &mut TsParser, - config: &HighlightConfiguration, - old_source: &Rope, - source: &Rope, - changeset: &ChangeSet, - ) -> Result<(), Error> { - if changeset.is_empty() { - return Ok(()); } - - let edits = Self::generate_edits(old_source.slice(..), changeset); - - // Notify the tree about all the changes - for edit in edits.iter().rev() { - // apply the edits in reverse. If we applied them in order then edit 1 would disrupt - // the positioning of edit 2 - self.tree.as_mut().unwrap().edit(edit); - } - - self.parse( - ts_parser, - config, - source, - 0, - // TODO: what to do about this range on update - vec![Range { - start_byte: 0, - end_byte: usize::MAX, - start_point: Point::new(0, 0), - end_point: Point::new(usize::MAX, usize::MAX), - }], - ) + old_pos = old_end; } - - // fn highlight_iter() -> same as Mode but for this layer. Mode composits these - // fn buffer_changed - // fn update(range) - // fn update_injections() + edits } -// -- refactored from tree-sitter-highlight to be able to retain state -// TODO: add seek() to iter - -// problem: any time a layer is updated it must update it's injections on the parent (potentially -// removing some from use) -// can't modify to vec and exist in it at the same time since that would violate borrows -// maybe we can do with an arena -// maybe just caching on the top layer and nevermind the injections for now? -// -// Grammar { -// layers: Vec<Box<Layer>> to prevent memory moves when vec is modified -// } -// injections tracked by marker: -// if marker areas match it's fine and update -// if not found add new layer -// if length 0 then area got removed, clean up the layer -// -// layer update: -// if range.len = 0 then remove the layer -// for change in changes { tree.edit(change) } -// tree = parser.parse(.., tree, ..) -// calculate affected range and update injections -// injection update: -// look for existing injections -// if present, range = (first injection start, last injection end) -// -// For now cheat and just throw out non-root layers if they exist. This should still improve -// parsing in majority of cases. - use std::sync::atomic::{AtomicUsize, Ordering}; use std::{iter, mem, ops, str, usize}; use tree_sitter::{ @@ -819,8 +972,8 @@ pub enum HighlightEvent { pub struct HighlightConfiguration { pub language: Grammar, pub query: Query, + injections_query: Query, combined_injections_query: Option<Query>, - locals_pattern_index: usize, highlights_pattern_index: usize, highlight_indices: ArcSwap<Vec<Option<Highlight>>>, non_local_variable_patterns: Vec<bool>, @@ -847,13 +1000,9 @@ struct LocalScope<'a> { } #[derive(Debug)] -struct HighlightIter<'a, F> -where - F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a, -{ +struct HighlightIter<'a> { source: RopeSlice<'a>, byte_offset: usize, - injection_callback: F, cancellation_flag: Option<&'a AtomicUsize>, layers: Vec<HighlightIterLayer<'a>>, iter_count: usize, @@ -893,8 +1042,8 @@ struct HighlightIterLayer<'a> { config: &'a HighlightConfiguration, highlight_end_stack: Vec<usize>, scope_stack: Vec<LocalScope<'a>>, - ranges: Vec<Range>, depth: usize, + ranges: &'a [Range], } impl<'a> fmt::Debug for HighlightIterLayer<'a> { @@ -926,38 +1075,32 @@ impl HighlightConfiguration { ) -> Result<Self, QueryError> { // Concatenate the query strings, keeping track of the start offset of each section. let mut query_source = String::new(); - query_source.push_str(injection_query); - let locals_query_offset = query_source.len(); query_source.push_str(locals_query); let highlights_query_offset = query_source.len(); query_source.push_str(highlights_query); // Construct a single query by concatenating the three query strings, but record the // range of pattern indices that belong to each individual string. - let mut query = Query::new(language, &query_source)?; - let mut locals_pattern_index = 0; + let query = Query::new(language, &query_source)?; let mut highlights_pattern_index = 0; for i in 0..(query.pattern_count()) { let pattern_offset = query.start_byte_for_pattern(i); if pattern_offset < highlights_query_offset { - if pattern_offset < highlights_query_offset { - highlights_pattern_index += 1; - } - if pattern_offset < locals_query_offset { - locals_pattern_index += 1; - } + highlights_pattern_index += 1; } } + let mut injections_query = Query::new(language, injection_query)?; + // Construct a separate query just for dealing with the 'combined injections'. // Disable the combined injection patterns in the main query. let mut combined_injections_query = Query::new(language, injection_query)?; let mut has_combined_queries = false; - for pattern_index in 0..locals_pattern_index { - let settings = query.property_settings(pattern_index); + for pattern_index in 0..injections_query.pattern_count() { + let settings = injections_query.property_settings(pattern_index); if settings.iter().any(|s| &*s.key == "injection.combined") { has_combined_queries = true; - query.disable_pattern(pattern_index); + injections_query.disable_pattern(pattern_index); } else { combined_injections_query.disable_pattern(pattern_index); } @@ -989,8 +1132,6 @@ impl HighlightConfiguration { for (i, name) in query.capture_names().iter().enumerate() { let i = Some(i as u32); match name.as_str() { - "injection.content" => injection_content_capture_index = i, - "injection.language" => injection_language_capture_index = i, "local.definition" => local_def_capture_index = i, "local.definition-value" => local_def_value_capture_index = i, "local.reference" => local_ref_capture_index = i, @@ -999,12 +1140,21 @@ impl HighlightConfiguration { } } + for (i, name) in injections_query.capture_names().iter().enumerate() { + let i = Some(i as u32); + match name.as_str() { + "injection.content" => injection_content_capture_index = i, + "injection.language" => injection_language_capture_index = i, + _ => {} + } + } + let highlight_indices = ArcSwap::from_pointee(vec![None; query.capture_names().len()]); Ok(Self { language, query, + injections_query, combined_injections_query, - locals_pattern_index, highlights_pattern_index, highlight_indices, non_local_variable_patterns, @@ -1069,238 +1219,6 @@ impl HighlightConfiguration { } impl<'a> HighlightIterLayer<'a> { - /// Create a new 'layer' of highlighting for this document. - /// - /// In the even that the new layer contains "combined injections" (injections where multiple - /// disjoint ranges are parsed as one syntax tree), these will be eagerly processed and - /// added to the returned vector. - fn new<F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a>( - source: RopeSlice<'a>, - cancellation_flag: Option<&'a AtomicUsize>, - injection_callback: &mut F, - mut config: &'a HighlightConfiguration, - mut depth: usize, - mut ranges: Vec<Range>, - ) -> Result<Vec<Self>, Error> { - let mut result = Vec::with_capacity(1); - let mut queue = Vec::new(); - loop { - // --> Tree parsing part - - PARSER.with(|ts_parser| { - let highlighter = &mut ts_parser.borrow_mut(); - - if highlighter.parser.set_included_ranges(&ranges).is_ok() { - highlighter - .parser - .set_language(config.language) - .map_err(|_| Error::InvalidLanguage)?; - - unsafe { highlighter.parser.set_cancellation_flag(cancellation_flag) }; - let tree = highlighter - .parser - .parse_with( - &mut |byte, _| { - if byte <= source.len_bytes() { - let (chunk, start_byte, _, _) = source.chunk_at_byte(byte); - chunk[byte - start_byte..].as_bytes() - } else { - // out of range - &[] - } - }, - None, - ) - .ok_or(Error::Cancelled)?; - unsafe { highlighter.parser.set_cancellation_flag(None) }; - let mut cursor = highlighter.cursors.pop().unwrap_or_else(QueryCursor::new); - - // Process combined injections. - if let Some(combined_injections_query) = &config.combined_injections_query { - let mut injections_by_pattern_index = vec![ - (None, Vec::new(), false); - combined_injections_query - .pattern_count() - ]; - let matches = cursor.matches( - combined_injections_query, - tree.root_node(), - RopeProvider(source), - ); - for mat in matches { - let entry = &mut injections_by_pattern_index[mat.pattern_index]; - let (language_name, content_node, include_children) = - injection_for_match( - config, - combined_injections_query, - &mat, - source, - ); - if language_name.is_some() { - entry.0 = language_name; - } - if let Some(content_node) = content_node { - entry.1.push(content_node); - } - entry.2 = include_children; - } - for (lang_name, content_nodes, includes_children) in - injections_by_pattern_index - { - if let (Some(lang_name), false) = (lang_name, content_nodes.is_empty()) - { - if let Some(next_config) = (injection_callback)(&lang_name) { - let ranges = Self::intersect_ranges( - &ranges, - &content_nodes, - includes_children, - ); - if !ranges.is_empty() { - queue.push((next_config, depth + 1, ranges)); - } - } - } - } - } - - // --> Highlighting query part - - // The `captures` iterator borrows the `Tree` and the `QueryCursor`, which - // prevents them from being moved. But both of these values are really just - // pointers, so it's actually ok to move them. - let tree_ref = unsafe { mem::transmute::<_, &'static Tree>(&tree) }; - let cursor_ref = - unsafe { mem::transmute::<_, &'static mut QueryCursor>(&mut cursor) }; - let captures = cursor_ref - .captures(&config.query, tree_ref.root_node(), RopeProvider(source)) - .peekable(); - - result.push(HighlightIterLayer { - highlight_end_stack: Vec::new(), - scope_stack: vec![LocalScope { - inherits: false, - range: 0..usize::MAX, - local_defs: Vec::new(), - }], - cursor, - depth, - _tree: Some(tree), - captures, - config, - ranges, - }); - } - - Ok(()) // so we can use the try operator - })?; - - if queue.is_empty() { - break; - } - - let (next_config, next_depth, next_ranges) = queue.remove(0); - config = next_config; - depth = next_depth; - ranges = next_ranges; - } - - Ok(result) - } - - // Compute the ranges that should be included when parsing an injection. - // This takes into account three things: - // * `parent_ranges` - The ranges must all fall within the *current* layer's ranges. - // * `nodes` - Every injection takes place within a set of nodes. The injection ranges - // are the ranges of those nodes. - // * `includes_children` - For some injections, the content nodes' children should be - // excluded from the nested document, so that only the content nodes' *own* content - // is reparsed. For other injections, the content nodes' entire ranges should be - // reparsed, including the ranges of their children. - fn intersect_ranges( - parent_ranges: &[Range], - nodes: &[Node], - includes_children: bool, - ) -> Vec<Range> { - let mut cursor = nodes[0].walk(); - let mut result = Vec::new(); - let mut parent_range_iter = parent_ranges.iter(); - let mut parent_range = parent_range_iter - .next() - .expect("Layers should only be constructed with non-empty ranges vectors"); - for node in nodes.iter() { - let mut preceding_range = Range { - start_byte: 0, - start_point: Point::new(0, 0), - end_byte: node.start_byte(), - end_point: node.start_position(), - }; - let following_range = Range { - start_byte: node.end_byte(), - start_point: node.end_position(), - end_byte: usize::MAX, - end_point: Point::new(usize::MAX, usize::MAX), - }; - - for excluded_range in node - .children(&mut cursor) - .filter_map(|child| { - if includes_children { - None - } else { - Some(child.range()) - } - }) - .chain([following_range].iter().cloned()) - { - let mut range = Range { - start_byte: preceding_range.end_byte, - start_point: preceding_range.end_point, - end_byte: excluded_range.start_byte, - end_point: excluded_range.start_point, - }; - preceding_range = excluded_range; - - if range.end_byte < parent_range.start_byte { - continue; - } - - while parent_range.start_byte <= range.end_byte { - if parent_range.end_byte > range.start_byte { - if range.start_byte < parent_range.start_byte { - range.start_byte = parent_range.start_byte; - range.start_point = parent_range.start_point; - } - - if parent_range.end_byte < range.end_byte { - if range.start_byte < parent_range.end_byte { - result.push(Range { - start_byte: range.start_byte, - start_point: range.start_point, - end_byte: parent_range.end_byte, - end_point: parent_range.end_point, - }); - } - range.start_byte = parent_range.end_byte; - range.start_point = parent_range.end_point; - } else { - if range.start_byte < range.end_byte { - result.push(range); - } - break; - } - } - - if let Some(next_range) = parent_range_iter.next() { - parent_range = next_range; - } else { - return result; - } - } - } - } - result - } - // First, sort scope boundaries by their byte offset in the document. At a // given position, emit scope endings before scope beginnings. Finally, emit // scope boundaries from deeper layers first. @@ -1326,10 +1244,101 @@ impl<'a> HighlightIterLayer<'a> { } } -impl<'a, F> HighlightIter<'a, F> -where - F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a, -{ +// Compute the ranges that should be included when parsing an injection. +// This takes into account three things: +// * `parent_ranges` - The ranges must all fall within the *current* layer's ranges. +// * `nodes` - Every injection takes place within a set of nodes. The injection ranges +// are the ranges of those nodes. +// * `includes_children` - For some injections, the content nodes' children should be +// excluded from the nested document, so that only the content nodes' *own* content +// is reparsed. For other injections, the content nodes' entire ranges should be +// reparsed, including the ranges of their children. +fn intersect_ranges( + parent_ranges: &[Range], + nodes: &[Node], + includes_children: bool, +) -> Vec<Range> { + let mut cursor = nodes[0].walk(); + let mut result = Vec::new(); + let mut parent_range_iter = parent_ranges.iter(); + let mut parent_range = parent_range_iter + .next() + .expect("Layers should only be constructed with non-empty ranges vectors"); + for node in nodes.iter() { + let mut preceding_range = Range { + start_byte: 0, + start_point: Point::new(0, 0), + end_byte: node.start_byte(), + end_point: node.start_position(), + }; + let following_range = Range { + start_byte: node.end_byte(), + start_point: node.end_position(), + end_byte: usize::MAX, + end_point: Point::new(usize::MAX, usize::MAX), + }; + + for excluded_range in node + .children(&mut cursor) + .filter_map(|child| { + if includes_children { + None + } else { + Some(child.range()) + } + }) + .chain([following_range].iter().cloned()) + { + let mut range = Range { + start_byte: preceding_range.end_byte, + start_point: preceding_range.end_point, + end_byte: excluded_range.start_byte, + end_point: excluded_range.start_point, + }; + preceding_range = excluded_range; + + if range.end_byte < parent_range.start_byte { + continue; + } + + while parent_range.start_byte <= range.end_byte { + if parent_range.end_byte > range.start_byte { + if range.start_byte < parent_range.start_byte { + range.start_byte = parent_range.start_byte; + range.start_point = parent_range.start_point; + } + + if parent_range.end_byte < range.end_byte { + if range.start_byte < parent_range.end_byte { + result.push(Range { + start_byte: range.start_byte, + start_point: range.start_point, + end_byte: parent_range.end_byte, + end_point: parent_range.end_point, + }); + } + range.start_byte = parent_range.end_byte; + range.start_point = parent_range.end_point; + } else { + if range.start_byte < range.end_byte { + result.push(range); + } + break; + } + } + + if let Some(next_range) = parent_range_iter.next() { + parent_range = next_range; + } else { + return result; + } + } + } + } + result +} + +impl<'a> HighlightIter<'a> { fn emit_event( &mut self, offset: usize, @@ -1360,6 +1369,12 @@ where i += 1; continue; } + } else { + let layer = self.layers.remove(i + 1); + PARSER.with(|ts_parser| { + let highlighter = &mut ts_parser.borrow_mut(); + highlighter.cursors.push(layer.cursor); + }); } break; } @@ -1376,30 +1391,9 @@ where } } } - - fn insert_layer(&mut self, mut layer: HighlightIterLayer<'a>) { - if let Some(sort_key) = layer.sort_key() { - let mut i = 1; - while i < self.layers.len() { - if let Some(sort_key_i) = self.layers[i].sort_key() { - if sort_key_i > sort_key { - self.layers.insert(i, layer); - return; - } - i += 1; - } else { - self.layers.remove(i); - } - } - self.layers.push(layer); - } - } } -impl<'a, F> Iterator for HighlightIter<'a, F> -where - F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a, -{ +impl<'a> Iterator for HighlightIter<'a> { type Item = Result<HighlightEvent, Error>; fn next(&mut self) -> Option<Self::Item> { @@ -1459,55 +1453,12 @@ where layer.highlight_end_stack.pop(); return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd)); } else { - // return self.emit_event(self.source.len(), None); - return None; + return self.emit_event(self.source.len_bytes(), None); }; let (mut match_, capture_index) = layer.captures.next().unwrap(); let mut capture = match_.captures[capture_index]; - // If this capture represents an injection, then process the injection. - if match_.pattern_index < layer.config.locals_pattern_index { - let (language_name, content_node, include_children) = - injection_for_match(layer.config, &layer.config.query, &match_, self.source); - - // Explicitly remove this match so that none of its other captures will remain - // in the stream of captures. - match_.remove(); - - // If a language is found with the given name, then add a new language layer - // to the highlighted document. - if let (Some(language_name), Some(content_node)) = (language_name, content_node) { - if let Some(config) = (self.injection_callback)(&language_name) { - let ranges = HighlightIterLayer::intersect_ranges( - &self.layers[0].ranges, - &[content_node], - include_children, - ); - if !ranges.is_empty() { - match HighlightIterLayer::new( - self.source, - self.cancellation_flag, - &mut self.injection_callback, - config, - self.layers[0].depth + 1, - ranges, - ) { - Ok(layers) => { - for layer in layers { - self.insert_layer(layer); - } - } - Err(e) => return Some(Err(e)), - } - } - } - } - - self.sort_layers(); - continue 'main; - } - // Remove from the local scope stack any local scopes that have already ended. while range.start > layer.scope_stack.last().unwrap().range.end { layer.scope_stack.pop(); @@ -1702,14 +1653,6 @@ fn injection_for_match<'a>( (language_name, content_node, include_children) } -// fn shrink_and_clear<T>(vec: &mut Vec<T>, capacity: usize) { -// if vec.len() > capacity { -// vec.truncate(capacity); -// vec.shrink_to_fit(); -// } -// vec.clear(); -// } - pub struct Merge<I> { iter: I, spans: Box<dyn Iterator<Item = (usize, std::ops::Range<usize>)>>, @@ -1876,6 +1819,8 @@ mod test { .map(String::from) .collect(); + let loader = Loader::new(Configuration { language: vec![] }); + let language = get_language(&crate::RUNTIME_DIR, "Rust").unwrap(); let config = HighlightConfiguration::new( language, @@ -1898,7 +1843,7 @@ mod test { fn main() {} ", ); - let syntax = Syntax::new(&source, Arc::new(config)); + let syntax = Syntax::new(&source, Arc::new(config), Arc::new(loader)); let tree = syntax.tree(); let root = tree.root_node(); assert_eq!(root.kind(), "source_file"); @@ -1925,7 +1870,7 @@ mod test { &doc, vec![(6, 11, Some("test".into())), (12, 17, None)].into_iter(), ); - let edits = LanguageLayer::generate_edits(doc.slice(..), transaction.changes()); + let edits = generate_edits(&doc, transaction.changes()); // transaction.apply(&mut state); assert_eq!( @@ -1954,7 +1899,7 @@ mod test { let mut doc = Rope::from("fn test() {}"); let transaction = Transaction::change(&doc, vec![(8, 8, Some("a: u32".into()))].into_iter()); - let edits = LanguageLayer::generate_edits(doc.slice(..), transaction.changes()); + let edits = generate_edits(&doc, transaction.changes()); transaction.apply(&mut doc); assert_eq!(doc, "fn test(a: u32) {}"); |