From 36e7e2133fe1d472600cfd935b8046b8d50146c2 Mon Sep 17 00:00:00 2001 From: Blaž Hrastnik Date: Tue, 29 Sep 2020 01:01:27 +0900 Subject: Split selection on regex, fix InputEdit generation. --- helix-core/src/lib.rs | 5 ++- helix-core/src/selection.rs | 76 +++++++++++++++++++++++++++++++++++++++++-- helix-core/src/syntax.rs | 48 ++++++++++++++++++++++----- helix-core/src/transaction.rs | 25 +++++++------- 4 files changed, 131 insertions(+), 23 deletions(-) (limited to 'helix-core/src') diff --git a/helix-core/src/lib.rs b/helix-core/src/lib.rs index 9705deaa..0f58fbbc 100644 --- a/helix-core/src/lib.rs +++ b/helix-core/src/lib.rs @@ -2,7 +2,7 @@ pub mod graphemes; pub mod macros; mod position; -mod selection; +pub mod selection; pub mod state; pub mod syntax; mod transaction; @@ -11,6 +11,9 @@ pub use ropey::{Rope, RopeSlice}; pub use tendril::StrTendril as Tendril; +#[doc(inline)] +pub use {regex, tree_sitter}; + pub use position::Position; pub use selection::Range; pub use selection::Selection; diff --git a/helix-core/src/selection.rs b/helix-core/src/selection.rs index f934f74d..2251c77f 100644 --- a/helix-core/src/selection.rs +++ b/helix-core/src/selection.rs @@ -2,8 +2,9 @@ //! single selection range. //! //! All positioning is done via `char` offsets into the buffer. -use crate::{Assoc, ChangeSet}; +use crate::{Assoc, ChangeSet, Rope, RopeSlice}; use smallvec::{smallvec, SmallVec}; +use std::borrow::Cow; #[inline] fn abs_difference(x: usize, y: usize) -> usize { @@ -22,7 +23,7 @@ pub struct Range { pub anchor: usize, /// The head of the range, moved when extending. 
pub head: usize, -} +} // TODO: might be cheaper to store normalized as from/to and an inverted flag impl Range { pub fn new(anchor: usize, head: usize) -> Self { @@ -106,6 +107,11 @@ impl Range { } // groupAt + + #[inline] + pub fn fragment<'a>(&'a self, text: &'a RopeSlice) -> Cow<'a, str> { + Cow::from(text.slice(self.from()..self.to())) + } } /// A selection consists of one or more selection ranges. @@ -239,10 +245,50 @@ impl Selection { self.primary_index, ) } + + pub fn fragments<'a>(&'a self, text: &'a RopeSlice) -> impl Iterator<Item = Cow<'a, str>> + 'a { + self.ranges.iter().map(move |range| range.fragment(text)) + } } // TODO: checkSelection -> check if valid for doc length +// TODO: support to split on capture #N instead of whole match +pub fn split_on_matches( + text: &RopeSlice, + selections: &Selection, + regex: &crate::regex::Regex, +) -> Selection { + let mut result = SmallVec::with_capacity(selections.ranges().len()); + + for sel in selections.ranges() { + // TODO: can't avoid occasional allocations since Regex can't operate on chunks yet + let fragment = sel.fragment(&text); + + let mut sel_start = sel.from(); + let sel_end = sel.to(); + + let mut start_byte = text.char_to_byte(sel_start); + + let mut start = sel_start; + + for mat in regex.find_iter(&fragment) { + // TODO: retain range direction + + let end = text.byte_to_char(start_byte + mat.start()); + result.push(Range::new(start, end)); + start = text.byte_to_char(start_byte + mat.end()); + } + + if start <= sel_end { + result.push(Range::new(start, sel_end)); + } + } + + // TODO: figure out a new primary index + Selection::new(result, 0) +} + #[cfg(test)] mod test { use super::*; @@ -312,4 +358,30 @@ mod test { assert_eq!(range.contains(6), false); } + #[test] + fn test_split_on_matches() { + use crate::regex::Regex; + + let text = Rope::from("abcd efg wrs   xyz 123 456"); + + let selections = Selection::new(smallvec![Range::new(0, 8), Range::new(10, 19),], 0); + + let result =
split_on_matches(&text.slice(..), &selections, &Regex::new(r"\s+").unwrap()); + + assert_eq!( + result.ranges(), + &[ + Range::new(0, 4), + Range::new(5, 8), + Range::new(10, 12), + Range::new(15, 18), + Range::new(19, 19), + ] + ); + + assert_eq!( + result.fragments(&text.slice(..)).collect::<Vec<_>>(), + &["abcd", "efg", "rs", "xyz", ""] + ); + } } diff --git a/helix-core/src/syntax.rs b/helix-core/src/syntax.rs index 9b09bb58..3d85ff25 100644 --- a/helix-core/src/syntax.rs +++ b/helix-core/src/syntax.rs @@ -188,9 +188,19 @@ impl Syntax { syntax } - pub fn update(&mut self, source: &Rope, changeset: &ChangeSet) -> Result<(), Error> { - self.root_layer - .update(&mut self.parser, &self.config, source, changeset) + pub fn update( + &mut self, + old_source: &Rope, + source: &Rope, + changeset: &ChangeSet, + ) -> Result<(), Error> { + self.root_layer.update( + &mut self.parser, + &self.config, + old_source, + source, + changeset, + ) // TODO: deal with injections and update them too } @@ -385,7 +395,7 @@ impl LanguageLayer { } pub(crate) fn generate_edits( - text: &RopeSlice, + old_text: &RopeSlice, changeset: &ChangeSet, ) -> Vec<InputEdit> { use Operation::*; @@ -399,7 +409,7 @@ impl LanguageLayer { // TODO; this is a lot easier with Change instead of Operation. fn point_at_pos(text: &RopeSlice, pos: usize) -> (usize, Point) { - let byte = text.char_to_byte(pos); + let byte = text.char_to_byte(pos); // <- attempted to index past end let line = text.char_to_line(pos); let line_start_byte = text.line_to_byte(line); let col = byte - line_start_byte; @@ -437,8 +447,8 @@ impl LanguageLayer { new_pos += len; } Delete(_) => { - let (start_byte, start_position) = point_at_pos(&text, old_pos); - let (old_end_byte, old_end_position) = point_at_pos(&text, old_end); + let (start_byte, start_position) = point_at_pos(&old_text, old_pos); + let (old_end_byte, old_end_position) = point_at_pos(&old_text, old_end); // TODO: Position also needs to be byte based...
// let byte = char_to_byte(old_pos) @@ -475,7 +485,7 @@ impl LanguageLayer { }; } Insert(s) => { - let (start_byte, start_position) = point_at_pos(&text, old_pos); + let (start_byte, start_position) = point_at_pos(&old_text, old_pos); let ins = s.chars().count(); @@ -501,6 +511,7 @@ impl LanguageLayer { &mut self, parser: &mut Parser, config: &HighlightConfiguration, + old_source: &Rope, source: &Rope, changeset: &ChangeSet, ) -> Result<(), Error> { @@ -508,7 +519,7 @@ impl LanguageLayer { return Ok(()); } - let edits = Self::generate_edits(&source.slice(..), changeset); + let edits = Self::generate_edits(&old_source.slice(..), changeset); // Notify the tree about all the changes for edit in edits { @@ -1530,4 +1541,23 @@ fn test_input_edits() { } ] ); + + // Testing with the official example from tree-sitter + let mut state = State::new("fn test() {}".into()); + let transaction = Transaction::change(&state, vec![(8, 8, Some("a: u32".into()))].into_iter()); + let edits = LanguageLayer::generate_edits(&state.doc.slice(..), &transaction.changes); + transaction.apply(&mut state); + + assert_eq!(state.doc(), "fn test(a: u32) {}"); + assert_eq!( + edits, + &[InputEdit { + start_byte: 8, + old_end_byte: 8, + new_end_byte: 14, + start_position: Point { row: 0, column: 8 }, + old_end_position: Point { row: 0, column: 8 }, + new_end_position: Point { row: 0, column: 14 } + }] + ); } diff --git a/helix-core/src/transaction.rs b/helix-core/src/transaction.rs index 278e071b..a8059497 100644 --- a/helix-core/src/transaction.rs +++ b/helix-core/src/transaction.rs @@ -326,9 +326,20 @@ impl Transaction { /// Returns true if applied successfully. 
pub fn apply(&self, state: &mut State) -> bool { - // apply changes to the document - if !self.changes.apply(&mut state.doc) { - return false; + if !self.changes.is_empty() { + // TODO: also avoid mapping the selection if not necessary + + let old_doc = state.doc().clone(); + + // apply changes to the document + if !self.changes.apply(&mut state.doc) { + return false; + } + + if let Some(syntax) = &mut state.syntax { + // TODO: no unwrap + syntax.update(&old_doc, &state.doc, &self.changes).unwrap(); + } } // update the selection: either take the selection specified in the transaction, or map the @@ -338,14 +349,6 @@ impl Transaction { .clone() .unwrap_or_else(|| state.selection.clone().map(&self.changes)); - // TODO: no unwrap - state - .syntax - .as_mut() - .unwrap() - .update(&state.doc, &self.changes) - .unwrap(); - true } -- cgit v1.2.3-70-g09d2