aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Pascal Kuthe 2024-02-26 07:45:20 +0000
committer GitHub 2024-02-26 07:45:20 +0000
commit cd02976fa3a55c2c1f01b95c40d178061968f797 (patch)
tree 707df0a4fb93fa7c8773ba59a85259835deb166e
parent c68ec92c5e1bd3a2bf402fb583de23693f59b722 (diff)
switch to regex-cursor (#9422)
-rw-r--r-- Cargo.lock 18
-rw-r--r-- helix-core/src/selection.rs 96
-rw-r--r-- helix-core/src/syntax.rs 12
-rw-r--r-- helix-stdx/Cargo.toml 1
-rw-r--r-- helix-stdx/src/rope.rs 45
-rw-r--r-- helix-term/src/commands.rs 56
-rw-r--r-- helix-term/src/ui/mod.rs 33
7 files changed, 175 insertions, 86 deletions
diff --git a/Cargo.lock b/Cargo.lock
index 2b8a25c8..b8d375c5 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1344,6 +1344,7 @@ version = "23.10.0"
dependencies = [
"dunce",
"etcetera",
+ "regex-cursor",
"ropey",
"tempfile",
"which",
@@ -1938,9 +1939,9 @@ dependencies = [
[[package]]
name = "regex-automata"
-version = "0.4.4"
+version = "0.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3b7fa1134405e2ec9353fd416b17f8dacd46c473d7d3fd1cf202706a14eb792a"
+checksum = "5bb987efffd3c6d0d8f5f89510bb458559eab11e4f869acb20bf845e016259cd"
dependencies = [
"aho-corasick",
"memchr",
@@ -1948,6 +1949,19 @@ dependencies = [
]
[[package]]
+name = "regex-cursor"
+version = "0.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a43718aa0040434d45728c43f56bd53bda75a91c46954cdf0f2ff4dbc8aabbe7"
+dependencies = [
+ "log",
+ "memchr",
+ "regex-automata",
+ "regex-syntax",
+ "ropey",
+]
+
+[[package]]
name = "regex-syntax"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
diff --git a/helix-core/src/selection.rs b/helix-core/src/selection.rs
index c44685ee..91f1d0de 100644
--- a/helix-core/src/selection.rs
+++ b/helix-core/src/selection.rs
@@ -7,9 +7,11 @@ use crate::{
ensure_grapheme_boundary_next, ensure_grapheme_boundary_prev, next_grapheme_boundary,
prev_grapheme_boundary,
},
+ line_ending::get_line_ending,
movement::Direction,
Assoc, ChangeSet, RopeGraphemes, RopeSlice,
};
+use helix_stdx::rope::{self, RopeSliceExt};
use smallvec::{smallvec, SmallVec};
use std::borrow::Cow;
@@ -708,12 +710,12 @@ impl IntoIterator for Selection {
pub fn keep_or_remove_matches(
text: RopeSlice,
selection: &Selection,
- regex: &crate::regex::Regex,
+ regex: &rope::Regex,
remove: bool,
) -> Option<Selection> {
let result: SmallVec<_> = selection
.iter()
- .filter(|range| regex.is_match(&range.fragment(text)) ^ remove)
+ .filter(|range| regex.is_match(text.regex_input_at(range.from()..range.to())) ^ remove)
.copied()
.collect();
@@ -724,25 +726,20 @@ pub fn keep_or_remove_matches(
None
}
+// TODO: support to split on capture #N instead of whole match
pub fn select_on_matches(
text: RopeSlice,
selection: &Selection,
- regex: &crate::regex::Regex,
+ regex: &rope::Regex,
) -> Option<Selection> {
let mut result = SmallVec::with_capacity(selection.len());
for sel in selection {
- // TODO: can't avoid occasional allocations since Regex can't operate on chunks yet
- let fragment = sel.fragment(text);
-
- let sel_start = sel.from();
- let start_byte = text.char_to_byte(sel_start);
-
- for mat in regex.find_iter(&fragment) {
+ for mat in regex.find_iter(text.regex_input_at(sel.from()..sel.to())) {
// TODO: retain range direction
- let start = text.byte_to_char(start_byte + mat.start());
- let end = text.byte_to_char(start_byte + mat.end());
+ let start = text.byte_to_char(mat.start());
+ let end = text.byte_to_char(mat.end());
let range = Range::new(start, end);
// Make sure the match is not right outside of the selection.
@@ -761,12 +758,7 @@ pub fn select_on_matches(
None
}
-// TODO: support to split on capture #N instead of whole match
-pub fn split_on_matches(
- text: RopeSlice,
- selection: &Selection,
- regex: &crate::regex::Regex,
-) -> Selection {
+pub fn split_on_newline(text: RopeSlice, selection: &Selection) -> Selection {
let mut result = SmallVec::with_capacity(selection.len());
for sel in selection {
@@ -776,21 +768,47 @@ pub fn split_on_matches(
continue;
}
- // TODO: can't avoid occasional allocations since Regex can't operate on chunks yet
- let fragment = sel.fragment(text);
-
let sel_start = sel.from();
let sel_end = sel.to();
- let start_byte = text.char_to_byte(sel_start);
+ let mut start = sel_start;
+ for mat in sel.slice(text).lines() {
+ let len = mat.len_chars();
+ let line_end_len = get_line_ending(&mat).map(|le| le.len_chars()).unwrap_or(0);
+ // TODO: retain range direction
+ result.push(Range::new(start, start + len - line_end_len));
+ start += len;
+ }
+
+ if start < sel_end {
+ result.push(Range::new(start, sel_end));
+ }
+ }
+
+ // TODO: figure out a new primary index
+ Selection::new(result, 0)
+}
+
+pub fn split_on_matches(text: RopeSlice, selection: &Selection, regex: &rope::Regex) -> Selection {
+ let mut result = SmallVec::with_capacity(selection.len());
+
+ for sel in selection {
+ // Special case: zero-width selection.
+ if sel.from() == sel.to() {
+ result.push(*sel);
+ continue;
+ }
+
+ let sel_start = sel.from();
+ let sel_end = sel.to();
let mut start = sel_start;
- for mat in regex.find_iter(&fragment) {
+ for mat in regex.find_iter(text.regex_input_at(sel_start..sel_end)) {
// TODO: retain range direction
- let end = text.byte_to_char(start_byte + mat.start());
+ let end = text.byte_to_char(mat.start());
result.push(Range::new(start, end));
- start = text.byte_to_char(start_byte + mat.end());
+ start = text.byte_to_char(mat.end());
}
if start < sel_end {
@@ -1021,14 +1039,12 @@ mod test {
#[test]
fn test_select_on_matches() {
- use crate::regex::{Regex, RegexBuilder};
-
let r = Rope::from_str("Nobody expects the Spanish inquisition");
let s = r.slice(..);
let selection = Selection::single(0, r.len_chars());
assert_eq!(
- select_on_matches(s, &selection, &Regex::new(r"[A-Z][a-z]*").unwrap()),
+ select_on_matches(s, &selection, &rope::Regex::new(r"[A-Z][a-z]*").unwrap()),
Some(Selection::new(
smallvec![Range::new(0, 6), Range::new(19, 26)],
0
@@ -1038,8 +1054,14 @@ mod test {
let r = Rope::from_str("This\nString\n\ncontains multiple\nlines");
let s = r.slice(..);
- let start_of_line = RegexBuilder::new(r"^").multi_line(true).build().unwrap();
- let end_of_line = RegexBuilder::new(r"$").multi_line(true).build().unwrap();
+ let start_of_line = rope::RegexBuilder::new()
+ .syntax(rope::Config::new().multi_line(true))
+ .build(r"^")
+ .unwrap();
+ let end_of_line = rope::RegexBuilder::new()
+ .syntax(rope::Config::new().multi_line(true))
+ .build(r"$")
+ .unwrap();
// line without ending
assert_eq!(
@@ -1077,9 +1099,9 @@ mod test {
select_on_matches(
s,
&Selection::single(0, s.len_chars()),
- &RegexBuilder::new(r"^[a-z ]*$")
- .multi_line(true)
- .build()
+ &rope::RegexBuilder::new()
+ .syntax(rope::Config::new().multi_line(true))
+ .build(r"^[a-z ]*$")
.unwrap()
),
Some(Selection::new(
@@ -1171,13 +1193,15 @@ mod test {
#[test]
fn test_split_on_matches() {
- use crate::regex::Regex;
-
let text = Rope::from(" abcd efg wrs xyz 123 456");
let selection = Selection::new(smallvec![Range::new(0, 9), Range::new(11, 20),], 0);
- let result = split_on_matches(text.slice(..), &selection, &Regex::new(r"\s+").unwrap());
+ let result = split_on_matches(
+ text.slice(..),
+ &selection,
+ &rope::Regex::new(r"\s+").unwrap(),
+ );
assert_eq!(
result.ranges(),
diff --git a/helix-core/src/syntax.rs b/helix-core/src/syntax.rs
index a9344448..0d8559ca 100644
--- a/helix-core/src/syntax.rs
+++ b/helix-core/src/syntax.rs
@@ -12,6 +12,7 @@ use arc_swap::{ArcSwap, Guard};
use bitflags::bitflags;
use globset::GlobSet;
use hashbrown::raw::RawTable;
+use helix_stdx::rope::{self, RopeSliceExt};
use slotmap::{DefaultKey as LayerId, HopSlotMap};
use std::{
@@ -1961,11 +1962,16 @@ impl HighlightConfiguration {
node_slice
};
- static SHEBANG_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(SHEBANG).unwrap());
+ static SHEBANG_REGEX: Lazy<rope::Regex> =
+ Lazy::new(|| rope::Regex::new(SHEBANG).unwrap());
injection_capture = SHEBANG_REGEX
- .captures(&Cow::from(lines))
- .map(|cap| InjectionLanguageMarker::Shebang(cap[1].to_owned()))
+ .captures_iter(lines.regex_input())
+ .map(|cap| {
+ let cap = lines.byte_slice(cap.get_group(1).unwrap().range());
+ InjectionLanguageMarker::Shebang(cap.into())
+ })
+ .next()
} else if index == self.injection_content_capture_index {
content_node = Some(capture.node);
}
diff --git a/helix-stdx/Cargo.toml b/helix-stdx/Cargo.toml
index 540a1b99..5ac7c011 100644
--- a/helix-stdx/Cargo.toml
+++ b/helix-stdx/Cargo.toml
@@ -16,6 +16,7 @@ dunce = "1.0"
etcetera = "0.8"
ropey = { version = "1.6.1", default-features = false }
which = "6.0"
+regex-cursor = "0.1.3"
[dev-dependencies]
tempfile = "3.10"
diff --git a/helix-stdx/src/rope.rs b/helix-stdx/src/rope.rs
index 4ee39d4a..7b4edda4 100644
--- a/helix-stdx/src/rope.rs
+++ b/helix-stdx/src/rope.rs
@@ -1,11 +1,22 @@
+use std::ops::{Bound, RangeBounds};
+
+pub use regex_cursor::engines::meta::{Builder as RegexBuilder, Regex};
+pub use regex_cursor::regex_automata::util::syntax::Config;
+use regex_cursor::{Input as RegexInput, RopeyCursor};
use ropey::RopeSlice;
-pub trait RopeSliceExt: Sized {
+pub trait RopeSliceExt<'a>: Sized {
fn ends_with(self, text: &str) -> bool;
fn starts_with(self, text: &str) -> bool;
+ fn regex_input(self) -> RegexInput<RopeyCursor<'a>>;
+ fn regex_input_at_bytes<R: RangeBounds<usize>>(
+ self,
+ byte_range: R,
+ ) -> RegexInput<RopeyCursor<'a>>;
+ fn regex_input_at<R: RangeBounds<usize>>(self, char_range: R) -> RegexInput<RopeyCursor<'a>>;
}
-impl RopeSliceExt for RopeSlice<'_> {
+impl<'a> RopeSliceExt<'a> for RopeSlice<'a> {
fn ends_with(self, text: &str) -> bool {
let len = self.len_bytes();
if len < text.len() {
@@ -23,4 +34,34 @@ impl RopeSliceExt for RopeSlice<'_> {
self.get_byte_slice(..len - text.len())
.map_or(false, |start| start == text)
}
+
+ fn regex_input(self) -> RegexInput<RopeyCursor<'a>> {
+ RegexInput::new(self)
+ }
+
+ fn regex_input_at<R: RangeBounds<usize>>(self, char_range: R) -> RegexInput<RopeyCursor<'a>> {
+ let start_bound = match char_range.start_bound() {
+ Bound::Included(&val) => Bound::Included(self.char_to_byte(val)),
+ Bound::Excluded(&val) => Bound::Excluded(self.char_to_byte(val)),
+ Bound::Unbounded => Bound::Unbounded,
+ };
+ let end_bound = match char_range.end_bound() {
+ Bound::Included(&val) => Bound::Included(self.char_to_byte(val)),
+ Bound::Excluded(&val) => Bound::Excluded(self.char_to_byte(val)),
+ Bound::Unbounded => Bound::Unbounded,
+ };
+ self.regex_input_at_bytes((start_bound, end_bound))
+ }
+ fn regex_input_at_bytes<R: RangeBounds<usize>>(
+ self,
+ byte_range: R,
+ ) -> RegexInput<RopeyCursor<'a>> {
+ let input = match byte_range.start_bound() {
+ Bound::Included(&pos) | Bound::Excluded(&pos) => {
+ RegexInput::new(RopeyCursor::at(self, pos))
+ }
+ Bound::Unbounded => RegexInput::new(self),
+ };
+ input.range(byte_range)
+ }
}
diff --git a/helix-term/src/commands.rs b/helix-term/src/commands.rs
index 51a1ede9..fdad31a8 100644
--- a/helix-term/src/commands.rs
+++ b/helix-term/src/commands.rs
@@ -3,6 +3,7 @@ pub(crate) mod lsp;
pub(crate) mod typed;
pub use dap::*;
+use helix_stdx::rope::{self, RopeSliceExt};
use helix_vcs::Hunk;
pub use lsp::*;
use tui::widgets::Row;
@@ -19,7 +20,7 @@ use helix_core::{
match_brackets,
movement::{self, move_vertically_visual, Direction},
object, pos_at_coords,
- regex::{self, Regex, RegexBuilder},
+ regex::{self, Regex},
search::{self, CharMatcher},
selection, shellwords, surround,
syntax::LanguageServerFeature,
@@ -1907,11 +1908,7 @@ fn split_selection(cx: &mut Context) {
fn split_selection_on_newline(cx: &mut Context) {
let (view, doc) = current!(cx.editor);
let text = doc.text().slice(..);
- // only compile the regex once
- #[allow(clippy::trivial_regex)]
- static REGEX: Lazy<Regex> =
- Lazy::new(|| Regex::new(r"\r\n|[\n\r\u{000B}\u{000C}\u{0085}\u{2028}\u{2029}]").unwrap());
- let selection = selection::split_on_matches(text, doc.selection(view.id), &REGEX);
+ let selection = selection::split_on_newline(text, doc.selection(view.id));
doc.set_selection(view.id, selection);
}
@@ -1930,8 +1927,7 @@ fn merge_consecutive_selections(cx: &mut Context) {
#[allow(clippy::too_many_arguments)]
fn search_impl(
editor: &mut Editor,
- contents: &str,
- regex: &Regex,
+ regex: &rope::Regex,
movement: Movement,
direction: Direction,
scrolloff: usize,
@@ -1959,23 +1955,20 @@ fn search_impl(
// do a reverse search and wraparound to the end, we don't need to search
// the text before the current cursor position for matches, but by slicing
// it out, we need to add it back to the position of the selection.
- let mut offset = 0;
+ let doc = doc!(editor).text().slice(..);
// use find_at to find the next match after the cursor, loop around the end
// Careful, `Regex` uses `bytes` as offsets, not character indices!
let mut mat = match direction {
- Direction::Forward => regex.find_at(contents, start),
- Direction::Backward => regex.find_iter(&contents[..start]).last(),
+ Direction::Forward => regex.find(doc.regex_input_at_bytes(start..)),
+ Direction::Backward => regex.find_iter(doc.regex_input_at_bytes(..start)).last(),
};
if mat.is_none() {
if wrap_around {
mat = match direction {
- Direction::Forward => regex.find(contents),
- Direction::Backward => {
- offset = start;
- regex.find_iter(&contents[start..]).last()
- }
+ Direction::Forward => regex.find(doc.regex_input()),
+ Direction::Backward => regex.find_iter(doc.regex_input_at_bytes(start..)).last(),
};
}
if show_warnings {
@@ -1992,8 +1985,8 @@ fn search_impl(
let selection = doc.selection(view.id);
if let Some(mat) = mat {
- let start = text.byte_to_char(mat.start() + offset);
- let end = text.byte_to_char(mat.end() + offset);
+ let start = text.byte_to_char(mat.start());
+ let end = text.byte_to_char(mat.end());
if end == 0 {
// skip empty matches that don't make sense
@@ -2037,13 +2030,7 @@ fn searcher(cx: &mut Context, direction: Direction) {
let scrolloff = config.scrolloff;
let wrap_around = config.search.wrap_around;
- let doc = doc!(cx.editor);
-
// TODO: could probably share with select_on_matches?
-
- // HAXX: sadly we can't avoid allocating a single string for the whole buffer since we can't
- // feed chunks into the regex yet
- let contents = doc.text().slice(..).to_string();
let completions = search_completions(cx, Some(reg));
ui::regex_prompt(
@@ -2065,7 +2052,6 @@ fn searcher(cx: &mut Context, direction: Direction) {
}
search_impl(
cx.editor,
- &contents,
&regex,
Movement::Move,
direction,
@@ -2085,8 +2071,6 @@ fn search_next_or_prev_impl(cx: &mut Context, movement: Movement, direction: Dir
let config = cx.editor.config();
let scrolloff = config.scrolloff;
if let Some(query) = cx.editor.registers.first(register, cx.editor) {
- let doc = doc!(cx.editor);
- let contents = doc.text().slice(..).to_string();
let search_config = &config.search;
let case_insensitive = if search_config.smart_case {
!query.chars().any(char::is_uppercase)
@@ -2094,15 +2078,17 @@ fn search_next_or_prev_impl(cx: &mut Context, movement: Movement, direction: Dir
false
};
let wrap_around = search_config.wrap_around;
- if let Ok(regex) = RegexBuilder::new(&query)
- .case_insensitive(case_insensitive)
- .multi_line(true)
- .build()
+ if let Ok(regex) = rope::RegexBuilder::new()
+ .syntax(
+ rope::Config::new()
+ .case_insensitive(case_insensitive)
+ .multi_line(true),
+ )
+ .build(&query)
{
for _ in 0..count {
search_impl(
cx.editor,
- &contents,
&regex,
movement,
direction,
@@ -2239,7 +2225,7 @@ fn global_search(cx: &mut Context) {
let reg = cx.register.unwrap_or('/');
let completions = search_completions(cx, Some(reg));
- ui::regex_prompt(
+ ui::raw_regex_prompt(
cx,
"global-search:".into(),
Some(reg),
@@ -2250,7 +2236,7 @@ fn global_search(cx: &mut Context) {
.map(|comp| (0.., std::borrow::Cow::Owned(comp.clone())))
.collect()
},
- move |cx, regex, event| {
+ move |cx, _, input, event| {
if event != PromptEvent::Validate {
return;
}
@@ -2265,7 +2251,7 @@ fn global_search(cx: &mut Context) {
if let Ok(matcher) = RegexMatcherBuilder::new()
.case_smart(smart_case)
- .build(regex.as_str())
+ .build(input)
{
let search_root = helix_stdx::env::current_working_dir();
if !search_root.exists() {
diff --git a/helix-term/src/ui/mod.rs b/helix-term/src/ui/mod.rs
index 0873116c..a4b148af 100644
--- a/helix-term/src/ui/mod.rs
+++ b/helix-term/src/ui/mod.rs
@@ -18,6 +18,7 @@ use crate::filter_picker_entry;
use crate::job::{self, Callback};
pub use completion::{Completion, CompletionItem};
pub use editor::EditorView;
+use helix_stdx::rope;
pub use markdown::Markdown;
pub use menu::Menu;
pub use picker::{DynamicPicker, FileLocation, Picker};
@@ -26,8 +27,6 @@ pub use prompt::{Prompt, PromptEvent};
pub use spinner::{ProgressSpinners, Spinner};
pub use text::Text;
-use helix_core::regex::Regex;
-use helix_core::regex::RegexBuilder;
use helix_view::Editor;
use std::path::PathBuf;
@@ -63,7 +62,22 @@ pub fn regex_prompt(
prompt: std::borrow::Cow<'static, str>,
history_register: Option<char>,
completion_fn: impl FnMut(&Editor, &str) -> Vec<prompt::Completion> + 'static,
- fun: impl Fn(&mut crate::compositor::Context, Regex, PromptEvent) + 'static,
+ fun: impl Fn(&mut crate::compositor::Context, rope::Regex, PromptEvent) + 'static,
+) {
+ raw_regex_prompt(
+ cx,
+ prompt,
+ history_register,
+ completion_fn,
+ move |cx, regex, _, event| fun(cx, regex, event),
+ );
+}
+pub fn raw_regex_prompt(
+ cx: &mut crate::commands::Context,
+ prompt: std::borrow::Cow<'static, str>,
+ history_register: Option<char>,
+ completion_fn: impl FnMut(&Editor, &str) -> Vec<prompt::Completion> + 'static,
+ fun: impl Fn(&mut crate::compositor::Context, rope::Regex, &str, PromptEvent) + 'static,
) {
let (view, doc) = current!(cx.editor);
let doc_id = view.doc;
@@ -94,10 +108,13 @@ pub fn regex_prompt(
false
};
- match RegexBuilder::new(input)
- .case_insensitive(case_insensitive)
- .multi_line(true)
- .build()
+ match rope::RegexBuilder::new()
+ .syntax(
+ rope::Config::new()
+ .case_insensitive(case_insensitive)
+ .multi_line(true),
+ )
+ .build(input)
{
Ok(regex) => {
let (view, doc) = current!(cx.editor);
@@ -110,7 +127,7 @@ pub fn regex_prompt(
view.jumps.push((doc_id, snapshot.clone()));
}
- fun(cx, regex, event);
+ fun(cx, regex, input, event);
let (view, doc) = current!(cx.editor);
view.ensure_cursor_in_view(doc, config.scrolloff);