aboutsummaryrefslogtreecommitdiff
path: root/helix-core/src
diff options
context:
space:
mode:
authorPascal Kuthe2024-02-26 07:45:20 +0000
committerGitHub2024-02-26 07:45:20 +0000
commitcd02976fa3a55c2c1f01b95c40d178061968f797 (patch)
tree707df0a4fb93fa7c8773ba59a85259835deb166e /helix-core/src
parentc68ec92c5e1bd3a2bf402fb583de23693f59b722 (diff)
switch to regex-cursor (#9422)
Diffstat (limited to 'helix-core/src')
-rw-r--r--helix-core/src/selection.rs96
-rw-r--r--helix-core/src/syntax.rs12
2 files changed, 69 insertions, 39 deletions
diff --git a/helix-core/src/selection.rs b/helix-core/src/selection.rs
index c44685ee..91f1d0de 100644
--- a/helix-core/src/selection.rs
+++ b/helix-core/src/selection.rs
@@ -7,9 +7,11 @@ use crate::{
ensure_grapheme_boundary_next, ensure_grapheme_boundary_prev, next_grapheme_boundary,
prev_grapheme_boundary,
},
+ line_ending::get_line_ending,
movement::Direction,
Assoc, ChangeSet, RopeGraphemes, RopeSlice,
};
+use helix_stdx::rope::{self, RopeSliceExt};
use smallvec::{smallvec, SmallVec};
use std::borrow::Cow;
@@ -708,12 +710,12 @@ impl IntoIterator for Selection {
pub fn keep_or_remove_matches(
text: RopeSlice,
selection: &Selection,
- regex: &crate::regex::Regex,
+ regex: &rope::Regex,
remove: bool,
) -> Option<Selection> {
let result: SmallVec<_> = selection
.iter()
- .filter(|range| regex.is_match(&range.fragment(text)) ^ remove)
+ .filter(|range| regex.is_match(text.regex_input_at(range.from()..range.to())) ^ remove)
.copied()
.collect();
@@ -724,25 +726,20 @@ pub fn keep_or_remove_matches(
None
}
+// TODO: support to split on capture #N instead of whole match
pub fn select_on_matches(
text: RopeSlice,
selection: &Selection,
- regex: &crate::regex::Regex,
+ regex: &rope::Regex,
) -> Option<Selection> {
let mut result = SmallVec::with_capacity(selection.len());
for sel in selection {
- // TODO: can't avoid occasional allocations since Regex can't operate on chunks yet
- let fragment = sel.fragment(text);
-
- let sel_start = sel.from();
- let start_byte = text.char_to_byte(sel_start);
-
- for mat in regex.find_iter(&fragment) {
+ for mat in regex.find_iter(text.regex_input_at(sel.from()..sel.to())) {
// TODO: retain range direction
- let start = text.byte_to_char(start_byte + mat.start());
- let end = text.byte_to_char(start_byte + mat.end());
+ let start = text.byte_to_char(mat.start());
+ let end = text.byte_to_char(mat.end());
let range = Range::new(start, end);
// Make sure the match is not right outside of the selection.
@@ -761,12 +758,7 @@ pub fn select_on_matches(
None
}
-// TODO: support to split on capture #N instead of whole match
-pub fn split_on_matches(
- text: RopeSlice,
- selection: &Selection,
- regex: &crate::regex::Regex,
-) -> Selection {
+pub fn split_on_newline(text: RopeSlice, selection: &Selection) -> Selection {
let mut result = SmallVec::with_capacity(selection.len());
for sel in selection {
@@ -776,21 +768,47 @@ pub fn split_on_matches(
continue;
}
- // TODO: can't avoid occasional allocations since Regex can't operate on chunks yet
- let fragment = sel.fragment(text);
-
let sel_start = sel.from();
let sel_end = sel.to();
- let start_byte = text.char_to_byte(sel_start);
+ let mut start = sel_start;
+ for mat in sel.slice(text).lines() {
+ let len = mat.len_chars();
+ let line_end_len = get_line_ending(&mat).map(|le| le.len_chars()).unwrap_or(0);
+ // TODO: retain range direction
+ result.push(Range::new(start, start + len - line_end_len));
+ start += len;
+ }
+
+ if start < sel_end {
+ result.push(Range::new(start, sel_end));
+ }
+ }
+
+ // TODO: figure out a new primary index
+ Selection::new(result, 0)
+}
+
+pub fn split_on_matches(text: RopeSlice, selection: &Selection, regex: &rope::Regex) -> Selection {
+ let mut result = SmallVec::with_capacity(selection.len());
+
+ for sel in selection {
+ // Special case: zero-width selection.
+ if sel.from() == sel.to() {
+ result.push(*sel);
+ continue;
+ }
+
+ let sel_start = sel.from();
+ let sel_end = sel.to();
let mut start = sel_start;
- for mat in regex.find_iter(&fragment) {
+ for mat in regex.find_iter(text.regex_input_at(sel_start..sel_end)) {
// TODO: retain range direction
- let end = text.byte_to_char(start_byte + mat.start());
+ let end = text.byte_to_char(mat.start());
result.push(Range::new(start, end));
- start = text.byte_to_char(start_byte + mat.end());
+ start = text.byte_to_char(mat.end());
}
if start < sel_end {
@@ -1021,14 +1039,12 @@ mod test {
#[test]
fn test_select_on_matches() {
- use crate::regex::{Regex, RegexBuilder};
-
let r = Rope::from_str("Nobody expects the Spanish inquisition");
let s = r.slice(..);
let selection = Selection::single(0, r.len_chars());
assert_eq!(
- select_on_matches(s, &selection, &Regex::new(r"[A-Z][a-z]*").unwrap()),
+ select_on_matches(s, &selection, &rope::Regex::new(r"[A-Z][a-z]*").unwrap()),
Some(Selection::new(
smallvec![Range::new(0, 6), Range::new(19, 26)],
0
@@ -1038,8 +1054,14 @@ mod test {
let r = Rope::from_str("This\nString\n\ncontains multiple\nlines");
let s = r.slice(..);
- let start_of_line = RegexBuilder::new(r"^").multi_line(true).build().unwrap();
- let end_of_line = RegexBuilder::new(r"$").multi_line(true).build().unwrap();
+ let start_of_line = rope::RegexBuilder::new()
+ .syntax(rope::Config::new().multi_line(true))
+ .build(r"^")
+ .unwrap();
+ let end_of_line = rope::RegexBuilder::new()
+ .syntax(rope::Config::new().multi_line(true))
+ .build(r"$")
+ .unwrap();
// line without ending
assert_eq!(
@@ -1077,9 +1099,9 @@ mod test {
select_on_matches(
s,
&Selection::single(0, s.len_chars()),
- &RegexBuilder::new(r"^[a-z ]*$")
- .multi_line(true)
- .build()
+ &rope::RegexBuilder::new()
+ .syntax(rope::Config::new().multi_line(true))
+ .build(r"^[a-z ]*$")
.unwrap()
),
Some(Selection::new(
@@ -1171,13 +1193,15 @@ mod test {
#[test]
fn test_split_on_matches() {
- use crate::regex::Regex;
-
let text = Rope::from(" abcd efg wrs xyz 123 456");
let selection = Selection::new(smallvec![Range::new(0, 9), Range::new(11, 20),], 0);
- let result = split_on_matches(text.slice(..), &selection, &Regex::new(r"\s+").unwrap());
+ let result = split_on_matches(
+ text.slice(..),
+ &selection,
+ &rope::Regex::new(r"\s+").unwrap(),
+ );
assert_eq!(
result.ranges(),
diff --git a/helix-core/src/syntax.rs b/helix-core/src/syntax.rs
index a9344448..0d8559ca 100644
--- a/helix-core/src/syntax.rs
+++ b/helix-core/src/syntax.rs
@@ -12,6 +12,7 @@ use arc_swap::{ArcSwap, Guard};
use bitflags::bitflags;
use globset::GlobSet;
use hashbrown::raw::RawTable;
+use helix_stdx::rope::{self, RopeSliceExt};
use slotmap::{DefaultKey as LayerId, HopSlotMap};
use std::{
@@ -1961,11 +1962,16 @@ impl HighlightConfiguration {
node_slice
};
- static SHEBANG_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(SHEBANG).unwrap());
+ static SHEBANG_REGEX: Lazy<rope::Regex> =
+ Lazy::new(|| rope::Regex::new(SHEBANG).unwrap());
injection_capture = SHEBANG_REGEX
- .captures(&Cow::from(lines))
- .map(|cap| InjectionLanguageMarker::Shebang(cap[1].to_owned()))
+ .captures_iter(lines.regex_input())
+ .map(|cap| {
+ let cap = lines.byte_slice(cap.get_group(1).unwrap().range());
+ InjectionLanguageMarker::Shebang(cap.into())
+ })
+ .next()
} else if index == self.injection_content_capture_index {
content_node = Some(capture.node);
}