aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBlaž Hrastnik2021-02-24 08:12:44 +0000
committerBlaž Hrastnik2021-02-24 08:12:44 +0000
commit6bd16a73209579e8402f1a2cde0ce6813f08e6fc (patch)
treef765fb89cea4abdae602db68f74af3de6bdc5850
parentf118e7580f823aec9cb9801d4149f67d009394a4 (diff)
graphemes: Optimize nth_next/nth_prev operation.
It's used a lot more than it used to be in position calculation. Instead of throwing away state between boundary calculations, reuse it.
-rw-r--r--helix-core/src/graphemes.rs114
1 files changed, 54 insertions, 60 deletions
diff --git a/helix-core/src/graphemes.rs b/helix-core/src/graphemes.rs
index 7e5424be..786f6a68 100644
--- a/helix-core/src/graphemes.rs
+++ b/helix-core/src/graphemes.rs
@@ -27,23 +27,11 @@ pub fn grapheme_width(g: &str) -> usize {
}
pub fn nth_prev_grapheme_boundary(slice: RopeSlice, char_idx: usize, n: usize) -> usize {
- // TODO: implement this more efficiently. This has to do a lot of
- // re-scanning of rope chunks. Probably move the main implementation here,
- // and have prev_grapheme_boundary call this instead.
- let mut char_idx = char_idx;
- for _ in 0..n {
- char_idx = prev_grapheme_boundary(slice, char_idx);
- }
- char_idx
-}
-
-/// Finds the previous grapheme boundary before the given char position.
-pub fn prev_grapheme_boundary(slice: RopeSlice, char_idx: usize) -> usize {
// Bounds check
debug_assert!(char_idx <= slice.len_chars());
// We work with bytes for this, so convert.
- let byte_idx = slice.char_to_byte(char_idx);
+ let mut byte_idx = slice.char_to_byte(char_idx);
// Get the chunk with our byte index in it.
let (mut chunk, mut chunk_byte_idx, mut chunk_char_idx, _) = slice.chunk_at_byte(byte_idx);
@@ -52,46 +40,43 @@ pub fn prev_grapheme_boundary(slice: RopeSlice, char_idx: usize) -> usize {
let mut gc = GraphemeCursor::new(byte_idx, slice.len_bytes(), true);
// Find the previous grapheme cluster boundary.
- loop {
- match gc.prev_boundary(chunk, chunk_byte_idx) {
- Ok(None) => return 0,
- Ok(Some(n)) => {
- let tmp = byte_to_char_idx(chunk, n - chunk_byte_idx);
- return chunk_char_idx + tmp;
- }
- Err(GraphemeIncomplete::PrevChunk) => {
- let (a, b, c, _) = slice.chunk_at_byte(chunk_byte_idx - 1);
- chunk = a;
- chunk_byte_idx = b;
- chunk_char_idx = c;
- }
- Err(GraphemeIncomplete::PreContext(n)) => {
- let ctx_chunk = slice.chunk_at_byte(n - 1).0;
- gc.provide_context(ctx_chunk, n - ctx_chunk.len());
+ for _ in 0..n {
+ loop {
+ match gc.prev_boundary(chunk, chunk_byte_idx) {
+ Ok(None) => return 0,
+ Ok(Some(n)) => {
+ byte_idx = n;
+ break;
+ }
+ Err(GraphemeIncomplete::PrevChunk) => {
+ let (a, b, c, _) = slice.chunk_at_byte(chunk_byte_idx - 1);
+ chunk = a;
+ chunk_byte_idx = b;
+ chunk_char_idx = c;
+ }
+ Err(GraphemeIncomplete::PreContext(n)) => {
+ let ctx_chunk = slice.chunk_at_byte(n - 1).0;
+ gc.provide_context(ctx_chunk, n - ctx_chunk.len());
+ }
+ _ => unreachable!(),
}
- _ => unreachable!(),
}
}
+ let tmp = byte_to_char_idx(chunk, byte_idx - chunk_byte_idx);
+ chunk_char_idx + tmp
}
-pub fn nth_next_grapheme_boundary(slice: RopeSlice, char_idx: usize, n: usize) -> usize {
- // TODO: implement this more efficiently. This has to do a lot of
- // re-scanning of rope chunks. Probably move the main implementation here,
- // and have next_grapheme_boundary call this instead.
- let mut char_idx = char_idx;
- for _ in 0..n {
- char_idx = next_grapheme_boundary(slice, char_idx);
- }
- char_idx
+/// Finds the previous grapheme boundary before the given char position.
+pub fn prev_grapheme_boundary(slice: RopeSlice, char_idx: usize) -> usize {
+ nth_prev_grapheme_boundary(slice, char_idx, 1)
}
-/// Finds the next grapheme boundary after the given char position.
-pub fn next_grapheme_boundary(slice: RopeSlice, char_idx: usize) -> usize {
+pub fn nth_next_grapheme_boundary(slice: RopeSlice, char_idx: usize, n: usize) -> usize {
// Bounds check
debug_assert!(char_idx <= slice.len_chars());
// We work with bytes for this, so convert.
- let byte_idx = slice.char_to_byte(char_idx);
+ let mut byte_idx = slice.char_to_byte(char_idx);
// Get the chunk with our byte index in it.
let (mut chunk, mut chunk_byte_idx, mut chunk_char_idx, _) = slice.chunk_at_byte(byte_idx);
@@ -99,27 +84,36 @@ pub fn next_grapheme_boundary(slice: RopeSlice, char_idx: usize) -> usize {
// Set up the grapheme cursor.
let mut gc = GraphemeCursor::new(byte_idx, slice.len_bytes(), true);
- // Find the next grapheme cluster boundary.
- loop {
- match gc.next_boundary(chunk, chunk_byte_idx) {
- Ok(None) => return slice.len_chars(),
- Ok(Some(n)) => {
- let tmp = byte_to_char_idx(chunk, n - chunk_byte_idx);
- return chunk_char_idx + tmp;
- }
- Err(GraphemeIncomplete::NextChunk) => {
- chunk_byte_idx += chunk.len();
- let (a, _, c, _) = slice.chunk_at_byte(chunk_byte_idx);
- chunk = a;
- chunk_char_idx = c;
- }
- Err(GraphemeIncomplete::PreContext(n)) => {
- let ctx_chunk = slice.chunk_at_byte(n - 1).0;
- gc.provide_context(ctx_chunk, n - ctx_chunk.len());
+ // Find the nth next grapheme cluster boundary.
+ for _ in 0..n {
+ loop {
+ match gc.next_boundary(chunk, chunk_byte_idx) {
+ Ok(None) => return slice.len_chars(),
+ Ok(Some(n)) => {
+ byte_idx = n;
+ break;
+ }
+ Err(GraphemeIncomplete::NextChunk) => {
+ chunk_byte_idx += chunk.len();
+ let (a, _, c, _) = slice.chunk_at_byte(chunk_byte_idx);
+ chunk = a;
+ chunk_char_idx = c;
+ }
+ Err(GraphemeIncomplete::PreContext(n)) => {
+ let ctx_chunk = slice.chunk_at_byte(n - 1).0;
+ gc.provide_context(ctx_chunk, n - ctx_chunk.len());
+ }
+ _ => unreachable!(),
}
- _ => unreachable!(),
}
}
+ let tmp = byte_to_char_idx(chunk, byte_idx - chunk_byte_idx);
+ chunk_char_idx + tmp
+}
+
+/// Finds the next grapheme boundary after the given char position.
+pub fn next_grapheme_boundary(slice: RopeSlice, char_idx: usize) -> usize {
+ nth_next_grapheme_boundary(slice, char_idx, 1)
}
/// Returns whether the given char position is a grapheme boundary.