diff options
author | Blaž Hrastnik | 2021-02-24 08:12:44 +0000 |
---|---|---|
committer | Blaž Hrastnik | 2021-02-24 08:12:44 +0000 |
commit | 6bd16a73209579e8402f1a2cde0ce6813f08e6fc (patch) | |
tree | f765fb89cea4abdae602db68f74af3de6bdc5850 | |
parent | f118e7580f823aec9cb9801d4149f67d009394a4 (diff) |
graphemes: Optimize nth_next/nth_prev operation.
It's used a lot more than it used to be in position calculation. Instead of
throwing away state between boundary calculations, reuse it.
-rw-r--r-- | helix-core/src/graphemes.rs | 114 |
1 files changed, 54 insertions, 60 deletions
diff --git a/helix-core/src/graphemes.rs b/helix-core/src/graphemes.rs index 7e5424be..786f6a68 100644 --- a/helix-core/src/graphemes.rs +++ b/helix-core/src/graphemes.rs @@ -27,23 +27,11 @@ pub fn grapheme_width(g: &str) -> usize { } pub fn nth_prev_grapheme_boundary(slice: RopeSlice, char_idx: usize, n: usize) -> usize { - // TODO: implement this more efficiently. This has to do a lot of - // re-scanning of rope chunks. Probably move the main implementation here, - // and have prev_grapheme_boundary call this instead. - let mut char_idx = char_idx; - for _ in 0..n { - char_idx = prev_grapheme_boundary(slice, char_idx); - } - char_idx -} - -/// Finds the previous grapheme boundary before the given char position. -pub fn prev_grapheme_boundary(slice: RopeSlice, char_idx: usize) -> usize { // Bounds check debug_assert!(char_idx <= slice.len_chars()); // We work with bytes for this, so convert. - let byte_idx = slice.char_to_byte(char_idx); + let mut byte_idx = slice.char_to_byte(char_idx); // Get the chunk with our byte index in it. let (mut chunk, mut chunk_byte_idx, mut chunk_char_idx, _) = slice.chunk_at_byte(byte_idx); @@ -52,46 +40,43 @@ pub fn prev_grapheme_boundary(slice: RopeSlice, char_idx: usize) -> usize { let mut gc = GraphemeCursor::new(byte_idx, slice.len_bytes(), true); // Find the previous grapheme cluster boundary. 
- loop { - match gc.prev_boundary(chunk, chunk_byte_idx) { - Ok(None) => return 0, - Ok(Some(n)) => { - let tmp = byte_to_char_idx(chunk, n - chunk_byte_idx); - return chunk_char_idx + tmp; - } - Err(GraphemeIncomplete::PrevChunk) => { - let (a, b, c, _) = slice.chunk_at_byte(chunk_byte_idx - 1); - chunk = a; - chunk_byte_idx = b; - chunk_char_idx = c; - } - Err(GraphemeIncomplete::PreContext(n)) => { - let ctx_chunk = slice.chunk_at_byte(n - 1).0; - gc.provide_context(ctx_chunk, n - ctx_chunk.len()); + for _ in 0..n { + loop { + match gc.prev_boundary(chunk, chunk_byte_idx) { + Ok(None) => return 0, + Ok(Some(n)) => { + byte_idx = n; + break; + } + Err(GraphemeIncomplete::PrevChunk) => { + let (a, b, c, _) = slice.chunk_at_byte(chunk_byte_idx - 1); + chunk = a; + chunk_byte_idx = b; + chunk_char_idx = c; + } + Err(GraphemeIncomplete::PreContext(n)) => { + let ctx_chunk = slice.chunk_at_byte(n - 1).0; + gc.provide_context(ctx_chunk, n - ctx_chunk.len()); + } + _ => unreachable!(), } - _ => unreachable!(), } } + let tmp = byte_to_char_idx(chunk, byte_idx - chunk_byte_idx); + chunk_char_idx + tmp } -pub fn nth_next_grapheme_boundary(slice: RopeSlice, char_idx: usize, n: usize) -> usize { - // TODO: implement this more efficiently. This has to do a lot of - // re-scanning of rope chunks. Probably move the main implementation here, - // and have next_grapheme_boundary call this instead. - let mut char_idx = char_idx; - for _ in 0..n { - char_idx = next_grapheme_boundary(slice, char_idx); - } - char_idx +/// Finds the previous grapheme boundary before the given char position. +pub fn prev_grapheme_boundary(slice: RopeSlice, char_idx: usize) -> usize { + nth_prev_grapheme_boundary(slice, char_idx, 1) } -/// Finds the next grapheme boundary after the given char position. 
-pub fn next_grapheme_boundary(slice: RopeSlice, char_idx: usize) -> usize { +pub fn nth_next_grapheme_boundary(slice: RopeSlice, char_idx: usize, n: usize) -> usize { // Bounds check debug_assert!(char_idx <= slice.len_chars()); // We work with bytes for this, so convert. - let byte_idx = slice.char_to_byte(char_idx); + let mut byte_idx = slice.char_to_byte(char_idx); // Get the chunk with our byte index in it. let (mut chunk, mut chunk_byte_idx, mut chunk_char_idx, _) = slice.chunk_at_byte(byte_idx); @@ -99,27 +84,36 @@ pub fn next_grapheme_boundary(slice: RopeSlice, char_idx: usize) -> usize { // Set up the grapheme cursor. let mut gc = GraphemeCursor::new(byte_idx, slice.len_bytes(), true); - // Find the next grapheme cluster boundary. - loop { - match gc.next_boundary(chunk, chunk_byte_idx) { - Ok(None) => return slice.len_chars(), - Ok(Some(n)) => { - let tmp = byte_to_char_idx(chunk, n - chunk_byte_idx); - return chunk_char_idx + tmp; - } - Err(GraphemeIncomplete::NextChunk) => { - chunk_byte_idx += chunk.len(); - let (a, _, c, _) = slice.chunk_at_byte(chunk_byte_idx); - chunk = a; - chunk_char_idx = c; - } - Err(GraphemeIncomplete::PreContext(n)) => { - let ctx_chunk = slice.chunk_at_byte(n - 1).0; - gc.provide_context(ctx_chunk, n - ctx_chunk.len()); + // Find the nth next grapheme cluster boundary. 
+ for _ in 0..n { + loop { + match gc.next_boundary(chunk, chunk_byte_idx) { + Ok(None) => return slice.len_chars(), + Ok(Some(n)) => { + byte_idx = n; + break; + } + Err(GraphemeIncomplete::NextChunk) => { + chunk_byte_idx += chunk.len(); + let (a, _, c, _) = slice.chunk_at_byte(chunk_byte_idx); + chunk = a; + chunk_char_idx = c; + } + Err(GraphemeIncomplete::PreContext(n)) => { + let ctx_chunk = slice.chunk_at_byte(n - 1).0; + gc.provide_context(ctx_chunk, n - ctx_chunk.len()); + } + _ => unreachable!(), } - _ => unreachable!(), } } + let tmp = byte_to_char_idx(chunk, byte_idx - chunk_byte_idx); + chunk_char_idx + tmp +} + +/// Finds the next grapheme boundary after the given char position. +pub fn next_grapheme_boundary(slice: RopeSlice, char_idx: usize) -> usize { + nth_next_grapheme_boundary(slice, char_idx, 1) } /// Returns whether the given char position is a grapheme boundary. |