From 11c3ba935041437d11e1105dfa37480f2d27a6c4 Mon Sep 17 00:00:00 2001 From: Blaž Hrastnik Date: Wed, 12 Jan 2022 10:22:16 +0900 Subject: Speed up ensure_next_boundary during render This code: let start = ensure_grapheme_boundary_next(text, text.byte_to_char(start)); let end = ensure_grapheme_boundary_next(text, text.byte_to_char(end)); Would convert byte to char index, but then internally immediately convert back to byte index, operate on it, then convert it to char index. This change reduces the amount of time spent in ensure_grapheme_boundary from 29% to 2%. --- helix-core/src/graphemes.rs | 56 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) (limited to 'helix-core/src/graphemes.rs') diff --git a/helix-core/src/graphemes.rs b/helix-core/src/graphemes.rs index c6398875..a6c74f65 100644 --- a/helix-core/src/graphemes.rs +++ b/helix-core/src/graphemes.rs @@ -120,6 +120,43 @@ pub fn nth_next_grapheme_boundary(slice: RopeSlice, char_idx: usize, n: usize) - chunk_char_idx + tmp } +#[must_use] +pub fn nth_next_grapheme_boundary_byte(slice: RopeSlice, mut byte_idx: usize, n: usize) -> usize { + // Bounds check + debug_assert!(byte_idx <= slice.len_bytes()); + + // Get the chunk with our byte index in it. + let (mut chunk, mut chunk_byte_idx, mut _chunk_char_idx, _) = slice.chunk_at_byte(byte_idx); + + // Set up the grapheme cursor. + let mut gc = GraphemeCursor::new(byte_idx, slice.len_bytes(), true); + + // Find the nth next grapheme cluster boundary. + for _ in 0..n { + loop { + match gc.next_boundary(chunk, chunk_byte_idx) { + Ok(None) => return slice.len_bytes(), + Ok(Some(n)) => { + byte_idx = n; + break; + } + Err(GraphemeIncomplete::NextChunk) => { + chunk_byte_idx += chunk.len(); + let (a, _, _c, _) = slice.chunk_at_byte(chunk_byte_idx); + chunk = a; + // chunk_char_idx = c; + } + Err(GraphemeIncomplete::PreContext(n)) => { + let ctx_chunk = slice.chunk_at_byte(n - 1).0; + gc.provide_context(ctx_chunk, n - ctx_chunk.len()); + } + _ => unreachable!(), + } + } + } + byte_idx +} + /// Finds the next grapheme boundary after the given char position. #[must_use] #[inline(always)] @@ -127,6 +164,13 @@ pub fn next_grapheme_boundary(slice: RopeSlice, char_idx: usize) -> usize { nth_next_grapheme_boundary(slice, char_idx, 1) } +/// Finds the next grapheme boundary after the given byte position. +#[must_use] +#[inline(always)] +pub fn next_grapheme_boundary_byte(slice: RopeSlice, byte_idx: usize) -> usize { + nth_next_grapheme_boundary_byte(slice, byte_idx, 1) +} + /// Returns the passed char index if it's already a grapheme boundary, /// or the next grapheme boundary char index if not. #[must_use] @@ -151,6 +195,18 @@ pub fn ensure_grapheme_boundary_prev(slice: RopeSlice, char_idx: usize) -> usize } } +/// Returns the passed byte index if it's already a grapheme boundary, +/// or the next grapheme boundary byte index if not. +#[must_use] +#[inline] +pub fn ensure_grapheme_boundary_next_byte(slice: RopeSlice, byte_idx: usize) -> usize { + if byte_idx == 0 { + byte_idx + } else { + next_grapheme_boundary_byte(slice, byte_idx - 1) + } +} + /// Returns whether the given char position is a grapheme boundary. #[must_use] pub fn is_grapheme_boundary(slice: RopeSlice, char_idx: usize) -> bool { -- cgit v1.2.3-70-g09d2 From 2302869836eedf6bba718815fbff3a5fd7afee36 Mon Sep 17 00:00:00 2001 From: Blaž Hrastnik Date: Fri, 14 Jan 2022 12:13:53 +0900 Subject: fix: ensure_grapheme_boundary_next_byte needs to index at valid char --- helix-core/src/graphemes.rs | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) (limited to 'helix-core/src/graphemes.rs') diff --git a/helix-core/src/graphemes.rs b/helix-core/src/graphemes.rs index a6c74f65..aa898684 100644 --- a/helix-core/src/graphemes.rs +++ b/helix-core/src/graphemes.rs @@ -203,7 +203,12 @@ pub fn ensure_grapheme_boundary_next_byte(slice: RopeSlice, byte_idx: usize) -> if byte_idx == 0 { byte_idx } else { - next_grapheme_boundary_byte(slice, byte_idx - 1) + // TODO: optimize so we're not constructing grapheme cursor twice + if is_grapheme_boundary_byte(slice, byte_idx) { + byte_idx + } else { + next_grapheme_boundary_byte(slice, byte_idx) + } } } @@ -235,6 +240,31 @@ pub fn is_grapheme_boundary(slice: RopeSlice, char_idx: usize) -> bool { } } +/// Returns whether the given byte position is a grapheme boundary. +#[must_use] +pub fn is_grapheme_boundary_byte(slice: RopeSlice, byte_idx: usize) -> bool { + // Bounds check + debug_assert!(byte_idx <= slice.len_bytes()); + + // Get the chunk with our byte index in it. + let (chunk, chunk_byte_idx, _, _) = slice.chunk_at_byte(byte_idx); + + // Set up the grapheme cursor. + let mut gc = GraphemeCursor::new(byte_idx, slice.len_bytes(), true); + + // Determine if the given position is a grapheme cluster boundary. + loop { + match gc.is_boundary(chunk, chunk_byte_idx) { + Ok(n) => return n, + Err(GraphemeIncomplete::PreContext(n)) => { + let (ctx_chunk, ctx_byte_start, _, _) = slice.chunk_at_byte(n - 1); + gc.provide_context(ctx_chunk, ctx_byte_start); + } + Err(_) => unreachable!(), + } + } +} + /// An iterator over the graphemes of a `RopeSlice`. #[derive(Clone)] pub struct RopeGraphemes<'a> { -- cgit v1.2.3-70-g09d2