diff options
Diffstat (limited to 'helix-core/src')
-rw-r--r-- | helix-core/src/comment.rs | 6 | ||||
-rw-r--r-- | helix-core/src/graphemes.rs | 23 | ||||
-rw-r--r-- | helix-core/src/line_ending.rs | 7 | ||||
-rw-r--r-- | helix-core/src/match_brackets.rs | 7 | ||||
-rw-r--r-- | helix-core/src/movement.rs | 748 | ||||
-rw-r--r-- | helix-core/src/object.rs | 2 | ||||
-rw-r--r-- | helix-core/src/position.rs | 144 | ||||
-rw-r--r-- | helix-core/src/search.rs | 38 | ||||
-rw-r--r-- | helix-core/src/selection.rs | 558 | ||||
-rw-r--r-- | helix-core/src/surround.rs | 75 | ||||
-rw-r--r-- | helix-core/src/syntax.rs | 18 | ||||
-rw-r--r-- | helix-core/src/textobject.rs | 261 |
12 files changed, 1174 insertions, 713 deletions
diff --git a/helix-core/src/comment.rs b/helix-core/src/comment.rs index fadd88e0..6fc1234d 100644 --- a/helix-core/src/comment.rs +++ b/helix-core/src/comment.rs @@ -64,8 +64,10 @@ pub fn toggle_line_comments(doc: &Rope, selection: &Selection, token: Option<&st let mut min_next_line = 0; for selection in selection { - let start = text.char_to_line(selection.from()).max(min_next_line); - let end = text.char_to_line(selection.to()) + 1; + let (start, end) = selection.line_range(text); + let start = start.max(min_next_line).min(text.len_lines()); + let end = (end + 1).min(text.len_lines()); + lines.extend(start..end); min_next_line = end + 1; } diff --git a/helix-core/src/graphemes.rs b/helix-core/src/graphemes.rs index f71b6d5f..0465fe51 100644 --- a/helix-core/src/graphemes.rs +++ b/helix-core/src/graphemes.rs @@ -71,6 +71,8 @@ pub fn nth_prev_grapheme_boundary(slice: RopeSlice, char_idx: usize, n: usize) - } /// Finds the previous grapheme boundary before the given char position. +#[must_use] +#[inline(always)] pub fn prev_grapheme_boundary(slice: RopeSlice, char_idx: usize) -> usize { nth_prev_grapheme_boundary(slice, char_idx, 1) } @@ -117,21 +119,38 @@ pub fn nth_next_grapheme_boundary(slice: RopeSlice, char_idx: usize, n: usize) - } /// Finds the next grapheme boundary after the given char position. +#[must_use] +#[inline(always)] pub fn next_grapheme_boundary(slice: RopeSlice, char_idx: usize) -> usize { nth_next_grapheme_boundary(slice, char_idx, 1) } /// Returns the passed char index if it's already a grapheme boundary, /// or the next grapheme boundary char index if not. -pub fn ensure_grapheme_boundary(slice: RopeSlice, char_idx: usize) -> usize { +#[must_use] +#[inline] +pub fn ensure_grapheme_boundary_next(slice: RopeSlice, char_idx: usize) -> usize { if char_idx == 0 { - 0 + char_idx } else { next_grapheme_boundary(slice, char_idx - 1) } } +/// Returns the passed char index if it's already a grapheme boundary, +/// or the prev grapheme boundary char index if not. +#[must_use] +#[inline] +pub fn ensure_grapheme_boundary_prev(slice: RopeSlice, char_idx: usize) -> usize { + if char_idx == slice.len_chars() { + char_idx + } else { + prev_grapheme_boundary(slice, char_idx + 1) + } +} + /// Returns whether the given char position is a grapheme boundary. +#[must_use] pub fn is_grapheme_boundary(slice: RopeSlice, char_idx: usize) -> bool { // Bounds check debug_assert!(char_idx <= slice.len_chars()); diff --git a/helix-core/src/line_ending.rs b/helix-core/src/line_ending.rs index e3ff6478..18ea5f9f 100644 --- a/helix-core/src/line_ending.rs +++ b/helix-core/src/line_ending.rs @@ -159,6 +159,13 @@ pub fn line_end_char_index(slice: &RopeSlice, line: usize) -> usize { .unwrap_or(0) } +/// Fetches line `line_idx` from the passed rope slice, sans any line ending. +pub fn line_without_line_ending<'a>(slice: &'a RopeSlice, line_idx: usize) -> RopeSlice<'a> { + let start = slice.line_to_char(line_idx); + let end = line_end_char_index(slice, line_idx); + slice.slice(start..end) +} + /// Returns the char index of the end of the given RopeSlice, not including /// any final line ending. pub fn rope_end_without_line_ending(slice: &RopeSlice) -> usize { diff --git a/helix-core/src/match_brackets.rs b/helix-core/src/match_brackets.rs index 2aa87620..f3d9e845 100644 --- a/helix-core/src/match_brackets.rs +++ b/helix-core/src/match_brackets.rs @@ -24,12 +24,13 @@ pub fn find(syntax: &Syntax, doc: &Rope, pos: usize) -> Option<usize> { return None; } - let start_byte = node.start_byte(); let len = doc.len_bytes(); - if start_byte >= len { + let start_byte = node.start_byte(); + let end_byte = node.end_byte() - 1; // it's end exclusive + if start_byte >= len || end_byte >= len { return None; } - let end_byte = node.end_byte() - 1; // it's end exclusive + let start_char = doc.byte_to_char(start_byte); let end_char = doc.byte_to_char(end_byte); diff --git a/helix-core/src/movement.rs b/helix-core/src/movement.rs index f9e5deb4..4af82a1d 100644 --- a/helix-core/src/movement.rs +++ b/helix-core/src/movement.rs @@ -1,12 +1,14 @@ -use std::iter::{self, from_fn}; +use std::iter; use ropey::iter::Chars; use crate::{ chars::{categorize_char, char_is_line_ending, CharCategory}, coords_at_pos, - graphemes::{nth_next_grapheme_boundary, nth_prev_grapheme_boundary}, - line_ending::{get_line_ending, line_end_char_index}, + graphemes::{ + next_grapheme_boundary, nth_next_grapheme_boundary, nth_prev_grapheme_boundary, + prev_grapheme_boundary, + }, pos_at_coords, Position, Range, RopeSlice, }; @@ -29,25 +31,17 @@ pub fn move_horizontally( count: usize, behaviour: Movement, ) -> Range { - let pos = range.head; - let line = slice.char_to_line(pos); - // TODO: we can optimize clamping by passing in RopeSlice limited to current line. that way - // we stop calculating past start/end of line. - let pos = match dir { - Direction::Backward => { - let start = slice.line_to_char(line); - nth_prev_grapheme_boundary(slice, pos, count).max(start) - } - Direction::Forward => { - let end_char_idx = line_end_char_index(&slice, line); - nth_next_grapheme_boundary(slice, pos, count).min(end_char_idx) - } - }; - let anchor = match behaviour { - Movement::Extend => range.anchor, - Movement::Move => pos, + let pos = range.cursor(slice); + + // Compute the new position. + let new_pos = if dir == Direction::Backward { + nth_prev_grapheme_boundary(slice, pos, count) + } else { + nth_next_grapheme_boundary(slice, pos, count) }; - Range::new(anchor, pos) + + // Compute the final new range. + range.put_cursor(slice, new_pos, behaviour == Movement::Extend) } pub fn move_vertically( @@ -57,36 +51,34 @@ pub fn move_vertically( count: usize, behaviour: Movement, ) -> Range { - let Position { row, col } = coords_at_pos(slice, range.head); + let pos = range.cursor(slice); + // Compute the current position's 2d coordinates. + let Position { row, col } = coords_at_pos(slice, pos); let horiz = range.horiz.unwrap_or(col as u32); - let new_line = match dir { - Direction::Backward => row.saturating_sub(count), - Direction::Forward => std::cmp::min( - row.saturating_add(count), - slice.len_lines().saturating_sub(2), - ), - }; - - // Length of the line sans line-ending. - let new_line_len = { - let line = slice.line(new_line); - line.len_chars() - get_line_ending(&line).map(|le| le.len_chars()).unwrap_or(0) + // Compute the new position. + let (new_pos, new_row) = { + let new_row = if dir == Direction::Backward { + row.saturating_sub(count) + } else { + (row + count).min(slice.len_lines().saturating_sub(1)) + }; + let new_col = col.max(horiz as usize); + ( + pos_at_coords(slice, Position::new(new_row, new_col), true), + new_row, + ) }; - let new_col = std::cmp::min(horiz as usize, new_line_len); - - let pos = pos_at_coords(slice, Position::new(new_line, new_col)); - - let anchor = match behaviour { - Movement::Extend => range.anchor, - Movement::Move => pos, - }; + // Special-case to avoid moving to the end of the last non-empty line. + if behaviour == Movement::Extend && slice.line(new_row).len_chars() == 0 { + return range; + } - let mut range = Range::new(anchor, pos); - range.horiz = Some(horiz); - range + let mut new_range = range.put_cursor(slice, new_pos, behaviour == Movement::Extend); + new_range.horiz = Some(horiz); + new_range } pub fn move_next_word_start(slice: RopeSlice, range: Range, count: usize) -> Range { @@ -118,8 +110,41 @@ pub fn move_prev_word_end(slice: RopeSlice, range: Range, count: usize) -> Range } fn word_move(slice: RopeSlice, range: Range, count: usize, target: WordMotionTarget) -> Range { - (0..count).fold(range, |range, _| { - slice.chars_at(range.head).range_to_target(target, range) + let is_prev = matches!( + target, + WordMotionTarget::PrevWordStart + | WordMotionTarget::PrevLongWordStart + | WordMotionTarget::PrevWordEnd + ); + + // Special-case early-out. + if (is_prev && range.head == 0) || (!is_prev && range.head == slice.len_chars()) { + return range; + } + + // Prepare the range appropriately based on the target movement + // direction. This is addressing two things at once: + // + // 1. Block-cursor semantics. + // 2. The anchor position being irrelevant to the output result. + #[allow(clippy::collapsible_else_if)] // Makes the structure clearer in this case. + let start_range = if is_prev { + if range.anchor < range.head { + Range::new(range.head, prev_grapheme_boundary(slice, range.head)) + } else { + Range::new(next_grapheme_boundary(slice, range.head), range.head) + } + } else { + if range.anchor < range.head { + Range::new(prev_grapheme_boundary(slice, range.head), range.head) + } else { + Range::new(range.head, next_grapheme_boundary(slice, range.head)) + } + }; + + // Do the main work. + (0..count).fold(start_range, |r, _| { + slice.chars_at(r.head).range_to_target(target, r) }) } @@ -176,79 +201,75 @@ pub trait CharHelpers { fn range_to_target(&mut self, target: WordMotionTarget, origin: Range) -> Range; } -enum WordMotionPhase { - Start, - SkipNewlines, - ReachTarget, -} - impl CharHelpers for Chars<'_> { + /// Note: this only changes the anchor of the range if the head is effectively + /// starting on a boundary (either directly or after skipping newline characters). + /// Any other changes to the anchor should be handled by the calling code. fn range_to_target(&mut self, target: WordMotionTarget, origin: Range) -> Range { - // Characters are iterated forward or backwards depending on the motion direction. - let characters: Box<dyn Iterator<Item = char>> = match target { + let is_prev = matches!( + target, WordMotionTarget::PrevWordStart - | WordMotionTarget::PrevLongWordStart - | WordMotionTarget::PrevWordEnd => { + | WordMotionTarget::PrevLongWordStart + | WordMotionTarget::PrevWordEnd + ); + + // Reverse the iterator if needed for the motion direction. + if is_prev { + self.reverse(); + } + + // Function to advance index in the appropriate motion direction. + let advance: &dyn Fn(&mut usize) = if is_prev { + &|idx| *idx = idx.saturating_sub(1) + } else { + &|idx| *idx += 1 + }; + + // Initialize state variables. + let mut anchor = origin.anchor; + let mut head = origin.head; + let mut prev_ch = { + let ch = self.prev(); + if ch.is_some() { self.next(); - Box::new(from_fn(|| self.prev())) } - _ => Box::new(self), + ch }; - // Index advancement also depends on the direction. - let advance: &dyn Fn(&mut usize) = match target { - WordMotionTarget::PrevWordStart - | WordMotionTarget::PrevLongWordStart - | WordMotionTarget::PrevWordEnd => &|u| *u = u.saturating_sub(1), - _ => &|u| *u += 1, - }; + // Skip any initial newline characters. + while let Some(ch) = self.next() { + if char_is_line_ending(ch) { + prev_ch = Some(ch); + advance(&mut head); + } else { + self.prev(); + break; + } + } + if prev_ch.map(char_is_line_ending).unwrap_or(false) { + anchor = head; + } - let mut characters = characters.peekable(); - let mut phase = WordMotionPhase::Start; - let mut head = origin.head; - let mut anchor: Option<usize> = None; - let is_boundary = - |a: char, b: Option<char>| categorize_char(a) != categorize_char(b.unwrap_or(a)); - while let Some(peek) = characters.peek().copied() { - phase = match phase { - WordMotionPhase::Start => { - characters.next(); - if characters.peek().is_none() { - break; // We're at the end, so there's nothing to do. - } - // Anchor may remain here if the head wasn't at a boundary - if !is_boundary(peek, characters.peek().copied()) && !char_is_line_ending(peek) - { - anchor = Some(head); - } - // First character is always skipped by the head - advance(&mut head); - WordMotionPhase::SkipNewlines - } - WordMotionPhase::SkipNewlines => { - if char_is_line_ending(peek) { - characters.next(); - if characters.peek().is_some() { - advance(&mut head); - } - WordMotionPhase::SkipNewlines - } else { - WordMotionPhase::ReachTarget - } - } - WordMotionPhase::ReachTarget => { - characters.next(); - anchor = anchor.or(Some(head)); - if reached_target(target, peek, characters.peek()) { - break; - } else { - advance(&mut head); - } - WordMotionPhase::ReachTarget + // Find our target position(s). + let head_start = head; + while let Some(next_ch) = self.next() { + if prev_ch.is_none() || reached_target(target, prev_ch.unwrap(), next_ch) { + if head == head_start { + anchor = head; + } else { + break; } } + prev_ch = Some(next_ch); + advance(&mut head); + } + + // Un-reverse the iterator if needed. + if is_prev { + self.reverse(); } - Range::new(anchor.unwrap_or(origin.anchor), head) + + Range::new(anchor, head) } } @@ -265,28 +286,23 @@ fn is_long_word_boundary(a: char, b: char) -> bool { } } -fn reached_target(target: WordMotionTarget, peek: char, next_peek: Option<&char>) -> bool { - let next_peek = match next_peek { - Some(next_peek) => next_peek, - None => return true, - }; - +fn reached_target(target: WordMotionTarget, prev_ch: char, next_ch: char) -> bool { match target { WordMotionTarget::NextWordStart | WordMotionTarget::PrevWordEnd => { - is_word_boundary(peek, *next_peek) - && (char_is_line_ending(*next_peek) || !next_peek.is_whitespace()) + is_word_boundary(prev_ch, next_ch) + && (char_is_line_ending(next_ch) || !next_ch.is_whitespace()) } WordMotionTarget::NextWordEnd | WordMotionTarget::PrevWordStart => { - is_word_boundary(peek, *next_peek) - && (!peek.is_whitespace() || char_is_line_ending(*next_peek)) + is_word_boundary(prev_ch, next_ch) + && (!prev_ch.is_whitespace() || char_is_line_ending(next_ch)) } WordMotionTarget::NextLongWordStart => { - is_long_word_boundary(peek, *next_peek) - && (char_is_line_ending(*next_peek) || !next_peek.is_whitespace()) + is_long_word_boundary(prev_ch, next_ch) + && (char_is_line_ending(next_ch) || !next_ch.is_whitespace()) } WordMotionTarget::NextLongWordEnd | WordMotionTarget::PrevLongWordStart => { - is_long_word_boundary(peek, *next_peek) - && (!peek.is_whitespace() || char_is_line_ending(*next_peek)) + is_long_word_boundary(prev_ch, next_ch) + && (!prev_ch.is_whitespace() || char_is_line_ending(next_ch)) } } } @@ -317,7 +333,7 @@ mod test { fn test_vertical_move() { let text = Rope::from("abcd\nefg\nwrs"); let slice = text.slice(..); - let pos = pos_at_coords(slice, (0, 4).into()); + let pos = pos_at_coords(slice, (0, 4).into(), true); let range = Range::new(pos, pos); assert_eq!( @@ -330,10 +346,10 @@ mod test { } #[test] - fn horizontal_moves_through_single_line_in_single_line_text() { + fn horizontal_moves_through_single_line_text() { let text = Rope::from(SINGLE_LINE_SAMPLE); let slice = text.slice(..); - let position = pos_at_coords(slice, (0, 0).into()); + let position = pos_at_coords(slice, (0, 0).into(), true); let mut range = Range::point(position); @@ -353,23 +369,23 @@ mod test { } #[test] - fn horizontal_moves_through_single_line_in_multiline_text() { + fn horizontal_moves_through_multiline_text() { let text = Rope::from(MULTILINE_SAMPLE); let slice = text.slice(..); - let position = pos_at_coords(slice, (0, 0).into()); + let position = pos_at_coords(slice, (0, 0).into(), true); let mut range = Range::point(position); let moves_and_expected_coordinates = IntoIter::new([ - ((Direction::Forward, 1usize), (0, 1)), // M|ultiline\n - ((Direction::Forward, 2usize), (0, 3)), // Mul|tiline\n - ((Direction::Backward, 6usize), (0, 0)), // |Multiline\n - ((Direction::Backward, 999usize), (0, 0)), // |Multiline\n - ((Direction::Forward, 3usize), (0, 3)), // Mul|tiline\n - ((Direction::Forward, 0usize), (0, 3)), // Mul|tiline\n - ((Direction::Backward, 0usize), (0, 3)), // Mul|tiline\n - ((Direction::Forward, 999usize), (0, 9)), // Multiline|\n - ((Direction::Forward, 999usize), (0, 9)), // Multiline|\n + ((Direction::Forward, 11usize), (1, 1)), // Multiline\nt|ext sample\n... + ((Direction::Backward, 1usize), (1, 0)), // Multiline\n|text sample\n... + ((Direction::Backward, 5usize), (0, 5)), // Multi|line\ntext sample\n... + ((Direction::Backward, 999usize), (0, 0)), // |Multiline\ntext sample\n... + ((Direction::Forward, 3usize), (0, 3)), // Mul|tiline\ntext sample\n... + ((Direction::Forward, 0usize), (0, 3)), // Mul|tiline\ntext sample\n... + ((Direction::Backward, 0usize), (0, 3)), // Mul|tiline\ntext sample\n... + ((Direction::Forward, 999usize), (5, 0)), // ...and whitespaced\n| + ((Direction::Forward, 999usize), (5, 0)), // ...and whitespaced\n| ]); for ((direction, amount), coordinates) in moves_and_expected_coordinates { @@ -383,7 +399,7 @@ mod test { fn selection_extending_moves_in_single_line_text() { let text = Rope::from(SINGLE_LINE_SAMPLE); let slice = text.slice(..); - let position = pos_at_coords(slice, (0, 0).into()); + let position = pos_at_coords(slice, (0, 0).into(), true); let mut range = Range::point(position); let original_anchor = range.anchor; @@ -403,18 +419,19 @@ mod test { #[test] fn vertical_moves_in_single_column() { let text = Rope::from(MULTILINE_SAMPLE); - let slice = dbg!(&text).slice(..); - let position = pos_at_coords(slice, (0, 0).into()); + let slice = text.slice(..); + let position = pos_at_coords(slice, (0, 0).into(), true); let mut range = Range::point(position); let moves_and_expected_coordinates = IntoIter::new([ ((Direction::Forward, 1usize), (1, 0)), ((Direction::Forward, 2usize), (3, 0)), + ((Direction::Forward, 1usize), (4, 0)), ((Direction::Backward, 999usize), (0, 0)), - ((Direction::Forward, 3usize), (3, 0)), - ((Direction::Forward, 0usize), (3, 0)), - ((Direction::Backward, 0usize), (3, 0)), - ((Direction::Forward, 5), (4, 0)), - ((Direction::Forward, 999usize), (4, 0)), + ((Direction::Forward, 4usize), (4, 0)), + ((Direction::Forward, 0usize), (4, 0)), + ((Direction::Backward, 0usize), (4, 0)), + ((Direction::Forward, 5), (5, 0)), + ((Direction::Forward, 999usize), (5, 0)), ]); for ((direction, amount), coordinates) in moves_and_expected_coordinates { @@ -428,7 +445,7 @@ mod test { fn vertical_moves_jumping_column() { let text = Rope::from(MULTILINE_SAMPLE); let slice = text.slice(..); - let position = pos_at_coords(slice, (0, 0).into()); + let position = pos_at_coords(slice, (0, 0).into(), true); let mut range = Range::point(position); enum Axis { @@ -446,7 +463,8 @@ mod test { ((Axis::V, Direction::Forward, 1usize), (3, 8)), // Behaviour is preserved even through long jumps ((Axis::V, Direction::Backward, 999usize), (0, 8)), - ((Axis::V, Direction::Forward, 999usize), (4, 8)), + ((Axis::V, Direction::Forward, 4usize), (4, 8)), + ((Axis::V, Direction::Forward, 999usize), (5, 0)), ]); for ((axis, direction, amount), coordinates) in moves_and_expected_coordinates { @@ -460,10 +478,10 @@ mod test { } #[test] - fn multibyte_character_column_jumps() { + fn multibyte_character_wide_column_jumps() { let text = Rope::from(MULTIBYTE_CHARACTER_SAMPLE); let slice = text.slice(..); - let position = pos_at_coords(slice, (0, 0).into()); + let position = pos_at_coords(slice, (0, 0).into(), true); let mut range = Range::point(position); // FIXME: The behaviour captured in this test diverges from both Kakoune and Vim. These @@ -474,10 +492,14 @@ mod test { V, } let moves_and_expected_coordinates = IntoIter::new([ - // Places cursor at the fourth kana + // Places cursor at the fourth kana. ((Axis::H, Direction::Forward, 4), (0, 4)), - // Descent places cursor at the fourth character. + // Descent places cursor at the 4th character. ((Axis::V, Direction::Forward, 1usize), (1, 4)), + // Moving back 1 character. + ((Axis::H, Direction::Backward, 1usize), (1, 3)), + // Jumping back up 1 line. + ((Axis::V, Direction::Backward, 1usize), (0, 3)), ]); for ((axis, direction, amount), coordinates) in moves_and_expected_coordinates { @@ -512,42 +534,42 @@ mod test { fn test_behaviour_when_moving_to_start_of_next_words() { let tests = array::IntoIter::new([ ("Basic forward motion stops at the first space", - vec![(1, Range::new(0, 0), Range::new(0, 5))]), + vec![(1, Range::new(0, 0), Range::new(0, 6))]), (" Starting from a boundary advances the anchor", - vec![(1, Range::new(0, 0), Range::new(1, 9))]), + vec![(1, Range::new(0, 0), Range::new(1, 10))]), ("Long whitespace gap is bridged by the head", - vec![(1, Range::new(0, 0), Range::new(0, 10))]), + vec![(1, Range::new(0, 0), Range::new(0, 11))]), ("Previous anchor is irrelevant for forward motions", - vec![(1, Range::new(12, 0), Range::new(0, 8))]), + vec![(1, Range::new(12, 0), Range::new(0, 9))]), (" Starting from whitespace moves to last space in sequence", - vec![(1, Range::new(0, 0), Range::new(0, 3))]), + vec![(1, Range::new(0, 0), Range::new(0, 4))]), ("Starting from mid-word leaves anchor at start position and moves head", - vec![(1, Range::new(3, 3), Range::new(3, 8))]), + vec![(1, Range::new(3, 3), Range::new(3, 9))]), ("Identifiers_with_underscores are considered a single word", - vec![(1, Range::new(0, 0), Range::new(0, 28))]), + vec![(1, Range::new(0, 0), Range::new(0, 29))]), ("Jumping\n into starting whitespace selects the spaces before 'into'", - vec![(1, Range::new(0, 6), Range::new(8, 11))]), + vec![(1, Range::new(0, 7), Range::new(8, 12))]), ("alphanumeric.!,and.?=punctuation are considered 'words' for the purposes of word motion", vec![ - (1, Range::new(0, 0), Range::new(0, 11)), - (1, Range::new(0, 11), Range::new(12, 14)), - (1, Range::new(12, 14), Range::new(15, 17)) + (1, Range::new(0, 0), Range::new(0, 12)), + (1, Range::new(0, 12), Range::new(12, 15)), + (1, Range::new(12, 15), Range::new(15, 18)) ]), ("... ... punctuation and spaces behave as expected", vec![ - (1, Range::new(0, 0), Range::new(0, 5)), - (1, Range::new(0, 5), Range::new(6, 9)), + (1, Range::new(0, 0), Range::new(0, 6)), + (1, Range::new(0, 6), Range::new(6, 10)), ]), (".._.._ punctuation is not joined by underscores into a single block", - vec![(1, Range::new(0, 0), Range::new(0, 1))]), + vec![(1, Range::new(0, 0), Range::new(0, 2))]), ("Newlines\n\nare bridged seamlessly.", vec![ - (1, Range::new(0, 0), Range::new(0, 7)), - (1, Range::new(0, 7), Range::new(10, 13)), + (1, Range::new(0, 0), Range::new(0, 8)), + (1, Range::new(0, 8), Range::new(10, 14)), ]), ("Jumping\n\n\n\n\n\n from newlines to whitespace selects whitespace.", vec![ - (1, Range::new(0, 8), Range::new(13, 15)), + (1, Range::new(0, 9), Range::new(13, 16)), ]), ("A failed motion does not modify the range", vec![ @@ -555,17 +577,17 @@ mod test { ]), ("oh oh oh two character words!", vec![ - (1, Range::new(0, 0), Range::new(0, 2)), - (1, Range::new(0, 2), Range::new(3, 5)), - (1, Range::new(0, 1), Range::new(2, 2)), + (1, Range::new(0, 0), Range::new(0, 3)), + (1, Range::new(0, 3), Range::new(3, 6)), + (1, Range::new(0, 2), Range::new(1, 3)), ]), ("Multiple motions at once resolve correctly", vec![ - (3, Range::new(0, 0), Range::new(17, 19)), + (3, Range::new(0, 0), Range::new(17, 20)), ]), ("Excessive motions are performed partially", vec![ - (999, Range::new(0, 0), Range::new(32, 40)), + (999, Range::new(0, 0), Range::new(32, 41)), ]), ("", // Edge case of moving forward in empty string vec![ @@ -573,16 +595,16 @@ mod test { ]), ("\n\n\n\n\n", // Edge case of moving forward in all newlines vec![ - (1, Range::new(0, 0), Range::new(0, 4)), + (1, Range::new(0, 0), Range::new(5, 5)), ]), ("\n \n \n Jumping through alternated space blocks and newlines selects the space blocks", vec![ - (1, Range::new(0, 0), Range::new(1, 3)), - (1, Range::new(1, 3), Range::new(5, 7)), + (1, Range::new(0, 0), Range::new(1, 4)), + (1, Range::new(1, 4), Range::new(5, 8)), ]), ("ヒーリクス multibyte characters behave as normal characters", vec![ - (1, Range::new(0, 0), Range::new(0, 5)), + (1, Range::new(0, 0), Range::new(0, 6)), ]), ]); @@ -598,40 +620,40 @@ mod test { fn test_behaviour_when_moving_to_start_of_next_long_words() { let tests = array::IntoIter::new([ ("Basic forward motion stops at the first space", - vec![(1, Range::new(0, 0), Range::new(0, 5))]), + vec![(1, Range::new(0, 0), Range::new(0, 6))]), (" Starting from a boundary advances the anchor", - vec![(1, Range::new(0, 0), Range::new(1, 9))]), + vec![(1, Range::new(0, 0), Range::new(1, 10))]), ("Long whitespace gap is bridged by the head", - vec![(1, Range::new(0, 0), Range::new(0, 10))]), + vec![(1, Range::new(0, 0), Range::new(0, 11))]), ("Previous anchor is irrelevant for forward motions", - vec![(1, Range::new(12, 0), Range::new(0, 8))]), + vec![(1, Range::new(12, 0), Range::new(0, 9))]), (" Starting from whitespace moves to last space in sequence", - vec![(1, Range::new(0, 0), Range::new(0, 3))]), + vec![(1, Range::new(0, 0), Range::new(0, 4))]), ("Starting from mid-word leaves anchor at start position and moves head", - vec![(1, Range::new(3, 3), Range::new(3, 8))]), + vec![(1, Range::new(3, 3), Range::new(3, 9))]), ("Identifiers_with_underscores are considered a single word", - vec![(1, Range::new(0, 0), Range::new(0, 28))]), + vec![(1, Range::new(0, 0), Range::new(0, 29))]), ("Jumping\n into starting whitespace selects the spaces before 'into'", - vec![(1, Range::new(0, 6), Range::new(8, 11))]), + vec![(1, Range::new(0, 7), Range::new(8, 12))]), ("alphanumeric.!,and.?=punctuation are not treated any differently than alphanumerics", vec![ - (1, Range::new(0, 0), Range::new(0, 32)), + (1, Range::new(0, 0), Range::new(0, 33)), ]), ("... ... punctuation and spaces behave as expected", vec![ - (1, Range::new(0, 0), Range::new(0, 5)), - (1, Range::new(0, 5), Range::new(6, 9)), + (1, Range::new(0, 0), Range::new(0, 6)), + (1, Range::new(0, 6), Range::new(6, 10)), ]), (".._.._ punctuation is joined by underscores into a single word, as it behaves like alphanumerics", - vec![(1, Range::new(0, 0), Range::new(0, 6))]), + vec![(1, Range::new(0, 0), Range::new(0, 7))]), ("Newlines\n\nare bridged seamlessly.", vec![ - (1, Range::new(0, 0), Range::new(0, 7)), - (1, Range::new(0, 7), Range::new(10, 13)), + (1, Range::new(0, 0), Range::new(0, 8)), + (1, Range::new(0, 8), Range::new(10, 14)), ]), ("Jumping\n\n\n\n\n\n from newlines to whitespace selects whitespace.", vec![ - (1, Range::new(0, 8), Range::new(13, 15)), + (1, Range::new(0, 9), Range::new(13, 16)), ]), ("A failed motion does not modify the range", vec![ @@ -639,17 +661,17 @@ mod test { ]), ("oh oh oh two character words!", vec![ - (1, Range::new(0, 0), Range::new(0, 2)), - (1, Range::new(0, 2), Range::new(3, 5)), - (1, Range::new(0, 1), Range::new(2, 2)), + (1, Range::new(0, 0), Range::new(0, 3)), + (1, Range::new(0, 3), Range::new(3, 6)), + (1, Range::new(0, 1), Range::new(0, 3)), ]), ("Multiple motions at once resolve correctly", vec![ - (3, Range::new(0, 0), Range::new(17, 19)), + (3, Range::new(0, 0), Range::new(17, 20)), ]), ("Excessive motions are performed partially", vec![ - (999, Range::new(0, 0), Range::new(32, 40)), + (999, Range::new(0, 0), Range::new(32, 41)), ]), ("", // Edge case of moving forward in empty string vec![ @@ -657,16 +679,16 @@ mod test { ]), ("\n\n\n\n\n", // Edge case of moving forward in all newlines vec![ - (1, Range::new(0, 0), Range::new(0, 4)), + (1, Range::new(0, 0), Range::new(5, 5)), ]), ("\n \n \n Jumping through alternated space blocks and newlines selects the space blocks", vec![ - (1, Range::new(0, 0), Range::new(1, 3)), - (1, Range::new(1, 3), Range::new(5, 7)), + (1, Range::new(0, 0), Range::new(1, 4)), + (1, Range::new(1, 4), Range::new(5, 8)), ]), ("ヒー..リクス multibyte characters behave as normal characters, including their interaction with punctuation", vec![ - (1, Range::new(0, 0), Range::new(0, 7)), + (1, Range::new(0, 0), Range::new(0, 8)), ]), ]); @@ -682,44 +704,47 @@ mod test { fn test_behaviour_when_moving_to_start_of_previous_words() { let tests = array::IntoIter::new([ ("Basic backward motion from the middle of a word", - vec![(1, Range::new(3, 3), Range::new(3, 0))]), - ("Starting from after boundary retreats the anchor", - vec![(1, Range::new(0, 8), Range::new(7, 0))]), + vec![(1, Range::new(3, 3), Range::new(4, 0))]), + + // // Why do we want this behavior? The current behavior fails this + // // test, but seems better and more consistent. + // ("Starting from after boundary retreats the anchor", + // vec![(1, Range::new(0, 9), Range::new(8, 0))]), + (" Jump to start of a word preceded by whitespace", - vec![(1, Range::new(5, 5), Range::new(5, 4))]), + vec![(1, Range::new(5, 5), Range::new(6, 4))]), (" Jump to start of line from start of word preceded by whitespace", - vec![(1, Range::new(4, 4), Range::new(3, 0))]), + vec![(1, Range::new(4, 4), Range::new(4, 0))]), ("Previous anchor is irrelevant for backward motions", - vec![(1, Range::new(12, 5), Range::new(5, 0))]), + vec![(1, Range::new(12, 5), Range::new(6, 0))]), (" Starting from whitespace moves to first space in sequence", - vec![(1, Range::new(0, 3), Range::new(3, 0))]), + vec![(1, Range::new(0, 4), Range::new(4, 0))]), ("Identifiers_with_underscores are considered a single word", vec![(1, Range::new(0, 20), Range::new(20, 0))]), ("Jumping\n \nback through a newline selects whitespace", - vec![(1, Range::new(0, 13), Range::new(11, 8))]), + vec![(1, Range::new(0, 13), Range::new(12, 8))]), ("Jumping to start of word from the end selects the word", - vec![(1, Range::new(6, 6), Range::new(6, 0))]), + vec![(1, Range::new(6, 7), Range::new(7, 0))]), ("alphanumeric.!,and.?=punctuation are considered 'words' for the purposes of word motion", vec![ - (1, Range::new(30, 30), Range::new(30, 21)), - (1, Range::new(30, 21), Range::new(20, 18)), - (1, Range::new(20, 18), Range::new(17, 15)) + (1, Range::new(29, 30), Range::new(30, 21)), + (1, Range::new(30, 21), Range::new(21, 18)), + (1, Range::new(21, 18), Range::new(18, 15)) ]), - ("... ... punctuation and spaces behave as expected", vec![ - (1, Range::new(0, 10), Range::new(9, 6)), - (1, Range::new(9, 6), Range::new(5, 0)), + (1, Range::new(0, 10), Range::new(10, 6)), + (1, Range::new(10, 6), Range::new(6, 0)), ]), (".._.._ punctuation is not joined by underscores into a single block", - vec![(1, Range::new(0, 5), Range::new(4, 3))]), + vec![(1, Range::new(0, 6), Range::new(5, 3))]), ("Newlines\n\nare bridged seamlessly.", vec![ - (1, Range::new(0, 10), Range::new(7, 0)), + (1, Range::new(0, 10), Range::new(8, 0)), ]), ("Jumping \n\n\n\n\nback from within a newline group selects previous block", vec![ - (1, Range::new(0, 13), Range::new(10, 0)), + (1, Range::new(0, 13), Range::new(11, 0)), ]), ("Failed motions do not modify the range", vec![ @@ -727,11 +752,11 @@ mod test { ]), ("Multiple motions at once resolve correctly", vec![ - (3, Range::new(18, 18), Range::new(8, 0)), + (3, Range::new(18, 18), Range::new(9, 0)), ]), ("Excessive motions are performed partially", vec![ - (999, Range::new(40, 40), Range::new(9, 0)), + (999, Range::new(40, 40), Range::new(10, 0)), ]), ("", // Edge case of moving backwards in empty string vec![ @@ -739,16 +764,16 @@ mod test { ]), ("\n\n\n\n\n", // Edge case of moving backwards in all newlines vec![ - (1, Range::new(0, 0), Range::new(0, 0)), + (1, Range::new(5, 5), Range::new(0, 0)), ]), (" \n \nJumping back through alternated space blocks and newlines selects the space blocks", vec![ - (1, Range::new(0, 7), Range::new(6, 4)), - (1, Range::new(6, 4), Range::new(2, 0)), + (1, Range::new(0, 8), Range::new(7, 4)), + (1, Range::new(7, 4), Range::new(3, 0)), ]), ("ヒーリクス multibyte characters behave as normal characters", vec![ - (1, Range::new(0, 5), Range::new(4, 0)), + (1, Range::new(0, 6), Range::new(6, 0)), ]), ]); @@ -763,72 +788,89 @@ mod test { #[test] fn test_behaviour_when_moving_to_start_of_previous_long_words() { let tests = array::IntoIter::new([ - ("Basic backward motion from the middle of a word", - vec![(1, Range::new(3, 3), Range::new(3, 0))]), - ("Starting from after boundary retreats the anchor", - vec![(1, Range::new(0, 8), Range::new(7, 0))]), - (" Jump to start of a word preceded by whitespace", - vec![(1, Range::new(5, 5), Range::new(5, 4))]), - (" Jump to start of line from start of word preceded by whitespace", - vec![(1, Range::new(4, 4), Range::new(3, 0))]), + ( + "Basic backward motion from the middle of a word", + vec![(1, Range::new(3, 3), Range::new(4, 0))], + ), + + // // Why do we want this behavior? The current behavior fails this + // // test, but seems better and more consistent. + // ("Starting from after boundary retreats the anchor", + // vec![(1, Range::new(0, 9), Range::new(8, 0))]), + + ( + " Jump to start of a word preceded by whitespace", + vec![(1, Range::new(5, 5), Range::new(6, 4))], + ), + ( + " Jump to start of line from start of word preceded by whitespace", + vec![(1, Range::new(3, 4), Range::new(4, 0))], + ), ("Previous anchor is irrelevant for backward motions", - vec![(1, Range::new(12, 5), Range::new(5, 0))]), - (" Starting from whitespace moves to first space in sequence", - vec![(1, Range::new(0, 3), Range::new(3, 0))]), + vec![(1, Range::new(12, 5), Range::new(6, 0))]), + ( + " Starting from whitespace moves to first space in sequence", + vec![(1, Range::new(0, 4), Range::new(4, 0))], + ), ("Identifiers_with_underscores are considered a single word", vec![(1, Range::new(0, 20), Range::new(20, 0))]), - ("Jumping\n \nback through a newline selects whitespace", - vec![(1, Range::new(0, 13), Range::new(11, 8))]), - ("Jumping to start of word from the end selects the word", - vec![(1, Range::new(6, 6), Range::new(6, 0))]), - ("alphanumeric.!,and.?=punctuation are treated exactly the same", - vec![ - (1, Range::new(30, 30), Range::new(30, 0)), - ]), - - ("... ... punctuation and spaces behave as expected", + ( + "Jumping\n \nback through a newline selects whitespace", + vec![(1, Range::new(0, 13), Range::new(12, 8))], + ), + ( + "Jumping to start of word from the end selects the word", + vec![(1, Range::new(6, 7), Range::new(7, 0))], + ), + ( + "alphanumeric.!,and.?=punctuation are treated exactly the same", + vec![(1, Range::new(29, 30), Range::new(30, 0))], + ), + ( + "... ... punctuation and spaces behave as expected", vec![ - (1, Range::new(0, 10), Range::new(9, 6)), - (1, Range::new(9, 6), Range::new(5, 0)), - ]), + (1, Range::new(0, 10), Range::new(10, 6)), + (1, Range::new(10, 6), Range::new(6, 0)), + ], + ), (".._.._ punctuation is joined by underscores into a single block", - vec![(1, Range::new(0, 5), Range::new(4, 0))]), - ("Newlines\n\nare bridged seamlessly.", - vec![ - (1, Range::new(0, 10), Range::new(7, 0)), - ]), - ("Jumping \n\n\n\n\nback from within a newline group selects previous block", - vec![ - (1, Range::new(0, 13), Range::new(10, 0)), - ]), - ("Failed motions do not modify the range", - vec![ - (0, Range::new(3, 0), Range::new(3, 0)), - ]), - ("Multiple motions at once resolve correctly", - vec![ - (3, Range::new(18, 18), Range::new(8, 0)), - ]), - ("Excessive motions are performed partially", - vec![ - (999, Range::new(40, 40), Range::new(9, 0)), - ]), - ("", // Edge case of moving backwards in empty string - vec![ - (1, Range::new(0, 0), Range::new(0, 0)), - ]), - ("\n\n\n\n\n", // Edge case of moving backwards in all newlines - vec![ - (1, Range::new(0, 0), Range::new(0, 0)), - ]), + vec![(1, Range::new(0, 6), Range::new(6, 0))]), + ( + "Newlines\n\nare bridged seamlessly.", + vec![(1, Range::new(0, 10), Range::new(8, 0))], + ), + ( + "Jumping \n\n\n\n\nback from within a newline group selects previous block", + vec![(1, Range::new(0, 13), Range::new(11, 0))], + ), + ( + "Failed motions do not modify the range", + vec![(0, Range::new(3, 0), Range::new(3, 0))], + ), + ( + "Multiple motions at once resolve correctly", + vec![(3, Range::new(19, 19), Range::new(9, 0))], + ), + ( + "Excessive motions are performed partially", + vec![(999, Range::new(40, 40), Range::new(10, 0))], + ), + ( + "", // Edge case of moving backwards in empty string + vec![(1, Range::new(0, 0), Range::new(0, 0))], + ), + ( + "\n\n\n\n\n", // Edge case of moving backwards in all newlines + vec![(1, Range::new(5, 5), Range::new(0, 0))], + ), (" \n \nJumping back through alternated space blocks and newlines selects the space blocks", vec![ - (1, Range::new(0, 7), Range::new(6, 4)), - (1, Range::new(6, 4), Range::new(2, 0)), + (1, Range::new(0, 8), Range::new(7, 4)), + (1, Range::new(7, 4), Range::new(3, 0)), ]), ("ヒーリ..クス multibyte characters behave as normal characters, including when interacting with punctuation", vec![ - (1, Range::new(0, 7), Range::new(6, 0)), + (1, Range::new(0, 8), Range::new(8, 0)), ]), ]); @@ -844,42 +886,46 @@ mod test { fn test_behaviour_when_moving_to_end_of_next_words() { let tests = array::IntoIter::new([ ("Basic forward motion from the start of a word to the end of it", - vec![(1, Range::new(0, 0), Range::new(0, 4))]), + vec![(1, Range::new(0, 0), Range::new(0, 5))]), ("Basic forward motion from the end of a word to the end of the next", - vec![(1, Range::new(0, 4), Range::new(5, 12))]), + vec![(1, Range::new(0, 5), Range::new(5, 13))]), ("Basic forward motion from the middle of a word to the end of it", - vec![(1, Range::new(2, 2), Range::new(2, 4))]), + vec![(1, Range::new(2, 2), Range::new(2, 5))]), (" Jumping to end of a word preceded by whitespace", - vec![(1, Range::new(0, 0), Range::new(0, 10))]), - (" Starting from a boundary advances the anchor", - vec![(1, Range::new(0, 0), Range::new(1, 8))]), + vec![(1, Range::new(0, 0), Range::new(0, 11))]), + + // // Why do we want this behavior? The current behavior fails this + // // test, but seems better and more consistent. + // (" Starting from a boundary advances the anchor", + // vec![(1, Range::new(0, 0), Range::new(1, 9))]), + ("Previous anchor is irrelevant for end of word motion", - vec![(1, Range::new(12, 2), Range::new(2, 7))]), + vec![(1, Range::new(12, 2), Range::new(2, 8))]), ("Identifiers_with_underscores are considered a single word", - vec![(1, Range::new(0, 0), Range::new(0, 27))]), + vec![(1, Range::new(0, 0), Range::new(0, 28))]), ("Jumping\n into starting whitespace selects up to the end of next word", - vec![(1, Range::new(0, 6), Range::new(8, 15))]), + vec![(1, Range::new(0, 7), Range::new(8, 16))]), ("alphanumeric.!,and.?=punctuation are considered 'words' for the purposes of word motion", vec![ - (1, Range::new(0, 0), Range::new(0, 11)), - (1, Range::new(0, 11), Range::new(12, 14)), - (1, Range::new(12, 14), Range::new(15, 17)) + (1, Range::new(0, 0), Range::new(0, 12)), + (1, Range::new(0, 12), Range::new(12, 15)), + (1, Range::new(12, 15), Range::new(15, 18)) ]), ("... ... punctuation and spaces behave as expected", vec![ - (1, Range::new(0, 0), Range::new(0, 2)), - (1, Range::new(0, 2), Range::new(3, 8)), + (1, Range::new(0, 0), Range::new(0, 3)), + (1, Range::new(0, 3), Range::new(3, 9)), ]), (".._.._ punctuation is not joined by underscores into a single block", - vec![(1, Range::new(0, 0), Range::new(0, 1))]), + vec![(1, Range::new(0, 0), Range::new(0, 2))]), ("Newlines\n\nare bridged seamlessly.", vec![ - (1, Range::new(0, 0), Range::new(0, 7)), - (1, Range::new(0, 7), Range::new(10, 12)), + (1, Range::new(0, 0), Range::new(0, 8)), + (1, Range::new(0, 8), Range::new(10, 13)), ]), ("Jumping\n\n\n\n\n\n from newlines to whitespace selects to end of next word.", vec![ - (1, Range::new(0, 8), Range::new(13, 19)), + (1, Range::new(0, 8), Range::new(13, 20)), ]), ("A failed motion does not modify the range", vec![ @@ -887,11 +933,11 @@ mod test { ]), ("Multiple motions at once resolve correctly", vec![ - (3, Range::new(0, 0), Range::new(16, 18)), + (3, Range::new(0, 0), Range::new(16, 19)), ]), ("Excessive motions are performed partially", vec![ - (999, Range::new(0, 0), Range::new(31, 40)), + (999, Range::new(0, 0), Range::new(31, 41)), ]), ("", // Edge case of moving forward in empty string vec![ @@ -899,16 +945,16 @@ mod test { ]), ("\n\n\n\n\n", // Edge case of moving forward in all newlines vec![ - (1, Range::new(0, 0), Range::new(0, 4)), + (1, Range::new(0, 0), Range::new(5, 5)), ]), ("\n \n \n Jumping through alternated space blocks and newlines selects the space blocks", vec![ - (1, Range::new(0, 0), Range::new(1, 3)), - (1, Range::new(1, 3), Range::new(5, 7)), + (1, Range::new(0, 0), Range::new(1, 4)), + (1, Range::new(1, 4), Range::new(5, 8)), ]), ("ヒーリクス multibyte characters behave as normal characters", vec![ - (1, Range::new(0, 0), Range::new(0, 4)), + (1, Range::new(0, 0), Range::new(0, 5)), ]), ]); @@ -924,44 +970,44 @@ mod test { fn test_behaviour_when_moving_to_end_of_previous_words() { let tests = array::IntoIter::new([ ("Basic backward motion from the middle of a word", - vec![(1, Range::new(9, 9), Range::new(9, 5))]), + vec![(1, Range::new(9, 9), Range::new(10, 5))]), ("Starting from after boundary retreats the anchor", - vec![(1, Range::new(0, 13), Range::new(12, 8))]), + vec![(1, Range::new(0, 14), Range::new(13, 8))]), ("Jump to end of a word succeeded by whitespace", - vec![(1, Range::new(10, 10), Range::new(10, 4))]), + vec![(1, Range::new(11, 11), Range::new(11, 4))]), (" Jump to start of line from end of word preceded by whitespace", - vec![(1, Range::new(7, 7), Range::new(7, 0))]), + vec![(1, Range::new(8, 8), Range::new(8, 0))]), ("Previous anchor is irrelevant for backward motions", - vec![(1, Range::new(26, 12), Range::new(12, 8))]), + vec![(1, Range::new(26, 12), Range::new(13, 8))]), (" Starting from whitespace moves to first space in sequence", - vec![(1, Range::new(0, 3), Range::new(3, 0))]), + vec![(1, Range::new(0, 4), Range::new(4, 0))]), ("Test identifiers_with_underscores are considered a single word", vec![(1, Range::new(0, 25), Range::new(25, 4))]), ("Jumping\n \nback through a newline selects whitespace", - vec![(1, Range::new(0, 13), Range::new(11, 8))]), + vec![(1, Range::new(0, 13), Range::new(12, 8))]), ("Jumping to start of word from the end selects the whole word", - vec![(1, Range::new(15, 15), Range::new(15, 10))]), + vec![(1, Range::new(16, 16), Range::new(16, 10))]), ("alphanumeric.!,and.?=punctuation are considered 'words' for the purposes of word motion", vec![ - (1, Range::new(30, 30), Range::new(30, 21)), - (1, Range::new(30, 21), Range::new(20, 18)), - (1, Range::new(20, 18), Range::new(17, 15)) + (1, Range::new(30, 30), Range::new(31, 21)), + (1, Range::new(31, 21), Range::new(21, 18)), + (1, Range::new(21, 18), Range::new(18, 15)) ]), ("... ... punctuation and spaces behave as expected", vec![ - (1, Range::new(0, 10), Range::new(9, 9)), - (1, Range::new(9, 6), Range::new(5, 3)), + (1, Range::new(0, 10), Range::new(9, 3)), + (1, Range::new(9, 3), Range::new(3, 0)), ]), (".._.._ punctuation is not joined by underscores into a single block", - vec![(1, Range::new(0, 5), Range::new(4, 3))]), + vec![(1, Range::new(0, 5), Range::new(5, 3))]), ("Newlines\n\nare bridged seamlessly.", vec![ - (1, Range::new(0, 10), Range::new(7, 0)), + (1, Range::new(0, 10), Range::new(8, 0)), ]), ("Jumping \n\n\n\n\nback from within a newline group selects previous block", vec![ - (1, Range::new(0, 13), Range::new(10, 7)), + (1, Range::new(0, 13), Range::new(11, 7)), ]), ("Failed motions do not modify the range", vec![ @@ -969,11 +1015,11 @@ mod test { ]), ("Multiple motions at once resolve correctly", vec![ - (3, Range::new(23, 23), Range::new(15, 8)), + (3, Range::new(24, 24), Range::new(16, 8)), ]), ("Excessive motions are performed partially", vec![ - (999, Range::new(40, 40), Range::new(8, 0)), + (999, Range::new(40, 40), Range::new(9, 0)), ]), ("", // Edge case of moving backwards in empty string vec![ @@ -981,16 +1027,16 @@ mod test { ]), ("\n\n\n\n\n", // Edge case of moving backwards in all newlines vec![ - (1, Range::new(0, 0), Range::new(0, 0)), + (1, Range::new(5, 5), Range::new(0, 0)), ]), (" \n \nJumping back through alternated space blocks and newlines selects the space blocks", vec![ - (1, Range::new(0, 7), Range::new(6, 4)), - (1, Range::new(6, 4), Range::new(2, 0)), + (1, Range::new(0, 8), Range::new(7, 4)), + (1, Range::new(7, 4), Range::new(3, 0)), ]), ("Test ヒーリクス multibyte characters behave as normal characters", vec![ - (1, Range::new(0, 9), Range::new(9, 4)), + (1, Range::new(0, 10), Range::new(10, 4)), ]), ]); @@ -1006,40 +1052,44 @@ mod test { fn test_behaviour_when_moving_to_end_of_next_long_words() { let tests = array::IntoIter::new([ ("Basic forward motion from the start of a word to the end of it", - vec![(1, Range::new(0, 0), Range::new(0, 4))]), + vec![(1, Range::new(0, 0), Range::new(0, 5))]), ("Basic forward motion from the end of a word to the end of the next", - vec![(1, Range::new(0, 4), Range::new(5, 12))]), + vec![(1, Range::new(0, 5), Range::new(5, 13))]), ("Basic forward motion from the middle of a word to the end of it", - vec![(1, Range::new(2, 2), Range::new(2, 4))]), + vec![(1, Range::new(2, 2), Range::new(2, 5))]), (" Jumping to end of a word preceded by whitespace", - vec![(1, Range::new(0, 0), Range::new(0, 10))]), - (" Starting from a boundary advances the anchor", - vec![(1, Range::new(0, 0), Range::new(1, 8))]), + vec![(1, Range::new(0, 0), Range::new(0, 11))]), + + // // Why do we want this behavior? The current behavior fails this + // // test, but seems better and more consistent. + // (" Starting from a boundary advances the anchor", + // vec![(1, Range::new(0, 0), Range::new(1, 9))]), + ("Previous anchor is irrelevant for end of word motion", - vec![(1, Range::new(12, 2), Range::new(2, 7))]), + vec![(1, Range::new(12, 2), Range::new(2, 8))]), ("Identifiers_with_underscores are considered a single word", - vec![(1, Range::new(0, 0), Range::new(0, 27))]), + vec![(1, Range::new(0, 0), Range::new(0, 28))]), ("Jumping\n into starting whitespace selects up to the end of next word", - vec![(1, Range::new(0, 6), Range::new(8, 15))]), + vec![(1, Range::new(0, 7), Range::new(8, 16))]), ("alphanumeric.!,and.?=punctuation are treated the same way", vec![ - (1, Range::new(0, 0), Range::new(0, 31)), + (1, Range::new(0, 0), Range::new(0, 32)), ]), ("... ... punctuation and spaces behave as expected", vec![ - (1, Range::new(0, 0), Range::new(0, 2)), - (1, Range::new(0, 2), Range::new(3, 8)), + (1, Range::new(0, 0), Range::new(0, 3)), + (1, Range::new(0, 3), Range::new(3, 9)), ]), (".._.._ punctuation is joined by underscores into a single block", - vec![(1, Range::new(0, 0), Range::new(0, 5))]), + vec![(1, Range::new(0, 0), Range::new(0, 6))]), ("Newlines\n\nare bridged seamlessly.", vec![ - (1, Range::new(0, 0), Range::new(0, 7)), - (1, Range::new(0, 7), Range::new(10, 12)), + (1, Range::new(0, 0), Range::new(0, 8)), + (1, Range::new(0, 8), Range::new(10, 13)), ]), ("Jumping\n\n\n\n\n\n from newlines to whitespace selects to end of next word.", vec![ - (1, Range::new(0, 8), Range::new(13, 19)), + (1, Range::new(0, 9), Range::new(13, 20)), ]), ("A failed motion does not modify the range", vec![ @@ -1047,11 +1097,11 @@ mod test { ]), ("Multiple motions at once resolve correctly", vec![ - (3, Range::new(0, 0), Range::new(16, 18)), + (3, Range::new(0, 0), Range::new(16, 19)), ]), ("Excessive motions are performed partially", vec![ - (999, Range::new(0, 0), Range::new(31, 40)), + (999, Range::new(0, 0), Range::new(31, 41)), ]), ("", // Edge case of moving forward in empty string vec![ @@ -1059,16 +1109,16 @@ mod test { ]), ("\n\n\n\n\n", // Edge case of moving forward in all newlines vec![ - (1, Range::new(0, 0), Range::new(0, 4)), + (1, Range::new(0, 0), Range::new(5, 5)), ]), ("\n \n \n Jumping through alternated space blocks and newlines selects the space blocks", vec![ - (1, Range::new(0, 0), Range::new(1, 3)), - (1, Range::new(1, 3), Range::new(5, 7)), + (1, Range::new(0, 0), Range::new(1, 4)), + (1, Range::new(1, 4), Range::new(5, 8)), ]), ("ヒーリ..クス multibyte characters behave as normal characters, including when they interact with punctuation", vec![ - (1, Range::new(0, 0), Range::new(0, 6)), + (1, Range::new(0, 0), Range::new(0, 7)), ]), ]); diff --git a/helix-core/src/object.rs b/helix-core/src/object.rs index 33d90971..d9558dd8 100644 --- a/helix-core/src/object.rs +++ b/helix-core/src/object.rs @@ -5,7 +5,7 @@ use crate::{Range, RopeSlice, Selection, Syntax}; pub fn expand_selection(syntax: &Syntax, text: RopeSlice, selection: &Selection) -> Selection { let tree = syntax.tree(); - selection.transform(|range| { + selection.clone().transform(|range| { let from = text.char_to_byte(range.from()); let to = text.char_to_byte(range.to()); diff --git a/helix-core/src/position.rs b/helix-core/src/position.rs index 3d114b52..611e6b76 100644 --- a/helix-core/src/position.rs +++ b/helix-core/src/position.rs @@ -1,6 +1,7 @@ use crate::{ chars::char_is_line_ending, - graphemes::{nth_next_grapheme_boundary, RopeGraphemes}, + graphemes::{ensure_grapheme_boundary_prev, RopeGraphemes}, + line_ending::line_end_char_index, RopeSlice, }; @@ -52,19 +53,50 @@ impl From<Position> for tree_sitter::Point { } } /// Convert a character index to (line, column) coordinates. +/// +/// TODO: this should be split into two methods: one for visual +/// row/column, and one for "objective" row/column (possibly with +/// the column specified in `char`s). The former would be used +/// for cursor movement, and the latter would be used for e.g. the +/// row:column display in the status line. pub fn coords_at_pos(text: RopeSlice, pos: usize) -> Position { let line = text.char_to_line(pos); + let line_start = text.line_to_char(line); + let pos = ensure_grapheme_boundary_prev(text, pos); let col = RopeGraphemes::new(text.slice(line_start..pos)).count(); + Position::new(line, col) } /// Convert (line, column) coordinates to a character index. -pub fn pos_at_coords(text: RopeSlice, coords: Position) -> usize { +/// +/// `is_1_width` specifies whether the position should be treated +/// as a block cursor or not. This effects how line-ends are handled. +/// `false` corresponds to properly round-tripping with `coords_at_pos()`, +/// whereas `true` will ensure that block cursors don't jump off the +/// end of the line. +/// +/// TODO: this should be changed to work in terms of visual row/column, not +/// graphemes. +pub fn pos_at_coords(text: RopeSlice, coords: Position, is_1_width: bool) -> usize { let Position { row, col } = coords; let line_start = text.line_to_char(row); - // line_start + col - nth_next_grapheme_boundary(text, line_start, col) + let line_end = if is_1_width { + line_end_char_index(&text, row) + } else { + text.line_to_char((row + 1).min(text.len_lines())) + }; + + let mut col_char_offset = 0; + for (i, g) in RopeGraphemes::new(text.slice(line_start..line_end)).enumerate() { + if i == col { + break; + } + col_char_offset += g.chars().count(); + } + + line_start + col_char_offset } #[cfg(test)] @@ -80,55 +112,109 @@ mod test { #[test] fn test_coords_at_pos() { - // let text = Rope::from("ḧëḷḷö\nẅöṛḷḋ"); - // let slice = text.slice(..); - // assert_eq!(coords_at_pos(slice, 0), (0, 0).into()); - // assert_eq!(coords_at_pos(slice, 5), (0, 5).into()); // position on \n - // assert_eq!(coords_at_pos(slice, 6), (1, 0).into()); // position on w - // assert_eq!(coords_at_pos(slice, 7), (1, 1).into()); // position on o - // assert_eq!(coords_at_pos(slice, 10), (1, 4).into()); // position on d - - // test with grapheme clusters + let text = Rope::from("ḧëḷḷö\nẅöṛḷḋ"); + let slice = text.slice(..); + assert_eq!(coords_at_pos(slice, 0), (0, 0).into()); + assert_eq!(coords_at_pos(slice, 5), (0, 5).into()); // position on \n + assert_eq!(coords_at_pos(slice, 6), (1, 0).into()); // position on w + assert_eq!(coords_at_pos(slice, 7), (1, 1).into()); // position on o + assert_eq!(coords_at_pos(slice, 10), (1, 4).into()); // position on d + + // Test with wide characters. + // TODO: account for character width. + let text = Rope::from("今日はいい\n"); + let slice = text.slice(..); + assert_eq!(coords_at_pos(slice, 0), (0, 0).into()); + assert_eq!(coords_at_pos(slice, 1), (0, 1).into()); + assert_eq!(coords_at_pos(slice, 2), (0, 2).into()); + assert_eq!(coords_at_pos(slice, 3), (0, 3).into()); + assert_eq!(coords_at_pos(slice, 4), (0, 4).into()); + assert_eq!(coords_at_pos(slice, 5), (0, 5).into()); + assert_eq!(coords_at_pos(slice, 6), (1, 0).into()); + + // Test with grapheme clusters. let text = Rope::from("a̐éö̲\r\n"); let slice = text.slice(..); assert_eq!(coords_at_pos(slice, 0), (0, 0).into()); assert_eq!(coords_at_pos(slice, 2), (0, 1).into()); assert_eq!(coords_at_pos(slice, 4), (0, 2).into()); assert_eq!(coords_at_pos(slice, 7), (0, 3).into()); + assert_eq!(coords_at_pos(slice, 9), (1, 0).into()); - let text = Rope::from("किमपि"); + // Test with wide-character grapheme clusters. + // TODO: account for character width. + let text = Rope::from("किमपि\n"); let slice = text.slice(..); assert_eq!(coords_at_pos(slice, 0), (0, 0).into()); assert_eq!(coords_at_pos(slice, 2), (0, 1).into()); assert_eq!(coords_at_pos(slice, 3), (0, 2).into()); assert_eq!(coords_at_pos(slice, 5), (0, 3).into()); + assert_eq!(coords_at_pos(slice, 6), (1, 0).into()); + + // Test with tabs. + // Todo: account for tab stops. + let text = Rope::from("\tHello\n"); + let slice = text.slice(..); + assert_eq!(coords_at_pos(slice, 0), (0, 0).into()); + assert_eq!(coords_at_pos(slice, 1), (0, 1).into()); + assert_eq!(coords_at_pos(slice, 2), (0, 2).into()); } #[test] fn test_pos_at_coords() { let text = Rope::from("ḧëḷḷö\nẅöṛḷḋ"); let slice = text.slice(..); - assert_eq!(pos_at_coords(slice, (0, 0).into()), 0); - assert_eq!(pos_at_coords(slice, (0, 5).into()), 5); // position on \n - assert_eq!(pos_at_coords(slice, (1, 0).into()), 6); // position on w - assert_eq!(pos_at_coords(slice, (1, 1).into()), 7); // position on o - assert_eq!(pos_at_coords(slice, (1, 4).into()), 10); // position on d + assert_eq!(pos_at_coords(slice, (0, 0).into(), false), 0); + assert_eq!(pos_at_coords(slice, (0, 5).into(), false), 5); // position on \n + assert_eq!(pos_at_coords(slice, (0, 6).into(), false), 6); // position after \n + assert_eq!(pos_at_coords(slice, (0, 6).into(), true), 5); // position after \n + assert_eq!(pos_at_coords(slice, (1, 0).into(), false), 6); // position on w + assert_eq!(pos_at_coords(slice, (1, 1).into(), false), 7); // position on o + assert_eq!(pos_at_coords(slice, (1, 4).into(), false), 10); // position on d - // test with grapheme clusters + // Test with wide characters. + // TODO: account for character width. + let text = Rope::from("今日はいい\n"); + let slice = text.slice(..); + assert_eq!(pos_at_coords(slice, (0, 0).into(), false), 0); + assert_eq!(pos_at_coords(slice, (0, 1).into(), false), 1); + assert_eq!(pos_at_coords(slice, (0, 2).into(), false), 2); + assert_eq!(pos_at_coords(slice, (0, 3).into(), false), 3); + assert_eq!(pos_at_coords(slice, (0, 4).into(), false), 4); + assert_eq!(pos_at_coords(slice, (0, 5).into(), false), 5); + assert_eq!(pos_at_coords(slice, (0, 6).into(), false), 6); + assert_eq!(pos_at_coords(slice, (0, 6).into(), true), 5); + assert_eq!(pos_at_coords(slice, (1, 0).into(), false), 6); + + // Test with grapheme clusters. let text = Rope::from("a̐éö̲\r\n"); let slice = text.slice(..); - assert_eq!(pos_at_coords(slice, (0, 0).into()), 0); - assert_eq!(pos_at_coords(slice, (0, 1).into()), 2); - assert_eq!(pos_at_coords(slice, (0, 2).into()), 4); - assert_eq!(pos_at_coords(slice, (0, 3).into()), 7); // \r\n is one char here - assert_eq!(pos_at_coords(slice, (0, 4).into()), 9); + assert_eq!(pos_at_coords(slice, (0, 0).into(), false), 0); + assert_eq!(pos_at_coords(slice, (0, 1).into(), false), 2); + assert_eq!(pos_at_coords(slice, (0, 2).into(), false), 4); + assert_eq!(pos_at_coords(slice, (0, 3).into(), false), 7); // \r\n is one char here + assert_eq!(pos_at_coords(slice, (0, 4).into(), false), 9); + assert_eq!(pos_at_coords(slice, (0, 4).into(), true), 7); + assert_eq!(pos_at_coords(slice, (1, 0).into(), false), 9); + + // Test with wide-character grapheme clusters. + // TODO: account for character width. let text = Rope::from("किमपि"); // 2 - 1 - 2 codepoints // TODO: delete handling as per https://news.ycombinator.com/item?id=20058454 let slice = text.slice(..); - assert_eq!(pos_at_coords(slice, (0, 0).into()), 0); - assert_eq!(pos_at_coords(slice, (0, 1).into()), 2); - assert_eq!(pos_at_coords(slice, (0, 2).into()), 3); - assert_eq!(pos_at_coords(slice, (0, 3).into()), 5); // eol + assert_eq!(pos_at_coords(slice, (0, 0).into(), false), 0); + assert_eq!(pos_at_coords(slice, (0, 1).into(), false), 2); + assert_eq!(pos_at_coords(slice, (0, 2).into(), false), 3); + assert_eq!(pos_at_coords(slice, (0, 3).into(), false), 5); + assert_eq!(pos_at_coords(slice, (0, 3).into(), true), 5); + + // Test with tabs. + // Todo: account for tab stops. + let text = Rope::from("\tHello\n"); + let slice = text.slice(..); + assert_eq!(pos_at_coords(slice, (0, 0).into(), false), 0); + assert_eq!(pos_at_coords(slice, (0, 1).into(), false), 1); + assert_eq!(pos_at_coords(slice, (0, 2).into(), false), 2); } } diff --git a/helix-core/src/search.rs b/helix-core/src/search.rs index 73be68c7..243ac227 100644 --- a/helix-core/src/search.rs +++ b/helix-core/src/search.rs @@ -1,18 +1,11 @@ use crate::RopeSlice; -pub fn find_nth_next( - text: RopeSlice, - ch: char, - mut pos: usize, - n: usize, - inclusive: bool, -) -> Option<usize> { - if pos >= text.len_chars() { +pub fn find_nth_next(text: RopeSlice, ch: char, mut pos: usize, n: usize) -> Option<usize> { + if pos >= text.len_chars() || n == 0 { return None; } - // start searching right after pos - let mut chars = text.chars_at(pos + 1); + let mut chars = text.chars_at(pos); for _ in 0..n { loop { @@ -26,28 +19,21 @@ pub fn find_nth_next( } } - if !inclusive { - pos -= 1; - } - - Some(pos) + Some(pos - 1) } -pub fn find_nth_prev( - text: RopeSlice, - ch: char, - mut pos: usize, - n: usize, - inclusive: bool, -) -> Option<usize> { - // start searching right before pos +pub fn find_nth_prev(text: RopeSlice, ch: char, mut pos: usize, n: usize) -> Option<usize> { + if pos == 0 || n == 0 { + return None; + } + let mut chars = text.chars_at(pos); for _ in 0..n { loop { let c = chars.prev()?; - pos = pos.saturating_sub(1); + pos -= 1; if c == ch { break; @@ -55,9 +41,5 @@ pub fn find_nth_prev( } } - if !inclusive { - pos += 1; - } - Some(pos) } diff --git a/helix-core/src/selection.rs b/helix-core/src/selection.rs index 63b9b557..247a69fe 100644 --- a/helix-core/src/selection.rs +++ b/helix-core/src/selection.rs @@ -1,30 +1,50 @@ -//! Selections are the primary editing construct. Even a single cursor is defined as an empty -//! single selection range. +//! Selections are the primary editing construct. Even a single cursor is +//! defined as a single empty or 1-wide selection range. //! //! All positioning is done via `char` offsets into the buffer. -use crate::{Assoc, ChangeSet, RopeSlice}; +use crate::{ + graphemes::{ + ensure_grapheme_boundary_next, ensure_grapheme_boundary_prev, next_grapheme_boundary, + prev_grapheme_boundary, + }, + Assoc, ChangeSet, RopeSlice, +}; use smallvec::{smallvec, SmallVec}; use std::borrow::Cow; -#[inline] -fn abs_difference(x: usize, y: usize) -> usize { - if x < y { - y - x - } else { - x - y - } -} - -/// A single selection range. Anchor-inclusive, head-exclusive. +/// A single selection range. +/// +/// The range consists of an "anchor" and "head" position in +/// the text. The head is the part that the user moves when +/// directly extending the selection. The head and anchor +/// can be in any order: either can precede or follow the +/// other in the text, and they can share the same position +/// for a zero-width range. +/// +/// Below are some example `Range` configurations to better +/// illustrate. The anchor and head indices are show as +/// "(anchor, head)", followed by example text with "[" and "]" +/// inserted to visually represent the anchor and head positions: +/// +/// - (0, 3): [Som]e text. +/// - (3, 0): ]Som[e text. +/// - (2, 7): So[me te]xt. +/// - (1, 1): S[]ome text. +/// +/// Ranges are considered to be inclusive on the left and +/// exclusive on the right, regardless of anchor-head ordering. +/// This means, for example, that non-zero-width ranges that +/// are directly adjecent, sharing an edge, do not overlap. +/// However, a zero-width range will overlap with the shared +/// left-edge of another range. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct Range { - // TODO: optimize into u32 /// The anchor of the range: the side that doesn't move when extending. pub anchor: usize, /// The head of the range, moved when extending. pub head: usize, pub horiz: Option<u32>, -} // TODO: might be cheaper to store normalized as from/to and an inverted flag +} impl Range { pub fn new(anchor: usize, head: usize) -> Self { @@ -53,6 +73,27 @@ impl Range { std::cmp::max(self.anchor, self.head) } + /// The line number that the block-cursor is on. + #[inline] + #[must_use] + pub fn cursor_line(&self, text: RopeSlice) -> usize { + text.char_to_line(self.cursor(text)) + } + + /// The (inclusive) range of lines that the range overlaps. + #[inline] + #[must_use] + pub fn line_range(&self, text: RopeSlice) -> (usize, usize) { + let from = self.from(); + let to = if self.is_empty() { + self.to() + } else { + prev_grapheme_boundary(text, self.to()).max(from) + }; + + (text.char_to_line(from), text.char_to_line(to)) + } + /// `true` when head and anchor are at the same position. #[inline] pub fn is_empty(&self) -> bool { @@ -62,37 +103,39 @@ impl Range { /// Check two ranges for overlap. #[must_use] pub fn overlaps(&self, other: &Self) -> bool { - // cursor overlap is checked differently - if self.is_empty() { - let pos = self.head; - pos >= other.from() && other.to() >= pos - } else { - self.to() > other.from() && other.to() > self.from() - } + // To my eye, it's non-obvious why this works, but I arrived + // at it after transforming the slower version that explicitly + // enumerated more cases. The unit tests are thorough. + self.from() == other.from() || (self.to() > other.from() && other.to() > self.from()) } pub fn contains(&self, pos: usize) -> bool { - if self.is_empty() { - return false; - } - - if self.anchor < self.head { - self.anchor <= pos && pos < self.head - } else { - self.head < pos && pos <= self.anchor - } + self.from() <= pos && pos < self.to() } /// Map a range through a set of changes. Returns a new range representing the same position /// after the changes are applied. pub fn map(self, changes: &ChangeSet) -> Self { - let anchor = changes.map_pos(self.anchor, Assoc::After); - let head = changes.map_pos(self.head, Assoc::After); - - // TODO: possibly unnecessary - if self.anchor == anchor && self.head == head { - return self; - } + use std::cmp::Ordering; + let (anchor, head) = match self.anchor.cmp(&self.head) { + Ordering::Equal => ( + changes.map_pos(self.anchor, Assoc::After), + changes.map_pos(self.head, Assoc::After), + ), + Ordering::Less => ( + changes.map_pos(self.anchor, Assoc::After), + changes.map_pos(self.head, Assoc::Before), + ), + Ordering::Greater => ( + changes.map_pos(self.anchor, Assoc::Before), + changes.map_pos(self.head, Assoc::After), + ), + }; + + // We want to return a new `Range` with `horiz == None` every time, + // even if the anchor and head haven't changed, because we don't + // know if the *visual* position hasn't changed due to + // character-width or grapheme changes earlier in the text. Self { anchor, head, @@ -103,22 +146,141 @@ impl Range { /// Extend the range to cover at least `from` `to`. #[must_use] pub fn extend(&self, from: usize, to: usize) -> Self { - if from <= self.anchor && to >= self.anchor { - return Self { - anchor: from, - head: to, + debug_assert!(from <= to); + + if self.anchor <= self.head { + Self { + anchor: self.anchor.min(from), + head: self.head.max(to), horiz: None, - }; + } + } else { + Self { + anchor: self.anchor.max(to), + head: self.head.min(from), + horiz: None, + } } + } - Self { - anchor: self.anchor, - head: if abs_difference(from, self.anchor) > abs_difference(to, self.anchor) { - from + /// Returns a range that encompasses both input ranges. + /// + /// This is like `extend()`, but tries to negotiate the + /// anchor/head ordering between the two input ranges. + #[must_use] + pub fn merge(&self, other: Self) -> Self { + if self.anchor > self.head && other.anchor > other.head { + Range { + anchor: self.anchor.max(other.anchor), + head: self.head.min(other.head), + horiz: None, + } + } else { + Range { + anchor: self.from().min(other.from()), + head: self.to().max(other.to()), + horiz: None, + } + } + } + + /// Compute a possibly new range from this range, attempting to ensure + /// a minimum range width of 1 char by shifting the head in the forward + /// direction as needed. + /// + /// This method will never shift the anchor, and will only shift the + /// head in the forward direction. Therefore, this method can fail + /// at ensuring the minimum width if and only if the passed range is + /// both zero-width and at the end of the `RopeSlice`. + /// + /// If the input range is grapheme-boundary aligned, the returned range + /// will also be. Specifically, if the head needs to shift to achieve + /// the minimum width, it will shift to the next grapheme boundary. + #[must_use] + #[inline] + pub fn min_width_1(&self, slice: RopeSlice) -> Self { + if self.anchor == self.head { + Range { + anchor: self.anchor, + head: next_grapheme_boundary(slice, self.head), + horiz: self.horiz, + } + } else { + *self + } + } + + /// Compute a possibly new range from this range, with its ends + /// shifted as needed to align with grapheme boundaries. + /// + /// Zero-width ranges will always stay zero-width, and non-zero-width + /// ranges will never collapse to zero-width. + #[must_use] + pub fn grapheme_aligned(&self, slice: RopeSlice) -> Self { + use std::cmp::Ordering; + let (new_anchor, new_head) = match self.anchor.cmp(&self.head) { + Ordering::Equal => { + let pos = ensure_grapheme_boundary_prev(slice, self.anchor); + (pos, pos) + } + Ordering::Less => ( + ensure_grapheme_boundary_prev(slice, self.anchor), + ensure_grapheme_boundary_next(slice, self.head), + ), + Ordering::Greater => ( + ensure_grapheme_boundary_next(slice, self.anchor), + ensure_grapheme_boundary_prev(slice, self.head), + ), + }; + Range { + anchor: new_anchor, + head: new_head, + horiz: if new_anchor == self.anchor { + self.horiz } else { - to + None }, - horiz: None, + } + } + + /// Gets the left-side position of the block cursor. + #[must_use] + #[inline] + pub fn cursor(self, text: RopeSlice) -> usize { + if self.head > self.anchor { + prev_grapheme_boundary(text, self.head) + } else { + self.head + } + } + + /// Puts the left side of the block cursor at `char_idx`, optionally extending. + /// + /// This follows "1-width" semantics, and therefore does a combination of anchor + /// and head moves to behave as if both the front and back of the range are 1-width + /// blocks + /// + /// This method assumes that the range and `char_idx` are already properly + /// grapheme-aligned. + #[must_use] + #[inline] + pub fn put_cursor(self, text: RopeSlice, char_idx: usize, extend: bool) -> Range { + if extend { + let anchor = if self.head >= self.anchor && char_idx < self.anchor { + next_grapheme_boundary(text, self.anchor) + } else if self.head < self.anchor && char_idx >= self.anchor { + prev_grapheme_boundary(text, self.anchor) + } else { + self.anchor + }; + + if anchor <= char_idx { + Range::new(anchor, next_grapheme_boundary(text, char_idx)) + } else { + Range::new(anchor, char_idx) + } + } else { + Range::point(char_idx) } } @@ -126,7 +288,7 @@ impl Range { #[inline] pub fn fragment<'a, 'b: 'a>(&'a self, text: RopeSlice<'b>) -> Cow<'b, str> { - Cow::from(text.slice(self.from()..self.to() + 1)) + text.slice(self.from()..self.to()).into() } } @@ -157,11 +319,6 @@ impl Selection { self.ranges[self.primary_index] } - #[must_use] - pub fn cursor(&self) -> usize { - self.primary().head - } - /// Ensure selection containing only the primary selection. pub fn into_single(self) -> Self { if self.ranges.len() == 1 { @@ -174,13 +331,12 @@ impl Selection { } } + /// Adds a new range to the selection and makes it the primary range. pub fn push(mut self, range: Range) -> Self { - let index = self.ranges.len(); self.ranges.push(range); - - Self::normalize(self.ranges, index) + self.set_primary_index(self.ranges().len() - 1); + self.normalize() } - // replace_range /// Map selections over a set of changes. Useful for adjusting the selection position after /// applying changes to a document. @@ -206,6 +362,11 @@ impl Selection { self.primary_index } + pub fn set_primary_index(&mut self, idx: usize) { + assert!(idx < self.ranges.len()); + self.primary_index = idx; + } + #[must_use] /// Constructs a selection holding a single range. pub fn single(anchor: usize, head: usize) -> Self { @@ -224,80 +385,74 @@ impl Selection { Self::single(pos, pos) } - fn normalize(mut ranges: SmallVec<[Range; 1]>, mut primary_index: usize) -> Self { - let primary = ranges[primary_index]; - ranges.sort_unstable_by_key(Range::from); - primary_index = ranges.iter().position(|&range| range == primary).unwrap(); - - let mut result = SmallVec::with_capacity(ranges.len()); // approx - - // TODO: we could do with one vec by removing elements as we mutate - - let mut i = 0; - - for range in ranges.into_iter() { - // if previous value exists - if let Some(prev) = result.last_mut() { - // and we overlap it - - // TODO: we used to simply check range.from() <(=) prev.to() - // avoiding two comparisons - if range.overlaps(prev) { - let from = prev.from(); - let to = std::cmp::max(range.to(), prev.to()); - - if i <= primary_index { - primary_index -= 1 - } - - // merge into previous - if range.anchor > range.head { - prev.anchor = to; - prev.head = from; - } else { - prev.anchor = from; - prev.head = to; - } - continue; - } + /// Normalizes a `Selection`. + fn normalize(mut self) -> Self { + let primary = self.ranges[self.primary_index]; + self.ranges.sort_unstable_by_key(Range::from); + self.primary_index = self + .ranges + .iter() + .position(|&range| range == primary) + .unwrap(); + + let mut prev_i = 0; + for i in 1..self.ranges.len() { + if self.ranges[prev_i].overlaps(&self.ranges[i]) { + self.ranges[prev_i] = self.ranges[prev_i].merge(self.ranges[i]); + } else { + prev_i += 1; + self.ranges[prev_i] = self.ranges[i]; + } + if i == self.primary_index { + self.primary_index = prev_i; } - - result.push(range); - i += 1 } - Self { - ranges: result, - primary_index, - } + self.ranges.truncate(prev_i + 1); + + self } // TODO: consume an iterator or a vec to reduce allocations? #[must_use] pub fn new(ranges: SmallVec<[Range; 1]>, primary_index: usize) -> Self { assert!(!ranges.is_empty()); + debug_assert!(primary_index < ranges.len()); - // fast path for a single selection (cursor) - if ranges.len() == 1 { - return Self { - ranges, - primary_index: 0, - }; + let mut selection = Self { + ranges, + primary_index, + }; + + if selection.ranges.len() > 1 { + // TODO: only normalize if needed (any ranges out of order) + selection = selection.normalize(); } - // TODO: only normalize if needed (any ranges out of order) - Self::normalize(ranges, primary_index) + selection } - /// Takes a closure and maps each selection over the closure. - pub fn transform<F>(&self, f: F) -> Self + /// Takes a closure and maps each `Range` over the closure. + pub fn transform<F>(mut self, f: F) -> Self where F: Fn(Range) -> Range, { - Self::new( - self.ranges.iter().copied().map(f).collect(), - self.primary_index, - ) + for range in self.ranges.iter_mut() { + *range = f(*range) + } + + self.normalize() + } + + /// A convenience short-cut for `transform(|r| r.min_width_1(text))`. + pub fn min_width_1(self, text: RopeSlice) -> Self { + self.transform(|r| r.min_width_1(text)) + } + + /// Transforms the selection into all of the left-side head positions, + /// using block-cursor semantics. + pub fn cursors(self, text: RopeSlice) -> Self { + self.transform(|range| Range::point(range.cursor(text))) } pub fn fragments<'a>(&'a self, text: RopeSlice<'a>) -> impl Iterator<Item = Cow<str>> + 'a { @@ -363,7 +518,7 @@ pub fn select_on_matches( let start = text.byte_to_char(start_byte + mat.start()); let end = text.byte_to_char(start_byte + mat.end()); - result.push(Range::new(start, end.saturating_sub(1))); + result.push(Range::new(start, end)); } } @@ -384,6 +539,12 @@ pub fn split_on_matches( let mut result = SmallVec::with_capacity(selection.len()); for sel in selection { + // Special case: zero-width selection. + if sel.from() == sel.to() { + result.push(*sel); + continue; + } + // TODO: can't avoid occasional allocations since Regex can't operate on chunks yet let fragment = sel.fragment(text); @@ -396,13 +557,12 @@ pub fn split_on_matches( for mat in regex.find_iter(&fragment) { // TODO: retain range direction - let end = text.byte_to_char(start_byte + mat.start()); - result.push(Range::new(start, end.saturating_sub(1))); + result.push(Range::new(start, end)); start = text.byte_to_char(start_byte + mat.end()); } - if start <= sel_end { + if start < sel_end { result.push(Range::new(start, sel_end)); } } @@ -484,7 +644,7 @@ mod test { .collect::<Vec<String>>() .join(","); - assert_eq!(res, "8/10,10/12"); + assert_eq!(res, "8/10,10/12,12/12"); } #[test] @@ -498,35 +658,171 @@ mod test { assert_eq!(range.contains(13), false); let range = Range::new(9, 6); - assert_eq!(range.contains(9), true); + assert_eq!(range.contains(9), false); assert_eq!(range.contains(7), true); - assert_eq!(range.contains(6), false); + assert_eq!(range.contains(6), true); + } + + #[test] + fn test_overlaps() { + fn overlaps(a: (usize, usize), b: (usize, usize)) -> bool { + Range::new(a.0, a.1).overlaps(&Range::new(b.0, b.1)) + } + + // Two non-zero-width ranges, no overlap. + assert!(!overlaps((0, 3), (3, 6))); + assert!(!overlaps((0, 3), (6, 3))); + assert!(!overlaps((3, 0), (3, 6))); + assert!(!overlaps((3, 0), (6, 3))); + assert!(!overlaps((3, 6), (0, 3))); + assert!(!overlaps((3, 6), (3, 0))); + assert!(!overlaps((6, 3), (0, 3))); + assert!(!overlaps((6, 3), (3, 0))); + + // Two non-zero-width ranges, overlap. + assert!(overlaps((0, 4), (3, 6))); + assert!(overlaps((0, 4), (6, 3))); + assert!(overlaps((4, 0), (3, 6))); + assert!(overlaps((4, 0), (6, 3))); + assert!(overlaps((3, 6), (0, 4))); + assert!(overlaps((3, 6), (4, 0))); + assert!(overlaps((6, 3), (0, 4))); + assert!(overlaps((6, 3), (4, 0))); + + // Zero-width and non-zero-width range, no overlap. + assert!(!overlaps((0, 3), (3, 3))); + assert!(!overlaps((3, 0), (3, 3))); + assert!(!overlaps((3, 3), (0, 3))); + assert!(!overlaps((3, 3), (3, 0))); + + // Zero-width and non-zero-width range, overlap. + assert!(overlaps((1, 4), (1, 1))); + assert!(overlaps((4, 1), (1, 1))); + assert!(overlaps((1, 1), (1, 4))); + assert!(overlaps((1, 1), (4, 1))); + + assert!(overlaps((1, 4), (3, 3))); + assert!(overlaps((4, 1), (3, 3))); + assert!(overlaps((3, 3), (1, 4))); + assert!(overlaps((3, 3), (4, 1))); + + // Two zero-width ranges, no overlap. + assert!(!overlaps((0, 0), (1, 1))); + assert!(!overlaps((1, 1), (0, 0))); + + // Two zero-width ranges, overlap. + assert!(overlaps((1, 1), (1, 1))); + } + + #[test] + fn test_graphem_aligned() { + let r = Rope::from_str("\r\nHi\r\n"); + let s = r.slice(..); + + // Zero-width. + assert_eq!(Range::new(0, 0).grapheme_aligned(s), Range::new(0, 0)); + assert_eq!(Range::new(1, 1).grapheme_aligned(s), Range::new(0, 0)); + assert_eq!(Range::new(2, 2).grapheme_aligned(s), Range::new(2, 2)); + assert_eq!(Range::new(3, 3).grapheme_aligned(s), Range::new(3, 3)); + assert_eq!(Range::new(4, 4).grapheme_aligned(s), Range::new(4, 4)); + assert_eq!(Range::new(5, 5).grapheme_aligned(s), Range::new(4, 4)); + assert_eq!(Range::new(6, 6).grapheme_aligned(s), Range::new(6, 6)); + + // Forward. + assert_eq!(Range::new(0, 1).grapheme_aligned(s), Range::new(0, 2)); + assert_eq!(Range::new(1, 2).grapheme_aligned(s), Range::new(0, 2)); + assert_eq!(Range::new(2, 3).grapheme_aligned(s), Range::new(2, 3)); + assert_eq!(Range::new(3, 4).grapheme_aligned(s), Range::new(3, 4)); + assert_eq!(Range::new(4, 5).grapheme_aligned(s), Range::new(4, 6)); + assert_eq!(Range::new(5, 6).grapheme_aligned(s), Range::new(4, 6)); + + assert_eq!(Range::new(0, 2).grapheme_aligned(s), Range::new(0, 2)); + assert_eq!(Range::new(1, 3).grapheme_aligned(s), Range::new(0, 3)); + assert_eq!(Range::new(2, 4).grapheme_aligned(s), Range::new(2, 4)); + assert_eq!(Range::new(3, 5).grapheme_aligned(s), Range::new(3, 6)); + assert_eq!(Range::new(4, 6).grapheme_aligned(s), Range::new(4, 6)); + + // Reverse. + assert_eq!(Range::new(1, 0).grapheme_aligned(s), Range::new(2, 0)); + assert_eq!(Range::new(2, 1).grapheme_aligned(s), Range::new(2, 0)); + assert_eq!(Range::new(3, 2).grapheme_aligned(s), Range::new(3, 2)); + assert_eq!(Range::new(4, 3).grapheme_aligned(s), Range::new(4, 3)); + assert_eq!(Range::new(5, 4).grapheme_aligned(s), Range::new(6, 4)); + assert_eq!(Range::new(6, 5).grapheme_aligned(s), Range::new(6, 4)); + + assert_eq!(Range::new(2, 0).grapheme_aligned(s), Range::new(2, 0)); + assert_eq!(Range::new(3, 1).grapheme_aligned(s), Range::new(3, 0)); + assert_eq!(Range::new(4, 2).grapheme_aligned(s), Range::new(4, 2)); + assert_eq!(Range::new(5, 3).grapheme_aligned(s), Range::new(6, 3)); + assert_eq!(Range::new(6, 4).grapheme_aligned(s), Range::new(6, 4)); + } + + #[test] + fn test_min_width_1() { + let r = Rope::from_str("\r\nHi\r\n"); + let s = r.slice(..); + + // Zero-width. + assert_eq!(Range::new(0, 0).min_width_1(s), Range::new(0, 2)); + assert_eq!(Range::new(1, 1).min_width_1(s), Range::new(1, 2)); + assert_eq!(Range::new(2, 2).min_width_1(s), Range::new(2, 3)); + assert_eq!(Range::new(3, 3).min_width_1(s), Range::new(3, 4)); + assert_eq!(Range::new(4, 4).min_width_1(s), Range::new(4, 6)); + assert_eq!(Range::new(5, 5).min_width_1(s), Range::new(5, 6)); + assert_eq!(Range::new(6, 6).min_width_1(s), Range::new(6, 6)); + + // Forward. + assert_eq!(Range::new(0, 1).min_width_1(s), Range::new(0, 1)); + assert_eq!(Range::new(1, 2).min_width_1(s), Range::new(1, 2)); + assert_eq!(Range::new(2, 3).min_width_1(s), Range::new(2, 3)); + assert_eq!(Range::new(3, 4).min_width_1(s), Range::new(3, 4)); + assert_eq!(Range::new(4, 5).min_width_1(s), Range::new(4, 5)); + assert_eq!(Range::new(5, 6).min_width_1(s), Range::new(5, 6)); + + // Reverse. + assert_eq!(Range::new(1, 0).min_width_1(s), Range::new(1, 0)); + assert_eq!(Range::new(2, 1).min_width_1(s), Range::new(2, 1)); + assert_eq!(Range::new(3, 2).min_width_1(s), Range::new(3, 2)); + assert_eq!(Range::new(4, 3).min_width_1(s), Range::new(4, 3)); + assert_eq!(Range::new(5, 4).min_width_1(s), Range::new(5, 4)); + assert_eq!(Range::new(6, 5).min_width_1(s), Range::new(6, 5)); } #[test] fn test_split_on_matches() { use crate::regex::Regex; - let text = Rope::from("abcd efg wrs xyz 123 456"); + let text = Rope::from(" abcd efg wrs xyz 123 456"); - let selection = Selection::new(smallvec![Range::new(0, 8), Range::new(10, 19),], 0); + let selection = Selection::new(smallvec![Range::new(0, 9), Range::new(11, 20),], 0); let result = split_on_matches(text.slice(..), &selection, &Regex::new(r"\s+").unwrap()); assert_eq!( result.ranges(), &[ - Range::new(0, 3), - Range::new(5, 7), - Range::new(10, 11), - Range::new(15, 17), - Range::new(19, 19), + // TODO: rather than this behavior, maybe we want it + // to be based on which side is the anchor? + // + // We get a leading zero-width range when there's + // a leading match because ranges are inclusive on + // the left. Imagine, for example, if the entire + // selection range were matched: you'd still want + // at least one range to remain after the split. + Range::new(0, 0), + Range::new(1, 5), + Range::new(6, 9), + Range::new(11, 13), + Range::new(16, 19), + // In contrast to the comment above, there is no + // _trailing_ zero-width range despite the trailing + // match, because ranges are exclusive on the right. ] ); assert_eq!( result.fragments(text.slice(..)).collect::<Vec<_>>(), - &["abcd", "efg", "rs", "xyz", "1"] + &["", "abcd", "efg", "rs", "xyz"] ); } } diff --git a/helix-core/src/surround.rs b/helix-core/src/surround.rs index 52f60cab..4d3ed5f5 100644 --- a/helix-core/src/surround.rs +++ b/helix-core/src/surround.rs @@ -1,3 +1,4 @@ +use crate::graphemes::next_grapheme_boundary; use crate::{search, Selection}; use ropey::RopeSlice; @@ -40,23 +41,34 @@ pub fn find_nth_pairs_pos( ) -> Option<(usize, usize)> { let (open, close) = get_pair(ch); - let (open_pos, close_pos) = if open == close { - let prev = search::find_nth_prev(text, open, pos, n, true); - let next = search::find_nth_next(text, close, pos, n, true); - if text.char(pos) == open { - // cursor is *on* a pair - next.map(|n| (pos, n)).or_else(|| prev.map(|p| (p, pos)))? + if text.len_chars() < 2 || pos >= text.len_chars() { + return None; + } + + if open == close { + if Some(open) == text.get_char(pos) { + // Special case: cursor is directly on a matching char. + match pos { + 0 => Some((pos, search::find_nth_next(text, close, pos + 1, n)? + 1)), + _ if (pos + 1) == text.len_chars() => { + Some((search::find_nth_prev(text, open, pos, n)?, text.len_chars())) + } + // We return no match because there's no way to know which + // side of the char we should be searching on. + _ => None, + } } else { - (prev?, next?) + Some(( + search::find_nth_prev(text, open, pos, n)?, + search::find_nth_next(text, close, pos, n)? + 1, + )) } } else { - ( + Some(( find_nth_open_pair(text, open, close, pos, n)?, - find_nth_close_pair(text, open, close, pos, n)?, - ) - }; - - Some((open_pos, close_pos)) + next_grapheme_boundary(text, find_nth_close_pair(text, open, close, pos, n)?), + )) + } } fn find_nth_open_pair( @@ -173,12 +185,13 @@ mod test { let slice = doc.slice(..); // cursor on [t]ext - assert_eq!(find_nth_pairs_pos(slice, '(', 6, 1), Some((5, 10))); - assert_eq!(find_nth_pairs_pos(slice, ')', 6, 1), Some((5, 10))); + assert_eq!(find_nth_pairs_pos(slice, '(', 6, 1), Some((5, 11))); + assert_eq!(find_nth_pairs_pos(slice, ')', 6, 1), Some((5, 11))); // cursor on so[m]e assert_eq!(find_nth_pairs_pos(slice, '(', 2, 1), None); // cursor on bracket itself - assert_eq!(find_nth_pairs_pos(slice, '(', 5, 1), Some((5, 10))); + assert_eq!(find_nth_pairs_pos(slice, '(', 5, 1), Some((5, 11))); + assert_eq!(find_nth_pairs_pos(slice, '(', 10, 1), Some((5, 11))); } #[test] @@ -187,9 +200,9 @@ mod test { let slice = doc.slice(..); // cursor on go[o]d - assert_eq!(find_nth_pairs_pos(slice, '(', 13, 1), Some((10, 15))); - assert_eq!(find_nth_pairs_pos(slice, '(', 13, 2), Some((4, 21))); - assert_eq!(find_nth_pairs_pos(slice, '(', 13, 3), Some((0, 27))); + assert_eq!(find_nth_pairs_pos(slice, '(', 13, 1), Some((10, 16))); + assert_eq!(find_nth_pairs_pos(slice, '(', 13, 2), Some((4, 22))); + assert_eq!(find_nth_pairs_pos(slice, '(', 13, 3), Some((0, 28))); } #[test] @@ -198,14 +211,14 @@ mod test { let slice = doc.slice(..); // cursor on go[o]d - assert_eq!(find_nth_pairs_pos(slice, '\'', 13, 1), Some((10, 15))); - assert_eq!(find_nth_pairs_pos(slice, '\'', 13, 2), Some((4, 21))); - assert_eq!(find_nth_pairs_pos(slice, '\'', 13, 3), Some((0, 27))); + assert_eq!(find_nth_pairs_pos(slice, '\'', 13, 1), Some((10, 16))); + assert_eq!(find_nth_pairs_pos(slice, '\'', 13, 2), Some((4, 22))); + assert_eq!(find_nth_pairs_pos(slice, '\'', 13, 3), Some((0, 28))); // cursor on the quotes - assert_eq!(find_nth_pairs_pos(slice, '\'', 10, 1), Some((10, 15))); + assert_eq!(find_nth_pairs_pos(slice, '\'', 10, 1), None); // this is the best we can do since opening and closing pairs are same - assert_eq!(find_nth_pairs_pos(slice, '\'', 0, 1), Some((0, 4))); - assert_eq!(find_nth_pairs_pos(slice, '\'', 27, 1), Some((21, 27))); + assert_eq!(find_nth_pairs_pos(slice, '\'', 0, 1), Some((0, 5))); + assert_eq!(find_nth_pairs_pos(slice, '\'', 27, 1), Some((21, 28))); } #[test] @@ -214,8 +227,8 @@ mod test { let slice = doc.slice(..); // cursor on go[o]d - assert_eq!(find_nth_pairs_pos(slice, '(', 15, 1), Some((5, 24))); - assert_eq!(find_nth_pairs_pos(slice, '(', 15, 2), Some((0, 31))); + assert_eq!(find_nth_pairs_pos(slice, '(', 15, 1), Some((5, 25))); + assert_eq!(find_nth_pairs_pos(slice, '(', 15, 2), Some((0, 32))); } #[test] @@ -224,9 +237,9 @@ mod test { let slice = doc.slice(..); // cursor on go[o]d - assert_eq!(find_nth_pairs_pos(slice, '{', 13, 1), Some((10, 15))); - assert_eq!(find_nth_pairs_pos(slice, '[', 13, 1), Some((4, 21))); - assert_eq!(find_nth_pairs_pos(slice, '(', 13, 1), Some((0, 27))); + assert_eq!(find_nth_pairs_pos(slice, '{', 13, 1), Some((10, 16))); + assert_eq!(find_nth_pairs_pos(slice, '[', 13, 1), Some((4, 22))); + assert_eq!(find_nth_pairs_pos(slice, '(', 13, 1), Some((0, 28))); } #[test] @@ -243,7 +256,7 @@ mod test { get_surround_pos(slice, &selection, '(', 1) .unwrap() .as_slice(), - &[0, 5, 7, 13, 15, 23] + &[0, 6, 7, 14, 15, 24] ); } diff --git a/helix-core/src/syntax.rs b/helix-core/src/syntax.rs index 14f36a0a..c8cb0557 100644 --- a/helix-core/src/syntax.rs +++ b/helix-core/src/syntax.rs @@ -1760,10 +1760,20 @@ impl<I: Iterator<Item = HighlightEvent>> Iterator for Merge<I> { self.next_event = self.iter.next(); Some(event) } - // can happen if deleting and cursor at EOF, and diagnostic reaches past the end - (None, Some((_, _))) => { - self.next_span = None; - None + // Can happen if cursor at EOF and/or diagnostic reaches past the end. + // We need to actually emit events for the cursor-at-EOF situation, + // even though the range is past the end of the text. This needs to be + // handled appropriately by the drawing code by not assuming that + // all `Source` events point to valid indices in the rope. + (None, Some((span, range))) => { + let event = HighlightStart(Highlight(*span)); + self.queue.push(HighlightEnd); + self.queue.push(Source { + start: range.start, + end: range.end, + }); + self.next_span = self.spans.next(); + Some(event) } (None, None) => None, e => unreachable!("{:?}", e), diff --git a/helix-core/src/textobject.rs b/helix-core/src/textobject.rs index fbf66256..b06bca5d 100644 --- a/helix-core/src/textobject.rs +++ b/helix-core/src/textobject.rs @@ -1,21 +1,16 @@ use ropey::RopeSlice; -use crate::chars::{categorize_char, char_is_line_ending, char_is_whitespace, CharCategory}; -use crate::movement::{self, Direction}; +use crate::chars::{categorize_char, CharCategory}; +use crate::graphemes::{next_grapheme_boundary, prev_grapheme_boundary}; +use crate::movement::Direction; use crate::surround; use crate::Range; -fn this_word_end_pos(slice: RopeSlice, pos: usize) -> usize { - this_word_bound_pos(slice, pos, Direction::Forward) -} - -fn this_word_start_pos(slice: RopeSlice, pos: usize) -> usize { - this_word_bound_pos(slice, pos, Direction::Backward) -} +fn find_word_boundary(slice: RopeSlice, mut pos: usize, direction: Direction) -> usize { + use CharCategory::{Eol, Whitespace}; -fn this_word_bound_pos(slice: RopeSlice, mut pos: usize, direction: Direction) -> usize { let iter = match direction { - Direction::Forward => slice.chars_at(pos + 1), + Direction::Forward => slice.chars_at(pos), Direction::Backward => { let mut iter = slice.chars_at(pos); iter.reverse(); @@ -23,25 +18,31 @@ fn this_word_bound_pos(slice: RopeSlice, mut pos: usize, direction: Direction) - } }; - match categorize_char(slice.char(pos)) { - CharCategory::Eol | CharCategory::Whitespace => pos, - category => { - for peek in iter { - let curr_category = categorize_char(peek); - if curr_category != category - || curr_category == CharCategory::Eol - || curr_category == CharCategory::Whitespace - { + let mut prev_category = match direction { + Direction::Forward if pos == 0 => Whitespace, + Direction::Forward => categorize_char(slice.char(pos - 1)), + Direction::Backward if pos == slice.len_chars() => Whitespace, + Direction::Backward => categorize_char(slice.char(pos)), + }; + + for ch in iter { + match categorize_char(ch) { + Eol | Whitespace => return pos, + category => { + if category != prev_category && pos != 0 && pos != slice.len_chars() { return pos; - } - pos = match direction { - Direction::Forward => pos + 1, - Direction::Backward => pos.saturating_sub(1), + } else { + match direction { + Direction::Forward => pos += 1, + Direction::Backward => pos = pos.saturating_sub(1), + } + prev_category = category; } } - pos } } + + pos } #[derive(Copy, Clone, PartialEq, Eq, Debug)] @@ -55,46 +56,37 @@ pub fn textobject_word( slice: RopeSlice, range: Range, textobject: TextObject, - count: usize, + _count: usize, ) -> Range { - let this_word_start = this_word_start_pos(slice, range.head); - let this_word_end = this_word_end_pos(slice, range.head); + let pos = range.cursor(slice); + + let word_start = find_word_boundary(slice, pos, Direction::Backward); + let word_end = match slice.get_char(pos).map(categorize_char) { + None | Some(CharCategory::Whitespace | CharCategory::Eol) => pos, + _ => find_word_boundary(slice, pos + 1, Direction::Forward), + }; + + // Special case. + if word_start == word_end { + return Range::new(word_start, word_end); + } - let (anchor, head); match textobject { - TextObject::Inside => { - anchor = this_word_start; - head = this_word_end; - } - TextObject::Around => { - if slice - .get_char(this_word_end + 1) - .map_or(true, char_is_line_ending) + TextObject::Inside => Range::new(word_start, word_end), + TextObject::Around => Range::new( + match slice + .get_char(word_start.saturating_sub(1)) + .map(categorize_char) { - head = this_word_end; - if slice - .get_char(this_word_start.saturating_sub(1)) - .map_or(true, char_is_line_ending) - { - // single word on a line - anchor = this_word_start; - } else { - // last word on a line, select the whitespace before it too - anchor = movement::move_prev_word_end(slice, range, count).head; - } - } else if char_is_whitespace(slice.char(range.head)) { - // select whole whitespace and next word - head = movement::move_next_word_end(slice, range, count).head; - anchor = movement::backwards_skip_while(slice, range.head, |c| c.is_whitespace()) - .map(|p| p + 1) // p is first *non* whitespace char, so +1 to get whitespace pos - .unwrap_or(0); - } else { - head = movement::move_next_word_start(slice, range, count).head; - anchor = this_word_start; - } - } - }; - Range::new(anchor, head) + None | Some(CharCategory::Eol) => word_start, + _ => prev_grapheme_boundary(slice, word_start), + }, + match slice.get_char(word_end).map(categorize_char) { + None | Some(CharCategory::Eol) => word_end, + _ => next_grapheme_boundary(slice, word_end), + }, + ), + } } pub fn textobject_surround( @@ -106,7 +98,10 @@ pub fn textobject_surround( ) -> Range { surround::find_nth_pairs_pos(slice, ch, range.head, count) .map(|(anchor, head)| match textobject { - TextObject::Inside => Range::new(anchor + 1, head.saturating_sub(1)), + TextObject::Inside => Range::new( + next_grapheme_boundary(slice, anchor), + prev_grapheme_boundary(slice, head), + ), TextObject::Around => Range::new(anchor, head), }) .unwrap_or(range) @@ -126,70 +121,70 @@ mod test { let tests = &[ ( "cursor at beginning of doc", - vec![(0, Inside, (0, 5)), (0, Around, (0, 6))], + vec![(0, Inside, (0, 6)), (0, Around, (0, 7))], ), ( "cursor at middle of word", vec![ - (13, Inside, (10, 15)), - (10, Inside, (10, 15)), - (15, Inside, (10, 15)), - (13, Around, (10, 16)), - (10, Around, (10, 16)), - (15, Around, (10, 16)), + (13, Inside, (10, 16)), + (10, Inside, (10, 16)), + (15, Inside, (10, 16)), + (13, Around, (9, 17)), + (10, Around, (9, 17)), + (15, Around, (9, 17)), ], ), ( "cursor between word whitespace", - vec![(6, Inside, (6, 6)), (6, Around, (6, 13))], + vec![(6, Inside, (6, 6)), (6, Around, (6, 6))], ), ( "cursor on word before newline\n", vec![ - (22, Inside, (22, 28)), - (28, Inside, (22, 28)), - (25, Inside, (22, 28)), - (22, Around, (21, 28)), - (28, Around, (21, 28)), - (25, Around, (21, 28)), + (22, Inside, (22, 29)), + (28, Inside, (22, 29)), + (25, Inside, (22, 29)), + (22, Around, (21, 29)), + (28, Around, (21, 29)), + (25, Around, (21, 29)), ], ), ( "cursor on newline\nnext line", - vec![(17, Inside, (17, 17)), (17, Around, (17, 22))], + vec![(17, Inside, (17, 17)), (17, Around, (17, 17))], ), ( "cursor on word after newline\nnext line", vec![ - (29, Inside, (29, 32)), - (30, Inside, (29, 32)), - (32, Inside, (29, 32)), - (29, Around, (29, 33)), - (30, Around, (29, 33)), - (32, Around, (29, 33)), + (29, Inside, (29, 33)), + (30, Inside, (29, 33)), + (32, Inside, (29, 33)), + (29, Around, (29, 34)), + (30, Around, (29, 34)), + (32, Around, (29, 34)), ], ), ( "cursor on #$%:;* punctuation", vec![ - (13, Inside, (10, 15)), - (10, Inside, (10, 15)), - (15, Inside, (10, 15)), - (13, Around, (10, 16)), - (10, Around, (10, 16)), - (15, Around, (10, 16)), + (13, Inside, (10, 16)), + (10, Inside, (10, 16)), + (15, Inside, (10, 16)), + (13, Around, (9, 17)), + (10, Around, (9, 17)), + (15, Around, (9, 17)), ], ), ( "cursor on punc%^#$:;.tuation", vec![ - (14, Inside, (14, 20)), - (20, Inside, (14, 20)), - (17, Inside, (14, 20)), - (14, Around, (14, 20)), + (14, Inside, (14, 21)), + (20, Inside, (14, 21)), + (17, Inside, (14, 21)), + (14, Around, (13, 22)), // FIXME: edge case // (20, Around, (14, 20)), - (17, Around, (14, 20)), + (17, Around, (13, 22)), ], ), ( @@ -198,14 +193,14 @@ mod test { (9, Inside, (9, 9)), (10, Inside, (10, 10)), (11, Inside, (11, 11)), - (9, Around, (9, 16)), - (10, Around, (9, 16)), - (11, Around, (9, 16)), + (9, Around, (9, 9)), + (10, Around, (10, 10)), + (11, Around, (11, 11)), ], ), ( "cursor at end of doc", - vec![(19, Inside, (17, 19)), (19, Around, (16, 19))], + vec![(19, Inside, (17, 20)), (19, Around, (16, 20))], ), ]; @@ -234,67 +229,67 @@ mod test { "simple (single) surround pairs", vec![ (3, Inside, (3, 3), '(', 1), - (7, Inside, (8, 13), ')', 1), - (10, Inside, (8, 13), '(', 1), - (14, Inside, (8, 13), ')', 1), + (7, Inside, (8, 14), ')', 1), + (10, Inside, (8, 14), '(', 1), + (14, Inside, (8, 14), ')', 1), (3, Around, (3, 3), '(', 1), - (7, Around, (7, 14), ')', 1), - (10, Around, (7, 14), '(', 1), - (14, Around, (7, 14), ')', 1), + (7, Around, (7, 15), ')', 1), + (10, Around, (7, 15), '(', 1), + (14, Around, (7, 15), ')', 1), ], ), ( "samexx 'single' surround pairs", vec![ (3, Inside, (3, 3), '\'', 1), - (7, Inside, (8, 13), '\'', 1), - (10, Inside, (8, 13), '\'', 1), - (14, Inside, (8, 13), '\'', 1), + (7, Inside, (7, 7), '\'', 1), + (10, Inside, (8, 14), '\'', 1), + (14, Inside, (14, 14), '\'', 1), (3, Around, (3, 3), '\'', 1), - (7, Around, (7, 14), '\'', 1), - (10, Around, (7, 14), '\'', 1), - (14, Around, (7, 14), '\'', 1), + (7, Around, (7, 7), '\'', 1), + (10, Around, (7, 15), '\'', 1), + (14, Around, (14, 14), '\'', 1), ], ), ( "(nested (surround (pairs)) 3 levels)", vec![ - (0, Inside, (1, 34), '(', 1), - (6, Inside, (1, 34), ')', 1), - (8, Inside, (9, 24), '(', 1), - (8, Inside, (9, 34), ')', 2), - (20, Inside, (9, 24), '(', 2), - (20, Inside, (1, 34), ')', 3), - (0, Around, (0, 35), '(', 1), - (6, Around, (0, 35), ')', 1), - (8, Around, (8, 25), '(', 1), - (8, Around, (8, 35), ')', 2), - (20, Around, (8, 25), '(', 2), - (20, Around, (0, 35), ')', 3), + (0, Inside, (1, 35), '(', 1), + (6, Inside, (1, 35), ')', 1), + (8, Inside, (9, 25), '(', 1), + (8, Inside, (9, 35), ')', 2), + (20, Inside, (9, 25), '(', 2), + (20, Inside, (1, 35), ')', 3), + (0, Around, (0, 36), '(', 1), + (6, Around, (0, 36), ')', 1), + (8, Around, (8, 26), '(', 1), + (8, Around, (8, 36), ')', 2), + (20, Around, (8, 26), '(', 2), + (20, Around, (0, 36), ')', 3), ], ), ( "(mixed {surround [pair] same} line)", vec![ - (2, Inside, (1, 33), '(', 1), - (9, Inside, (8, 27), '{', 1), - (18, Inside, (18, 21), '[', 1), - (2, Around, (0, 34), '(', 1), - (9, Around, (7, 28), '{', 1), - (18, Around, (17, 22), '[', 1), + (2, Inside, (1, 34), '(', 1), + (9, Inside, (8, 28), '{', 1), + (18, Inside, (18, 22), '[', 1), + (2, Around, (0, 35), '(', 1), + (9, Around, (7, 29), '{', 1), + (18, Around, (17, 23), '[', 1), ], ), ( "(stepped (surround) pairs (should) skip)", - vec![(22, Inside, (1, 38), '(', 1), (22, Around, (0, 39), '(', 1)], + vec![(22, Inside, (1, 39), '(', 1), (22, Around, (0, 40), '(', 1)], ), ( "[surround pairs{\non different]\nlines}", vec![ - (7, Inside, (1, 28), '[', 1), - (15, Inside, (16, 35), '{', 1), - (7, Around, (0, 29), '[', 1), - (15, Around, (15, 36), '{', 1), + (7, Inside, (1, 29), '[', 1), + (15, Inside, (16, 36), '{', 1), + (7, Around, (0, 30), '[', 1), + (15, Around, (15, 37), '{', 1), ], ), ]; |