4 files changed, 424 insertions, 5 deletions
diff --git a/helix-core/src/lib.rs b/helix-core/src/lib.rs
index c2bb8c55..3684a93e 100644
--- a/helix-core/src/lib.rs
+++ b/helix-core/src/lib.rs
@@ -18,6 +18,7 @@ pub mod selection;
 mod state;
 pub mod surround;
 pub mod syntax;
+pub mod textobject;
 mod transaction;
 
 pub mod unicode {
diff --git a/helix-core/src/movement.rs b/helix-core/src/movement.rs
index acc95e7e..f9e5deb4 100644
--- a/helix-core/src/movement.rs
+++ b/helix-core/src/movement.rs
@@ -113,6 +113,10 @@ pub fn move_prev_long_word_start(slice: RopeSlice, range: Range, count: usize) -
     word_move(slice, range, count, WordMotionTarget::PrevLongWordStart)
 }
 
+pub fn move_prev_word_end(slice: RopeSlice, range: Range, count: usize) -> Range {
+    word_move(slice, range, count, WordMotionTarget::PrevWordEnd) // repeated `count` times
+}
+
 fn word_move(slice: RopeSlice, range: Range, count: usize, target: WordMotionTarget) -> Range {
     (0..count).fold(range, |range, _| {
         slice.chars_at(range.head).range_to_target(target, range)
@@ -159,6 +163,7 @@ pub enum WordMotionTarget {
     NextWordStart,
     NextWordEnd,
     PrevWordStart,
+    PrevWordEnd,
     // A "Long word" (also known as a WORD in vim/kakoune) is strictly
     // delimited by whitespace, and can consist of punctuation as well
     // as alphanumerics.
@@ -181,7 +186,9 @@ impl CharHelpers for Chars<'_> {
     fn range_to_target(&mut self, target: WordMotionTarget, origin: Range) -> Range {
         // Characters are iterated forward or backwards depending on the motion direction.
         let characters: Box<dyn Iterator<Item = char>> = match target {
-            WordMotionTarget::PrevWordStart | WordMotionTarget::PrevLongWordStart => {
+            WordMotionTarget::PrevWordStart
+            | WordMotionTarget::PrevLongWordStart
+            | WordMotionTarget::PrevWordEnd => {
                 self.next();
                 Box::new(from_fn(|| self.prev()))
             }
@@ -190,9 +197,9 @@ impl CharHelpers for Chars<'_> {
 
         // Index advancement also depends on the direction.
         let advance: &dyn Fn(&mut usize) = match target {
-            WordMotionTarget::PrevWordStart | WordMotionTarget::PrevLongWordStart => {
-                &|u| *u = u.saturating_sub(1)
-            }
+            WordMotionTarget::PrevWordStart
+            | WordMotionTarget::PrevLongWordStart
+            | WordMotionTarget::PrevWordEnd => &|u| *u = u.saturating_sub(1),
             _ => &|u| *u += 1,
         };
 
@@ -265,7 +272,7 @@ fn reached_target(target: WordMotionTarget, peek: char, next_peek: Option<&char>
     };
 
     match target {
-        WordMotionTarget::NextWordStart => {
+        WordMotionTarget::NextWordStart | WordMotionTarget::PrevWordEnd => {
             is_word_boundary(peek, *next_peek)
                 && (char_is_line_ending(*next_peek) || !next_peek.is_whitespace())
         }
@@ -914,6 +921,88 @@ mod test {
     }
 
     #[test]
+    fn test_behaviour_when_moving_to_end_of_previous_words() {
+        let tests = array::IntoIter::new([ // (text, [(count, begin, expected_end), ...])
+            ("Basic backward motion from the middle of a word",
+                vec![(1, Range::new(9, 9), Range::new(9, 5))]),
+            ("Starting from after boundary retreats the anchor",
+                vec![(1, Range::new(0, 13), Range::new(12, 8))]),
+            ("Jump     to end of a word succeeded by whitespace",
+                vec![(1, Range::new(10, 10), Range::new(10, 4))]),
+            ("    Jump to start of line from end of word preceded by whitespace",
+                vec![(1, Range::new(7, 7), Range::new(7, 0))]),
+            ("Previous anchor is irrelevant for backward motions",
+                vec![(1, Range::new(26, 12), Range::new(12, 8))]),
+            ("    Starting from whitespace moves to first space in sequence",
+                vec![(1, Range::new(0, 3), Range::new(3, 0))]),
+            ("Test identifiers_with_underscores are considered a single word",
+                vec![(1, Range::new(0, 25), Range::new(25, 4))]),
+            ("Jumping\n    \nback through a newline selects whitespace",
+                vec![(1, Range::new(0, 13), Range::new(11, 8))]),
+            ("Jumping to start of word from the end selects the whole word",
+                vec![(1, Range::new(15, 15), Range::new(15, 10))]),
+            ("alphanumeric.!,and.?=punctuation are considered 'words' for the purposes of word motion",
+                vec![
+                    (1, Range::new(30, 30), Range::new(30, 21)),
+                    (1, Range::new(30, 21), Range::new(20, 18)),
+                    (1, Range::new(20, 18), Range::new(17, 15))
+                ]),
+
+            ("...   ... punctuation and spaces behave as expected",
+                vec![
+                    (1, Range::new(0, 10), Range::new(9, 9)),
+                    (1, Range::new(9, 6), Range::new(5, 3)),
+                ]),
+            (".._.._ punctuation is not joined by underscores into a single block",
+                vec![(1, Range::new(0, 5), Range::new(4, 3))]),
+            ("Newlines\n\nare bridged seamlessly.",
+                vec![
+                    (1, Range::new(0, 10), Range::new(7, 0)),
+                ]),
+            ("Jumping    \n\n\n\n\nback from within a newline group selects previous block",
+                vec![
+                    (1, Range::new(0, 13), Range::new(10, 7)),
+                ]),
+            ("Failed motions do not modify the range",
+                vec![
+                    (0, Range::new(3, 0), Range::new(3, 0)),
+                ]),
+            ("Multiple motions at once resolve correctly",
+                vec![
+                    (3, Range::new(23, 23), Range::new(15, 8)),
+                ]),
+            ("Excessive motions are performed partially",
+                vec![
+                    (999, Range::new(40, 40), Range::new(8, 0)),
+                ]),
+            ("", // Edge case of moving backwards in empty string
+                vec![
+                    (1, Range::new(0, 0), Range::new(0, 0)),
+                ]),
+            ("\n\n\n\n\n", // Edge case of moving backwards in all newlines
+                vec![
+                    (1, Range::new(0, 0), Range::new(0, 0)),
+                ]),
+            ("   \n   \nJumping back through alternated space blocks and newlines selects the space blocks",
+                vec![
+                    (1, Range::new(0, 7), Range::new(6, 4)),
+                    (1, Range::new(6, 4), Range::new(2, 0)),
+                ]),
+            ("Test ヒーリクス multibyte characters behave as normal characters",
+                vec![
+                    (1, Range::new(0, 9), Range::new(9, 4)),
+                ]),
+        ]);
+
+        for (sample, scenario) in tests { // steps start from their own `begin`, they don't chain
+            for (count, begin, expected_end) in scenario.into_iter() {
+                let range = move_prev_word_end(Rope::from(sample).slice(..), begin, count);
+                assert_eq!(range, expected_end, "Case failed: [{}]", sample);
+            }
+        }
+    }
+
+    #[test]
     fn test_behaviour_when_moving_to_end_of_next_long_words() {
         let tests = array::IntoIter::new([
             ("Basic forward motion from the start of a word to the end of it",
diff --git a/helix-core/src/selection.rs b/helix-core/src/selection.rs
index d99e2aff..63b9b557 100644
--- a/helix-core/src/selection.rs
+++ b/helix-core/src/selection.rs
@@ -130,6 +130,16 @@ impl Range {
     }
 }
 
+impl From<(usize, usize)> for Range {
+    fn from((anchor, head): (usize, usize)) -> Self {
+        Self {
+            anchor,
+            head,
+            horiz: None, // the tuple carries no `horiz` value
+        }
+    }
+}
+
 /// A selection consists of one or more selection ranges.
 /// invariant: A selection can never be empty (always contains at least primary range).
 #[derive(Debug, Clone, PartialEq, Eq)]
diff --git a/helix-core/src/textobject.rs b/helix-core/src/textobject.rs
new file mode 100644
index 00000000..d29eb03c
--- /dev/null
+++ b/helix-core/src/textobject.rs
@@ -0,0 +1,319 @@
+use ropey::RopeSlice;
+
+use crate::chars::{categorize_char, char_is_line_ending, char_is_whitespace, CharCategory};
+use crate::movement::{self, Direction};
+use crate::surround;
+use crate::Range;
+
+/// Char index of the last char of the word under `pos` (see `this_word_bound_pos`).
+fn this_word_end_pos(slice: RopeSlice, pos: usize) -> usize {
+    this_word_bound_pos(slice, pos, Direction::Forward)
+}
+
+/// Char index of the first char of the word under `pos` (see `this_word_bound_pos`).
+fn this_word_start_pos(slice: RopeSlice, pos: usize) -> usize {
+    this_word_bound_pos(slice, pos, Direction::Backward)
+}
+
+/// Walks from `pos` in `direction` while the chars keep the category of the char at
+/// `pos` and returns the index of the last char that still matched. `pos` is returned
+/// as is when it sits on whitespace or a line ending, or lies outside the text.
+fn this_word_bound_pos(slice: RopeSlice, pos: usize, direction: Direction) -> usize {
+    // `get_char` rather than `char`: an empty text or a position past the last char
+    // must not panic.
+    let category = match slice.get_char(pos).map(categorize_char) {
+        None | Some(CharCategory::Eol) | Some(CharCategory::Whitespace) => return pos,
+        Some(category) => category,
+    };
+    // Created only after the check above, so `pos + 1` never exceeds `len_chars()`.
+    let neighbours = match direction {
+        Direction::Forward => slice.chars_at(pos + 1),
+        Direction::Backward => {
+            let mut iter = slice.chars_at(pos);
+            iter.reverse();
+            iter
+        }
+    };
+    // Each neighbour in the same category moves the bound by one char.
+    let run = neighbours
+        .take_while(|&ch| categorize_char(ch) == category)
+        .count();
+    match direction {
+        Direction::Forward => pos + run,
+        Direction::Backward => pos - run,
+    }
+}
+
+#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+pub enum TextObject {
+    Around, // the object together with its delimiters or adjacent whitespace
+    Inside, // only what lies within the object, without delimiters or whitespace
+}
+
+/// Selects the word under `range.head`, from its first to its last char.
+///
+/// `Inside` covers just the same-category run at the cursor. `Around` also takes in
+/// neighbouring whitespace; which side is chosen is explained on the branches below.
+/// `count` is only forwarded to the word motions used by `Around`; `Inside` ignores
+/// it.
+pub fn textobject_word(
+    slice: RopeSlice,
+    range: Range,
+    textobject: TextObject,
+    count: usize,
+) -> Range {
+    let this_word_start = this_word_start_pos(slice, range.head);
+    let this_word_end = this_word_end_pos(slice, range.head);
+    // A missing neighbour (start or end of the text) counts like a line ending.
+    let is_line_bound = |pos: Option<usize>| {
+        pos.and_then(|p| slice.get_char(p))
+            .map_or(true, char_is_line_ending)
+    };
+
+    let (anchor, head) = match textobject {
+        TextObject::Inside => (this_word_start, this_word_end),
+        TextObject::Around => {
+            if is_line_bound(Some(this_word_end + 1)) {
+                // `checked_sub`: index 0 has no previous char, whereas `saturating_sub`
+                // would look at the word's own first char again.
+                if is_line_bound(this_word_start.checked_sub(1)) {
+                    // single word on a line
+                    (this_word_start, this_word_end)
+                } else {
+                    // last word on a line, select the whitespace before it too
+                    (movement::move_prev_word_end(slice, range, count).head, this_word_end)
+                }
+            } else if char_is_whitespace(slice.char(range.head)) {
+                // select whole whitespace and next word
+                let head = movement::move_next_word_end(slice, range, count).head;
+                // the skip reports the first *non* whitespace char, so +1 is whitespace
+                let ws = movement::backwards_skip_while(slice, range.head, |c| c.is_whitespace());
+                (ws.map_or(0, |p| p + 1), head)
+            } else {
+                (this_word_start, movement::move_next_word_start(slice, range, count).head)
+            }
+        }
+    };
+    Range::new(anchor, head)
+}
+
+/// Selects the `ch` pair found around `range.head`, or returns `range` if there is none.
+pub fn textobject_surround(
+    slice: RopeSlice,
+    range: Range,
+    textobject: TextObject,
+    ch: char,
+    count: usize,
+) -> Range {
+    let pair = surround::find_nth_pairs_pos(slice, ch, range.head, count);
+    pair.map_or(range, |(open, close)| match textobject {
+        TextObject::Around => Range::new(open, close), // pair chars included
+        TextObject::Inside => Range::new(open + 1, close.saturating_sub(1)), // excluded
+    })
+}
+
+#[cfg(test)]
+mod test {
+    use super::TextObject::*;
+    use super::*;
+
+    use crate::Range;
+    use ropey::Rope;
+
+    #[test]
+    fn test_textobject_word() {
+        // (text, [(cursor char index, textobject, expected (anchor, head)), ...])
+        let tests = &[
+            (
+                "cursor at beginning of doc",
+                vec![(0, Inside, (0, 5)), (0, Around, (0, 6))],
+            ),
+            (
+                "cursor at middle of word",
+                vec![
+                    (13, Inside, (10, 15)),
+                    (10, Inside, (10, 15)),
+                    (15, Inside, (10, 15)),
+                    (13, Around, (10, 16)),
+                    (10, Around, (10, 16)),
+                    (15, Around, (10, 16)),
+                ],
+            ),
+            (
+                "cursor between word whitespace",
+                vec![(6, Inside, (6, 6)), (6, Around, (6, 13))],
+            ),
+            (
+                "cursor on word before newline\n",
+                vec![
+                    (22, Inside, (22, 28)),
+                    (28, Inside, (22, 28)),
+                    (25, Inside, (22, 28)),
+                    (22, Around, (21, 28)),
+                    (28, Around, (21, 28)),
+                    (25, Around, (21, 28)),
+                ],
+            ),
+            (
+                "cursor on newline\nnext line",
+                vec![(17, Inside, (17, 17)), (17, Around, (17, 22))],
+            ),
+            (
+                "cursor on word after newline\nnext line",
+                vec![
+                    (29, Inside, (29, 32)),
+                    (30, Inside, (29, 32)),
+                    (32, Inside, (29, 32)),
+                    (29, Around, (29, 33)),
+                    (30, Around, (29, 33)),
+                    (32, Around, (29, 33)),
+                ],
+            ),
+            (
+                "cursor on #$%:;* punctuation",
+                vec![
+                    (13, Inside, (10, 15)),
+                    (10, Inside, (10, 15)),
+                    (15, Inside, (10, 15)),
+                    (13, Around, (10, 16)),
+                    (10, Around, (10, 16)),
+                    (15, Around, (10, 16)),
+                ],
+            ),
+            (
+                "cursor on punc%^#$:;.tuation",
+                vec![
+                    (14, Inside, (14, 20)),
+                    (20, Inside, (14, 20)),
+                    (17, Inside, (14, 20)),
+                    (14, Around, (14, 20)),
+                    // FIXME: edge case, cursor on the last char of the punctuation run
+                    // (20, Around, (14, 20)),
+                    (17, Around, (14, 20)),
+                ],
+            ),
+            (
+                "cursor in   extra whitespace",
+                vec![
+                    (9, Inside, (9, 9)),
+                    (10, Inside, (10, 10)),
+                    (11, Inside, (11, 11)),
+                    (9, Around, (9, 16)),
+                    (10, Around, (9, 16)),
+                    (11, Around, (9, 16)),
+                ],
+            ),
+            (
+                "cursor at end of doc",
+                vec![(19, Inside, (17, 19)), (19, Around, (16, 19))],
+            ),
+        ];
+
+        for (sample, scenario) in tests {
+            let doc = Rope::from(*sample);
+            let slice = doc.slice(..);
+            for &case in scenario {
+                let (pos, objtype, expected_range) = case;
+                let result = textobject_word(slice, Range::point(pos), objtype, 1);
+                assert_eq!(
+                    result,
+                    expected_range.into(),
+                    "\nCase failed: {:?} - {:?}",
+                    sample,
+                    case
+                );
+            }
+        }
+    }
+
+    #[test]
+    fn test_textobject_surround() {
+        // (text, [(cursor position, textobject, final range, pair char, count), ...])
+        let tests = &[
+            (
+                "simple (single) surround pairs",
+                vec![
+                    (3, Inside, (3, 3), '(', 1),
+                    (7, Inside, (8, 13), ')', 1),
+                    (10, Inside, (8, 13), '(', 1),
+                    (14, Inside, (8, 13), ')', 1),
+                    (3, Around, (3, 3), '(', 1),
+                    (7, Around, (7, 14), ')', 1),
+                    (10, Around, (7, 14), '(', 1),
+                    (14, Around, (7, 14), ')', 1),
+                ],
+            ),
+            (
+                "samexx 'single' surround pairs",
+                vec![
+                    (3, Inside, (3, 3), '\'', 1),
+                    // FIXME: surround doesn't work when the cursor is *on* a same-char pair
+                    // (7, Inside, (8, 13), '\'', 1),
+                    (10, Inside, (8, 13), '\'', 1),
+                    // (14, Inside, (8, 13), '\'', 1),
+                    (3, Around, (3, 3), '\'', 1),
+                    // (7, Around, (7, 14), '\'', 1),
+                    (10, Around, (7, 14), '\'', 1),
+                    // (14, Around, (7, 14), '\'', 1),
+                ],
+            ),
+            (
+                "(nested (surround (pairs)) 3 levels)",
+                vec![
+                    (0, Inside, (1, 34), '(', 1),
+                    (6, Inside, (1, 34), ')', 1),
+                    (8, Inside, (9, 24), '(', 1),
+                    (8, Inside, (9, 34), ')', 2),
+                    (20, Inside, (9, 24), '(', 2),
+                    (20, Inside, (1, 34), ')', 3),
+                    (0, Around, (0, 35), '(', 1),
+                    (6, Around, (0, 35), ')', 1),
+                    (8, Around, (8, 25), '(', 1),
+                    (8, Around, (8, 35), ')', 2),
+                    (20, Around, (8, 25), '(', 2),
+                    (20, Around, (0, 35), ')', 3),
+                ],
+            ),
+            (
+                "(mixed {surround [pair] same} line)",
+                vec![
+                    (2, Inside, (1, 33), '(', 1),
+                    (9, Inside, (8, 27), '{', 1),
+                    (18, Inside, (18, 21), '[', 1),
+                    (2, Around, (0, 34), '(', 1),
+                    (9, Around, (7, 28), '{', 1),
+                    (18, Around, (17, 22), '[', 1),
+                ],
+            ),
+            (
+                "(stepped (surround) pairs (should) skip)",
+                vec![(22, Inside, (1, 38), '(', 1), (22, Around, (0, 39), '(', 1)],
+            ),
+            (
+                "[surround pairs{\non different]\nlines}",
+                vec![
+                    (7, Inside, (1, 28), '[', 1),
+                    (15, Inside, (16, 35), '{', 1),
+                    (7, Around, (0, 29), '[', 1),
+                    (15, Around, (15, 36), '{', 1),
+                ],
+            ),
+        ];
+
+        for (sample, scenario) in tests {
+            let doc = Rope::from(*sample);
+            let slice = doc.slice(..);
+            for &case in scenario {
+                let (pos, objtype, expected_range, ch, count) = case;
+                let result = textobject_surround(slice, Range::point(pos), objtype, ch, count);
+                assert_eq!(
+                    result,
+                    expected_range.into(),
+                    "\nCase failed: {:?} - {:?}",
+                    sample,
+                    case
+                );
+            }
+        }
+    }
+}