8 files changed, 513 insertions, 10 deletions
diff --git a/helix-core/Cargo.toml b/helix-core/Cargo.toml
index 726e90cc..80d559a9 100644
--- a/helix-core/Cargo.toml
+++ b/helix-core/Cargo.toml
@@ -31,5 +31,10 @@ regex = "1"
 serde = { version = "1.0", features = ["derive"] }
 toml = "0.5"
 
+similar = "1.3"
+
 etcetera = "0.3"
 rust-embed = { version = "5.9.0", optional = true }
+
+[dev-dependencies]
+quickcheck = { version = "1", default-features = false }
diff --git a/helix-core/src/diff.rs b/helix-core/src/diff.rs
new file mode 100644
index 00000000..9c1fc999
--- /dev/null
+++ b/helix-core/src/diff.rs
@@ -0,0 +1,70 @@
+use ropey::Rope;
+
+use crate::{Change, Transaction};
+
+/// Compares `old` and `new` to generate a [`Transaction`] describing
+/// the steps required to get from `old` to `new`.
+pub fn compare_ropes(old: &Rope, new: &Rope) -> Transaction {
+    // `similar` only works on contiguous data, so a `Rope` has
+    // to be temporarily converted into a `String`.
+    let old_converted = old.to_string();
+    let new_converted = new.to_string();
+
+    // A timeout is set so after 1 second, the algorithm will start
+    // approximating. This is especially important for big `Rope`s or
+    // `Rope`s that are extremely dissimilar to each other.
+    //
+    // Note: Ignore the clippy warning, as the trait bounds of
+    // `Transaction::change()` require an iterator implementing
+    // `ExactSizeIterator`.
+    let mut config = similar::TextDiff::configure();
+    config.timeout(std::time::Duration::from_secs(1));
+
+    let diff = config.diff_chars(&old_converted, &new_converted);
+
+    // The current position of the change needs to be tracked to
+    // construct the `Change`s.
+    let mut pos = 0;
+    let changes: Vec<Change> = diff
+        .ops()
+        .iter()
+        .map(|op| op.as_tag_tuple())
+        .filter_map(|(tag, old_range, new_range)| {
+            // `old_pos..pos` is equivalent to `start..end` for where
+            // the change should be applied.
+            let old_pos = pos;
+            pos += old_range.end - old_range.start;
+
+            match tag {
+                // Semantically, inserts and replacements are the same thing.
+                similar::DiffTag::Insert | similar::DiffTag::Replace => {
+                    // This is the text from the `new` rope that should be
+                    // inserted into `old`.
+                    let text: &str = {
+                        let start = new.char_to_byte(new_range.start);
+                        let end = new.char_to_byte(new_range.end);
+                        &new_converted[start..end]
+                    };
+                    Some((old_pos, pos, Some(text.into())))
+                }
+                similar::DiffTag::Delete => Some((old_pos, pos, None)),
+                similar::DiffTag::Equal => None,
+            }
+        })
+        .collect();
+    Transaction::change(old, changes.into_iter())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    quickcheck::quickcheck! {
+        fn test_compare_ropes(a: String, b: String) -> bool {
+            let mut old = Rope::from(a);
+            let new = Rope::from(b);
+            compare_ropes(&old, &new).apply(&mut old);
+            old.to_string() == new.to_string()
+        }
+    }
+}
diff --git a/helix-core/src/lib.rs b/helix-core/src/lib.rs
index dfbbd748..3684a93e 100644
--- a/helix-core/src/lib.rs
+++ b/helix-core/src/lib.rs
@@ -2,6 +2,7 @@ pub mod auto_pairs;
 pub mod chars;
 pub mod comment;
 pub mod diagnostic;
+pub mod diff;
 pub mod graphemes;
 pub mod history;
 pub mod indent;
@@ -17,6 +18,7 @@ pub mod selection;
 mod state;
 pub mod surround;
 pub mod syntax;
+pub mod textobject;
 mod transaction;
 
 pub mod unicode {
diff --git a/helix-core/src/movement.rs b/helix-core/src/movement.rs
index 62311ee4..bc56f9a4 100644
--- a/helix-core/src/movement.rs
+++ b/helix-core/src/movement.rs
@@ -176,6 +176,10 @@ pub fn move_prev_long_word_start(slice: RopeSlice, range: Range, count: usize) -
     word_move(slice, range, count, WordMotionTarget::PrevLongWordStart)
 }
 
+pub fn move_prev_word_end(slice: RopeSlice, range: Range, count: usize) -> Range {
+    word_move(slice, range, count, WordMotionTarget::PrevWordEnd)
+}
+
 fn word_move(slice: RopeSlice, range: Range, count: usize, target: WordMotionTarget) -> Range {
     (0..count).fold(range, |range, _| {
         slice.chars_at(range.head).range_to_target(target, range)
@@ -222,6 +226,7 @@ pub enum WordMotionTarget {
     NextWordStart,
     NextWordEnd,
     PrevWordStart,
+    PrevWordEnd,
     // A "Long word" (also known as a WORD in vim/kakoune) is strictly
     // delimited by whitespace, and can consist of punctuation as well
     // as alphanumerics.
@@ -244,7 +249,9 @@ impl CharHelpers for Chars<'_> {
     fn range_to_target(&mut self, target: WordMotionTarget, origin: Range) -> Range {
         // Characters are iterated forward or backwards depending on the motion direction.
         let characters: Box<dyn Iterator<Item = char>> = match target {
-            WordMotionTarget::PrevWordStart | WordMotionTarget::PrevLongWordStart => {
+            WordMotionTarget::PrevWordStart
+            | WordMotionTarget::PrevLongWordStart
+            | WordMotionTarget::PrevWordEnd => {
                 self.next();
                 Box::new(from_fn(|| self.prev()))
             }
@@ -253,9 +260,9 @@ impl CharHelpers for Chars<'_> {
 
         // Index advancement also depends on the direction.
         let advance: &dyn Fn(&mut usize) = match target {
-            WordMotionTarget::PrevWordStart | WordMotionTarget::PrevLongWordStart => {
-                &|u| *u = u.saturating_sub(1)
-            }
+            WordMotionTarget::PrevWordStart
+            | WordMotionTarget::PrevLongWordStart
+            | WordMotionTarget::PrevWordEnd => &|u| *u = u.saturating_sub(1),
             _ => &|u| *u += 1,
         };
 
@@ -328,7 +335,7 @@ fn reached_target(target: WordMotionTarget, peek: char, next_peek: Option<&char>
     };
 
     match target {
-        WordMotionTarget::NextWordStart => {
+        WordMotionTarget::NextWordStart | WordMotionTarget::PrevWordEnd => {
             is_word_boundary(peek, *next_peek)
                 && (char_is_line_ending(*next_peek) || !next_peek.is_whitespace())
         }
@@ -979,6 +986,88 @@ mod test {
     }
 
     #[test]
+    fn test_behaviour_when_moving_to_end_of_previous_words() {
+        let tests = array::IntoIter::new([
+            ("Basic backward motion from the middle of a word",
+                vec![(1, Range::new(9, 9), Range::new(9, 5))]),
+            ("Starting from after boundary retreats the anchor",
+                vec![(1, Range::new(0, 13), Range::new(12, 8))]),
+            ("Jump     to end of a word succeeded by whitespace",
+                vec![(1, Range::new(10, 10), Range::new(10, 4))]),
+            ("    Jump to start of line from end of word preceded by whitespace",
+                vec![(1, Range::new(7, 7), Range::new(7, 0))]),
+            ("Previous anchor is irrelevant for backward motions",
+                vec![(1, Range::new(26, 12), Range::new(12, 8))]),
+            ("    Starting from whitespace moves to first space in sequence",
+                vec![(1, Range::new(0, 3), Range::new(3, 0))]),
+            ("Test identifiers_with_underscores are considered a single word",
+                vec![(1, Range::new(0, 25), Range::new(25, 4))]),
+            ("Jumping\n    \nback through a newline selects whitespace",
+                vec![(1, Range::new(0, 13), Range::new(11, 8))]),
+            ("Jumping to start of word from the end selects the whole word",
+                vec![(1, Range::new(15, 15), Range::new(15, 10))]),
+            ("alphanumeric.!,and.?=punctuation are considered 'words' for the purposes of word motion",
+                vec![
+                    (1, Range::new(30, 30), Range::new(30, 21)),
+                    (1, Range::new(30, 21), Range::new(20, 18)),
+                    (1, Range::new(20, 18), Range::new(17, 15))
+                ]),
+
+            ("...   ... punctuation and spaces behave as expected",
+                vec![
+                    (1, Range::new(0, 10), Range::new(9, 9)),
+                    (1, Range::new(9, 6), Range::new(5, 3)),
+                ]),
+            (".._.._ punctuation is not joined by underscores into a single block",
+                vec![(1, Range::new(0, 5), Range::new(4, 3))]),
+            ("Newlines\n\nare bridged seamlessly.",
+                vec![
+                    (1, Range::new(0, 10), Range::new(7, 0)),
+                ]),
+            ("Jumping    \n\n\n\n\nback from within a newline group selects previous block",
+                vec![
+                    (1, Range::new(0, 13), Range::new(10, 7)),
+                ]),
+            ("Failed motions do not modify the range",
+                vec![
+                    (0, Range::new(3, 0), Range::new(3, 0)),
+                ]),
+            ("Multiple motions at once resolve correctly",
+                vec![
+                    (3, Range::new(23, 23), Range::new(15, 8)),
+                ]),
+            ("Excessive motions are performed partially",
+                vec![
+                    (999, Range::new(40, 40), Range::new(8, 0)),
+                ]),
+            ("", // Edge case of moving backwards in empty string
+                vec![
+                    (1, Range::new(0, 0), Range::new(0, 0)),
+                ]),
+            ("\n\n\n\n\n", // Edge case of moving backwards in all newlines
+                vec![
+                    (1, Range::new(0, 0), Range::new(0, 0)),
+                ]),
+            ("   \n   \nJumping back through alternated space blocks and newlines selects the space blocks",
+                vec![
+                    (1, Range::new(0, 7), Range::new(6, 4)),
+                    (1, Range::new(6, 4), Range::new(2, 0)),
+                ]),
+            ("Test ヒーリクス multibyte characters behave as normal characters",
+                vec![
+                    (1, Range::new(0, 9), Range::new(9, 4)),
+                ]),
+        ]);
+
+        for (sample, scenario) in tests {
+            for (count, begin, expected_end) in scenario.into_iter() {
+                let range = move_prev_word_end(Rope::from(sample).slice(..), begin, count);
+                assert_eq!(range, expected_end, "Case failed: [{}]", sample);
+            }
+        }
+    }
+
+    #[test]
     fn test_behaviour_when_moving_to_end_of_next_long_words() {
         let tests = array::IntoIter::new([
             ("Basic forward motion from the start of a word to the end of it",
diff --git a/helix-core/src/selection.rs b/helix-core/src/selection.rs
index 6ca798a6..64ff51d8 100644
--- a/helix-core/src/selection.rs
+++ b/helix-core/src/selection.rs
@@ -216,6 +216,16 @@ impl Range {
     }
 }
 
+impl From<(usize, usize)> for Range {
+    fn from(tuple: (usize, usize)) -> Self {
+        Self {
+            anchor: tuple.0,
+            head: tuple.1,
+            horiz: None,
+        }
+    }
+}
+
 /// A selection consists of one or more selection ranges.
 /// invariant: A selection can never be empty (always contains at least primary range).
 #[derive(Debug, Clone, PartialEq, Eq)]
diff --git a/helix-core/src/surround.rs b/helix-core/src/surround.rs
index 61981d6e..52f60cab 100644
--- a/helix-core/src/surround.rs
+++ b/helix-core/src/surround.rs
@@ -41,11 +41,14 @@ pub fn find_nth_pairs_pos(
     let (open, close) = get_pair(ch);
 
     let (open_pos, close_pos) = if open == close {
-        // find_nth* do not consider current character; +1/-1 to include them
-        (
-            search::find_nth_prev(text, open, pos + 1, n, true)?,
-            search::find_nth_next(text, close, pos - 1, n, true)?,
-        )
+        let prev = search::find_nth_prev(text, open, pos, n, true);
+        let next = search::find_nth_next(text, close, pos, n, true);
+        if text.char(pos) == open {
+            // cursor is *on* a pair
+            next.map(|n| (pos, n)).or_else(|| prev.map(|p| (p, pos)))?
+        } else {
+            (prev?, next?)
+        }
     } else {
         (
             find_nth_open_pair(text, open, close, pos, n)?,
@@ -198,6 +201,11 @@ mod test {
         assert_eq!(find_nth_pairs_pos(slice, '\'', 13, 1), Some((10, 15)));
         assert_eq!(find_nth_pairs_pos(slice, '\'', 13, 2), Some((4, 21)));
         assert_eq!(find_nth_pairs_pos(slice, '\'', 13, 3), Some((0, 27)));
+        // cursor on the quotes
+        assert_eq!(find_nth_pairs_pos(slice, '\'', 10, 1), Some((10, 15)));
+        // this is the best we can do since opening and closing pairs are the same
+        assert_eq!(find_nth_pairs_pos(slice, '\'', 0, 1), Some((0, 4)));
+        assert_eq!(find_nth_pairs_pos(slice, '\'', 27, 1), Some((21, 27)));
     }
 
     #[test]
diff --git a/helix-core/src/syntax.rs b/helix-core/src/syntax.rs
index d4379a8e..84a5f9bd 100644
--- a/helix-core/src/syntax.rs
+++ b/helix-core/src/syntax.rs
@@ -94,6 +94,7 @@ fn load_runtime_file(language: &str, filename: &str) -> Result<String, std::io::
 #[cfg(feature = "embed_runtime")]
 fn load_runtime_file(language: &str, filename: &str) -> Result<String, Box<dyn std::error::Error>> {
     use std::fmt;
+    use std::path::PathBuf;
 
     #[derive(rust_embed::RustEmbed)]
     #[folder = "../runtime/"]
diff --git a/helix-core/src/textobject.rs b/helix-core/src/textobject.rs
new file mode 100644
index 00000000..fbf66256
--- /dev/null
+++ b/helix-core/src/textobject.rs
@@ -0,0 +1,318 @@
+use ropey::RopeSlice;
+
+use crate::chars::{categorize_char, char_is_line_ending, char_is_whitespace, CharCategory};
+use crate::movement::{self, Direction};
+use crate::surround;
+use crate::Range;
+
+fn this_word_end_pos(slice: RopeSlice, pos: usize) -> usize {
+    this_word_bound_pos(slice, pos, Direction::Forward)
+}
+
+fn this_word_start_pos(slice: RopeSlice, pos: usize) -> usize {
+    this_word_bound_pos(slice, pos, Direction::Backward)
+}
+
+fn this_word_bound_pos(slice: RopeSlice, mut pos: usize, direction: Direction) -> usize {
+    let iter = match direction {
+        Direction::Forward => slice.chars_at(pos + 1),
+        Direction::Backward => {
+            let mut iter = slice.chars_at(pos);
+            iter.reverse();
+            iter
+        }
+    };
+
+    match categorize_char(slice.char(pos)) {
+        CharCategory::Eol | CharCategory::Whitespace => pos,
+        category => {
+            for peek in iter {
+                let curr_category = categorize_char(peek);
+                if curr_category != category
+                    || curr_category == CharCategory::Eol
+                    || curr_category == CharCategory::Whitespace
+                {
+                    return pos;
+                }
+                pos = match direction {
+                    Direction::Forward => pos + 1,
+                    Direction::Backward => pos.saturating_sub(1),
+                }
+            }
+            pos
+        }
+    }
+}
+
+#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+pub enum TextObject {
+    Around,
+    Inside,
+}
+
+// `count` is ignored for `Inside`; for `Around` it is merely passed through to the `movement` word motions
+pub fn textobject_word(
+    slice: RopeSlice,
+    range: Range,
+    textobject: TextObject,
+    count: usize,
+) -> Range {
+    let this_word_start = this_word_start_pos(slice, range.head);
+    let this_word_end = this_word_end_pos(slice, range.head);
+
+    let (anchor, head);
+    match textobject {
+        TextObject::Inside => {
+            anchor = this_word_start;
+            head = this_word_end;
+        }
+        TextObject::Around => {
+            if slice
+                .get_char(this_word_end + 1)
+                .map_or(true, char_is_line_ending)
+            {
+                head = this_word_end;
+                if slice
+                    .get_char(this_word_start.saturating_sub(1))
+                    .map_or(true, char_is_line_ending)
+                {
+                    // single word on a line
+                    anchor = this_word_start;
+                } else {
+                    // last word on a line, select the whitespace before it too
+                    anchor = movement::move_prev_word_end(slice, range, count).head;
+                }
+            } else if char_is_whitespace(slice.char(range.head)) {
+                // select whole whitespace and next word
+                head = movement::move_next_word_end(slice, range, count).head;
+                anchor = movement::backwards_skip_while(slice, range.head, |c| c.is_whitespace())
+                    .map(|p| p + 1) // p is first *non* whitespace char, so +1 to get whitespace pos
+                    .unwrap_or(0);
+            } else {
+                head = movement::move_next_word_start(slice, range, count).head;
+                anchor = this_word_start;
+            }
+        }
+    };
+    Range::new(anchor, head)
+}
+
+pub fn textobject_surround(
+    slice: RopeSlice,
+    range: Range,
+    textobject: TextObject,
+    ch: char,
+    count: usize,
+) -> Range {
+    surround::find_nth_pairs_pos(slice, ch, range.head, count)
+        .map(|(anchor, head)| match textobject {
+            TextObject::Inside => Range::new(anchor + 1, head.saturating_sub(1)),
+            TextObject::Around => Range::new(anchor, head),
+        })
+        .unwrap_or(range)
+}
+
+#[cfg(test)]
+mod test {
+    use super::TextObject::*;
+    use super::*;
+
+    use crate::Range;
+    use ropey::Rope;
+
+    #[test]
+    fn test_textobject_word() {
+        // (text, [(cursor position, textobject, final range), ...])
+        let tests = &[
+            (
+                "cursor at beginning of doc",
+                vec![(0, Inside, (0, 5)), (0, Around, (0, 6))],
+            ),
+            (
+                "cursor at middle of word",
+                vec![
+                    (13, Inside, (10, 15)),
+                    (10, Inside, (10, 15)),
+                    (15, Inside, (10, 15)),
+                    (13, Around, (10, 16)),
+                    (10, Around, (10, 16)),
+                    (15, Around, (10, 16)),
+                ],
+            ),
+            (
+                "cursor between word whitespace",
+                vec![(6, Inside, (6, 6)), (6, Around, (6, 13))],
+            ),
+            (
+                "cursor on word before newline\n",
+                vec![
+                    (22, Inside, (22, 28)),
+                    (28, Inside, (22, 28)),
+                    (25, Inside, (22, 28)),
+                    (22, Around, (21, 28)),
+                    (28, Around, (21, 28)),
+                    (25, Around, (21, 28)),
+                ],
+            ),
+            (
+                "cursor on newline\nnext line",
+                vec![(17, Inside, (17, 17)), (17, Around, (17, 22))],
+            ),
+            (
+                "cursor on word after newline\nnext line",
+                vec![
+                    (29, Inside, (29, 32)),
+                    (30, Inside, (29, 32)),
+                    (32, Inside, (29, 32)),
+                    (29, Around, (29, 33)),
+                    (30, Around, (29, 33)),
+                    (32, Around, (29, 33)),
+                ],
+            ),
+            (
+                "cursor on #$%:;* punctuation",
+                vec![
+                    (13, Inside, (10, 15)),
+                    (10, Inside, (10, 15)),
+                    (15, Inside, (10, 15)),
+                    (13, Around, (10, 16)),
+                    (10, Around, (10, 16)),
+                    (15, Around, (10, 16)),
+                ],
+            ),
+            (
+                "cursor on punc%^#$:;.tuation",
+                vec![
+                    (14, Inside, (14, 20)),
+                    (20, Inside, (14, 20)),
+                    (17, Inside, (14, 20)),
+                    (14, Around, (14, 20)),
+                    // FIXME: edge case
+                    // (20, Around, (14, 20)),
+                    (17, Around, (14, 20)),
+                ],
+            ),
+            (
+                "cursor in   extra whitespace",
+                vec![
+                    (9, Inside, (9, 9)),
+                    (10, Inside, (10, 10)),
+                    (11, Inside, (11, 11)),
+                    (9, Around, (9, 16)),
+                    (10, Around, (9, 16)),
+                    (11, Around, (9, 16)),
+                ],
+            ),
+            (
+                "cursor at end of doc",
+                vec![(19, Inside, (17, 19)), (19, Around, (16, 19))],
+            ),
+        ];
+
+        for (sample, scenario) in tests {
+            let doc = Rope::from(*sample);
+            let slice = doc.slice(..);
+            for &case in scenario {
+                let (pos, objtype, expected_range) = case;
+                let result = textobject_word(slice, Range::point(pos), objtype, 1);
+                assert_eq!(
+                    result,
+                    expected_range.into(),
+                    "\nCase failed: {:?} - {:?}",
+                    sample,
+                    case
+                );
+            }
+        }
+    }
+
+    #[test]
+    fn test_textobject_surround() {
+        // (text, [(cursor position, textobject, final range, surround char, count), ...])
+        let tests = &[
+            (
+                "simple (single) surround pairs",
+                vec![
+                    (3, Inside, (3, 3), '(', 1),
+                    (7, Inside, (8, 13), ')', 1),
+                    (10, Inside, (8, 13), '(', 1),
+                    (14, Inside, (8, 13), ')', 1),
+                    (3, Around, (3, 3), '(', 1),
+                    (7, Around, (7, 14), ')', 1),
+                    (10, Around, (7, 14), '(', 1),
+                    (14, Around, (7, 14), ')', 1),
+                ],
+            ),
+            (
+                "samexx 'single' surround pairs",
+                vec![
+                    (3, Inside, (3, 3), '\'', 1),
+                    (7, Inside, (8, 13), '\'', 1),
+                    (10, Inside, (8, 13), '\'', 1),
+                    (14, Inside, (8, 13), '\'', 1),
+                    (3, Around, (3, 3), '\'', 1),
+                    (7, Around, (7, 14), '\'', 1),
+                    (10, Around, (7, 14), '\'', 1),
+                    (14, Around, (7, 14), '\'', 1),
+                ],
+            ),
+            (
+                "(nested (surround (pairs)) 3 levels)",
+                vec![
+                    (0, Inside, (1, 34), '(', 1),
+                    (6, Inside, (1, 34), ')', 1),
+                    (8, Inside, (9, 24), '(', 1),
+                    (8, Inside, (9, 34), ')', 2),
+                    (20, Inside, (9, 24), '(', 2),
+                    (20, Inside, (1, 34), ')', 3),
+                    (0, Around, (0, 35), '(', 1),
+                    (6, Around, (0, 35), ')', 1),
+                    (8, Around, (8, 25), '(', 1),
+                    (8, Around, (8, 35), ')', 2),
+                    (20, Around, (8, 25), '(', 2),
+                    (20, Around, (0, 35), ')', 3),
+                ],
+            ),
+            (
+                "(mixed {surround [pair] same} line)",
+                vec![
+                    (2, Inside, (1, 33), '(', 1),
+                    (9, Inside, (8, 27), '{', 1),
+                    (18, Inside, (18, 21), '[', 1),
+                    (2, Around, (0, 34), '(', 1),
+                    (9, Around, (7, 28), '{', 1),
+                    (18, Around, (17, 22), '[', 1),
+                ],
+            ),
+            (
+                "(stepped (surround) pairs (should) skip)",
+                vec![(22, Inside, (1, 38), '(', 1), (22, Around, (0, 39), '(', 1)],
+            ),
+            (
+                "[surround pairs{\non different]\nlines}",
+                vec![
+                    (7, Inside, (1, 28), '[', 1),
+                    (15, Inside, (16, 35), '{', 1),
+                    (7, Around, (0, 29), '[', 1),
+                    (15, Around, (15, 36), '{', 1),
+                ],
+            ),
+        ];
+
+        for (sample, scenario) in tests {
+            let doc = Rope::from(*sample);
+            let slice = doc.slice(..);
+            for &case in scenario {
+                let (pos, objtype, expected_range, ch, count) = case;
+                let result = textobject_surround(slice, Range::point(pos), objtype, ch, count);
+                assert_eq!(
+                    result,
+                    expected_range.into(),
+                    "\nCase failed: {:?} - {:?}",
+                    sample,
+                    case
+                );
+            }
+        }
+    }
+}