summaryrefslogtreecommitdiff
path: root/helix-vcs
diff options
context:
space:
mode:
Diffstat (limited to 'helix-vcs')
-rw-r--r--helix-vcs/Cargo.toml28
-rw-r--r--helix-vcs/src/diff.rs198
-rw-r--r--helix-vcs/src/diff/line_cache.rs130
-rw-r--r--helix-vcs/src/diff/worker.rs207
-rw-r--r--helix-vcs/src/diff/worker/test.rs149
-rw-r--r--helix-vcs/src/git.rs80
-rw-r--r--helix-vcs/src/git/test.rs121
-rw-r--r--helix-vcs/src/lib.rs51
8 files changed, 964 insertions, 0 deletions
diff --git a/helix-vcs/Cargo.toml b/helix-vcs/Cargo.toml
new file mode 100644
index 00000000..c114666d
--- /dev/null
+++ b/helix-vcs/Cargo.toml
@@ -0,0 +1,28 @@
+[package]
+name = "helix-vcs"
+version = "0.6.0"
+authors = ["Blaž Hrastnik <blaz@mxxn.io>"]
+edition = "2021"
+license = "MPL-2.0"
+categories = ["editor"]
+repository = "https://github.com/helix-editor/helix"
+homepage = "https://helix-editor.com"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+helix-core = { version = "0.6", path = "../helix-core" }
+
+tokio = { version = "1", features = ["rt", "rt-multi-thread", "time", "sync", "parking_lot", "macros"] }
+parking_lot = "0.12"
+
+git-repository = { version = "0.26", default-features = false , optional = true }
+imara-diff = "0.1.5"
+
+log = "0.4"
+
+[features]
+git = ["git-repository"]
+
+[dev-dependencies]
+tempfile = "3.3"
\ No newline at end of file
diff --git a/helix-vcs/src/diff.rs b/helix-vcs/src/diff.rs
new file mode 100644
index 00000000..b1acd1f2
--- /dev/null
+++ b/helix-vcs/src/diff.rs
@@ -0,0 +1,198 @@
+use std::ops::Range;
+use std::sync::Arc;
+
+use helix_core::Rope;
+use imara_diff::Algorithm;
+use parking_lot::{Mutex, MutexGuard};
+use tokio::sync::mpsc::{unbounded_channel, UnboundedSender};
+use tokio::sync::{Notify, OwnedRwLockReadGuard, RwLock};
+use tokio::task::JoinHandle;
+use tokio::time::Instant;
+
+use crate::diff::worker::DiffWorker;
+
+mod line_cache;
+mod worker;
+
+type RedrawHandle = (Arc<Notify>, Arc<RwLock<()>>);
+
+/// A rendering lock passed to the differ that prevents redraws from occurring
+struct RenderLock {
+ pub lock: OwnedRwLockReadGuard<()>,
+ pub timeout: Option<Instant>,
+}
+
+struct Event {
+ text: Rope,
+ is_base: bool,
+ render_lock: Option<RenderLock>,
+}
+
+#[derive(Clone, Debug)]
+pub struct DiffHandle {
+ channel: UnboundedSender<Event>,
+ render_lock: Arc<RwLock<()>>,
+ hunks: Arc<Mutex<Vec<Hunk>>>,
+ inverted: bool,
+}
+
+impl DiffHandle {
+ pub fn new(diff_base: Rope, doc: Rope, redraw_handle: RedrawHandle) -> DiffHandle {
+ DiffHandle::new_with_handle(diff_base, doc, redraw_handle).0
+ }
+
+ fn new_with_handle(
+ diff_base: Rope,
+ doc: Rope,
+ redraw_handle: RedrawHandle,
+ ) -> (DiffHandle, JoinHandle<()>) {
+ let (sender, receiver) = unbounded_channel();
+ let hunks: Arc<Mutex<Vec<Hunk>>> = Arc::default();
+ let worker = DiffWorker {
+ channel: receiver,
+ hunks: hunks.clone(),
+ new_hunks: Vec::default(),
+ redraw_notify: redraw_handle.0,
+ diff_finished_notify: Arc::default(),
+ };
+ let handle = tokio::spawn(worker.run(diff_base, doc));
+ let differ = DiffHandle {
+ channel: sender,
+ hunks,
+ inverted: false,
+ render_lock: redraw_handle.1,
+ };
+ (differ, handle)
+ }
+
+ pub fn invert(&mut self) {
+ self.inverted = !self.inverted;
+ }
+
+ pub fn hunks(&self) -> FileHunks {
+ FileHunks {
+ hunks: self.hunks.lock(),
+ inverted: self.inverted,
+ }
+ }
+
+ /// Updates the document associated with this diff handle.
+ /// This function is only intended to be called from within the rendering loop;
+ /// if called from elsewhere it may fail to acquire the render lock and panic.
+ pub fn update_document(&self, doc: Rope, block: bool) -> bool {
+ // unwrap is ok here because the rendering lock is
+ // only exclusively locked during redraw.
+ // This function is only intended to be called
+ // from the core rendering loop where no redraw can happen in parallel
+ let lock = self.render_lock.clone().try_read_owned().unwrap();
+ let timeout = if block {
+ None
+ } else {
+ Some(Instant::now() + tokio::time::Duration::from_millis(SYNC_DIFF_TIMEOUT))
+ };
+ self.update_document_impl(doc, self.inverted, Some(RenderLock { lock, timeout }))
+ }
+
+ pub fn update_diff_base(&self, diff_base: Rope) -> bool {
+ self.update_document_impl(diff_base, !self.inverted, None)
+ }
+
+ fn update_document_impl(
+ &self,
+ text: Rope,
+ is_base: bool,
+ render_lock: Option<RenderLock>,
+ ) -> bool {
+ let event = Event {
+ text,
+ is_base,
+ render_lock,
+ };
+ self.channel.send(event).is_ok()
+ }
+}
+
+/// synchronous debounce value should be low
+/// so we can update synchronously most of the time
+const DIFF_DEBOUNCE_TIME_SYNC: u64 = 1;
+/// maximum time that rendering should be blocked until the diff finishes
+const SYNC_DIFF_TIMEOUT: u64 = 12;
+const DIFF_DEBOUNCE_TIME_ASYNC: u64 = 96;
+const ALGORITHM: Algorithm = Algorithm::Histogram;
+const MAX_DIFF_LINES: usize = 64 * u16::MAX as usize;
+// cap average line length to 128 for files with MAX_DIFF_LINES
+const MAX_DIFF_BYTES: usize = MAX_DIFF_LINES * 128;
+
+/// A single change in a file potentially spanning multiple lines
+/// Hunks produced by the differs are always ordered by their position
+/// in the file and non-overlapping.
+/// Specifically for any two hunks `x` and `y` the following properties hold:
+///
+/// ``` no_compile
+/// assert!(x.before.end <= y.before.start);
+/// assert!(x.after.end <= y.after.start);
+/// ```
+#[derive(PartialEq, Eq, Clone, Debug)]
+pub struct Hunk {
+ pub before: Range<u32>,
+ pub after: Range<u32>,
+}
+
+impl Hunk {
+ /// Can be used instead of `Option::None` for better performance
+ /// because lines larger than `i32::MAX` are not supported by `imara-diff` anyways.
+ /// Has some nice properties where it usually is not necessary to check for `None` separately:
+ /// Empty ranges fail contains checks and also fail smaller than checks.
+ pub const NONE: Hunk = Hunk {
+ before: u32::MAX..u32::MAX,
+ after: u32::MAX..u32::MAX,
+ };
+
+ /// Inverts a change so that `before` and `after` are swapped
+ pub fn invert(&self) -> Hunk {
+ Hunk {
+ before: self.after.clone(),
+ after: self.before.clone(),
+ }
+ }
+
+ pub fn is_pure_insertion(&self) -> bool {
+ self.before.is_empty()
+ }
+
+ pub fn is_pure_removal(&self) -> bool {
+ self.after.is_empty()
+ }
+}
+
+/// A list of changes in a file sorted in ascending
+/// non-overlapping order
+#[derive(Debug)]
+pub struct FileHunks<'a> {
+ hunks: MutexGuard<'a, Vec<Hunk>>,
+ inverted: bool,
+}
+
+impl FileHunks<'_> {
+ pub fn is_inverted(&self) -> bool {
+ self.inverted
+ }
+
+ /// Returns the `Hunk` for the `n`th change in this file.
+ /// if there is no `n`th change `Hunk::NONE` is returned instead.
+ pub fn nth_hunk(&self, n: u32) -> Hunk {
+ match self.hunks.get(n as usize) {
+ Some(hunk) if self.inverted => hunk.invert(),
+ Some(hunk) => hunk.clone(),
+ None => Hunk::NONE,
+ }
+ }
+
+ pub fn len(&self) -> u32 {
+ self.hunks.len() as u32
+ }
+
+ pub fn is_empty(&self) -> bool {
+ self.len() == 0
+ }
+}
diff --git a/helix-vcs/src/diff/line_cache.rs b/helix-vcs/src/diff/line_cache.rs
new file mode 100644
index 00000000..c3ee5daa
--- /dev/null
+++ b/helix-vcs/src/diff/line_cache.rs
@@ -0,0 +1,130 @@
+//! This module encapsulates a tiny bit of unsafe code that
+//! makes diffing significantly faster and more ergonomic to implement.
+//! This code is necessary because diffing requires quick random
+//! access to the lines of the text that is being diffed.
+//!
+//! Therefore it is best to collect the `Rope::lines` iterator into a vec
+//! first because access to the vec is `O(1)` where `Rope::line` is `O(log N)`.
+//! However this process can allocate a (potentially quite large) vector.
+//!
+//! To avoid reallocation for every diff, the vector is reused.
+//! However the RopeSlice references the original rope and therefore forms a self-referential data structure.
+//! A transmute is used to change the lifetime of the slice to static to circumvent that problem.
+use std::mem::transmute;
+
+use helix_core::{Rope, RopeSlice};
+use imara_diff::intern::{InternedInput, Interner};
+
+use super::{MAX_DIFF_BYTES, MAX_DIFF_LINES};
+
+/// A cache that stores the `lines` of a rope as a vector.
+/// It allows safely reusing the allocation of the vec when updating the rope
+pub(crate) struct InternedRopeLines {
+ diff_base: Rope,
+ doc: Rope,
+ num_tokens_diff_base: u32,
+ interned: InternedInput<RopeSlice<'static>>,
+}
+
+impl InternedRopeLines {
+ pub fn new(diff_base: Rope, doc: Rope) -> InternedRopeLines {
+ let mut res = InternedRopeLines {
+ interned: InternedInput {
+ before: Vec::with_capacity(diff_base.len_lines()),
+ after: Vec::with_capacity(doc.len_lines()),
+ interner: Interner::new(diff_base.len_lines() + doc.len_lines()),
+ },
+ diff_base,
+ doc,
+ // will be populated by update_diff_base_impl
+ num_tokens_diff_base: 0,
+ };
+ res.update_diff_base_impl();
+ res
+ }
+
+ /// Updates the `diff_base` and optionally the document if `doc` is not None
+ pub fn update_diff_base(&mut self, diff_base: Rope, doc: Option<Rope>) {
+ self.interned.clear();
+ self.diff_base = diff_base;
+ if let Some(doc) = doc {
+ self.doc = doc
+ }
+ if !self.is_too_large() {
+ self.update_diff_base_impl();
+ }
+ }
+
+ /// Updates the `doc` without reinterning the `diff_base`, this function
+ /// is therefore significantly faster than `update_diff_base` when only the document changes.
+ pub fn update_doc(&mut self, doc: Rope) {
+ // Safety: we clear any tokens that were added after
+ // the interning of `self.diff_base` finished so
+ // all lines that refer to `self.doc` have been purged.
+
+ self.interned
+ .interner
+ .erase_tokens_after(self.num_tokens_diff_base.into());
+
+ self.doc = doc;
+ if self.is_too_large() {
+ self.interned.after.clear();
+ } else {
+ self.update_doc_impl();
+ }
+ }
+
+ fn update_diff_base_impl(&mut self) {
+ // Safety: This transmute is safe because it only transmutes a lifetime, which has no effect.
+ // The backing storage for the RopeSlices referred to by the lifetime is stored in `self.diff_base`.
+ // Therefore as long as `self.diff_base` is not dropped/replaced this memory remains valid.
+ // `self.diff_base` is only changed in `self.update_diff_base`, which clears the interner.
+ // When the interned lines are exposed to consumer in `self.diff_input`, the lifetime is bounded to a reference to self.
+ // That means that on calls to update there exist no references to `self.interned`.
+ let before = self
+ .diff_base
+ .lines()
+ .map(|line: RopeSlice| -> RopeSlice<'static> { unsafe { transmute(line) } });
+ self.interned.update_before(before);
+ self.num_tokens_diff_base = self.interned.interner.num_tokens();
+ // the doc has to be interned again because the interner was fully cleared
+ self.update_doc_impl()
+ }
+
+ fn update_doc_impl(&mut self) {
+ // Safety: This transmute is safe because it only transmutes a lifetime, which has no effect.
+ // The backing storage for the RopeSlices referred to by the lifetime is stored in `self.doc`.
+ // Therefore as long as `self.doc` is not dropped/replaced this memory remains valid.
+ // `self.doc` is only changed in `self.update_doc` and `self.update_diff_base`, which first purge the doc's tokens from the interner.
+ // When the interned lines are exposed to consumer in `self.diff_input`, the lifetime is bounded to a reference to self.
+ // That means that on calls to update there exist no references to `self.interned`.
+ let after = self
+ .doc
+ .lines()
+ .map(|line: RopeSlice| -> RopeSlice<'static> { unsafe { transmute(line) } });
+ self.interned.update_after(after);
+ }
+
+ fn is_too_large(&self) -> bool {
+ // bound both lines and bytes to avoid huge files with few (but huge) lines
+ // or huge file with tiny lines. While this makes no difference to
+ // diff itself (the diff performance only depends on the number of tokens)
+ // the interning runtime depends mostly on filesize and is actually dominant
+ // for large files
+ self.doc.len_lines() > MAX_DIFF_LINES
+ || self.diff_base.len_lines() > MAX_DIFF_LINES
+ || self.doc.len_bytes() > MAX_DIFF_BYTES
+ || self.diff_base.len_bytes() > MAX_DIFF_BYTES
+ }
+
+ /// Returns the `InternedInput` for performing the diff.
+ /// If `diff_base` or `doc` is so large that performing a diff could slow the editor
+ /// this function returns `None`.
+ pub fn interned_lines(&self) -> Option<&InternedInput<RopeSlice>> {
+ if self.is_too_large() {
+ None
+ } else {
+ Some(&self.interned)
+ }
+ }
+}
diff --git a/helix-vcs/src/diff/worker.rs b/helix-vcs/src/diff/worker.rs
new file mode 100644
index 00000000..b8659c9b
--- /dev/null
+++ b/helix-vcs/src/diff/worker.rs
@@ -0,0 +1,207 @@
+use std::mem::swap;
+use std::ops::Range;
+use std::sync::Arc;
+
+use helix_core::{Rope, RopeSlice};
+use imara_diff::intern::InternedInput;
+use parking_lot::Mutex;
+use tokio::sync::mpsc::UnboundedReceiver;
+use tokio::sync::Notify;
+use tokio::time::{timeout, timeout_at, Duration};
+
+use crate::diff::{
+ Event, RenderLock, ALGORITHM, DIFF_DEBOUNCE_TIME_ASYNC, DIFF_DEBOUNCE_TIME_SYNC,
+};
+
+use super::line_cache::InternedRopeLines;
+use super::Hunk;
+
+#[cfg(test)]
+mod test;
+
+pub(super) struct DiffWorker {
+ pub channel: UnboundedReceiver<Event>,
+ pub hunks: Arc<Mutex<Vec<Hunk>>>,
+ pub new_hunks: Vec<Hunk>,
+ pub redraw_notify: Arc<Notify>,
+ pub diff_finished_notify: Arc<Notify>,
+}
+
+impl DiffWorker {
+ async fn accumulate_events(&mut self, event: Event) -> (Option<Rope>, Option<Rope>) {
+ let mut accumulator = EventAccumulator::new();
+ accumulator.handle_event(event).await;
+ accumulator
+ .accumulate_debounced_events(
+ &mut self.channel,
+ self.redraw_notify.clone(),
+ self.diff_finished_notify.clone(),
+ )
+ .await;
+ (accumulator.doc, accumulator.diff_base)
+ }
+
+ pub async fn run(mut self, diff_base: Rope, doc: Rope) {
+ let mut interner = InternedRopeLines::new(diff_base, doc);
+ if let Some(lines) = interner.interned_lines() {
+ self.perform_diff(lines);
+ }
+ self.apply_hunks();
+ while let Some(event) = self.channel.recv().await {
+ let (doc, diff_base) = self.accumulate_events(event).await;
+
+ let process_accumulated_events = || {
+ if let Some(new_base) = diff_base {
+ interner.update_diff_base(new_base, doc)
+ } else {
+ interner.update_doc(doc.unwrap())
+ }
+
+ if let Some(lines) = interner.interned_lines() {
+ self.perform_diff(lines)
+ }
+ };
+
+ // Calculating diffs is computationally expensive and should
+ // not run inside an async function to avoid blocking other futures.
+ // Note: tokio::task::block_in_place does not work during tests
+ #[cfg(test)]
+ process_accumulated_events();
+ #[cfg(not(test))]
+ tokio::task::block_in_place(process_accumulated_events);
+
+ self.apply_hunks();
+ }
+ }
+
+ /// update the hunks (used by the gutter) by replacing it with `self.new_hunks`.
+ /// `self.new_hunks` is always empty after this function runs.
+ /// To improve performance this function tries to reuse the allocation of the old diff previously stored in `self.hunks`
+ fn apply_hunks(&mut self) {
+ swap(&mut *self.hunks.lock(), &mut self.new_hunks);
+ self.diff_finished_notify.notify_waiters();
+ self.new_hunks.clear();
+ }
+
+ fn perform_diff(&mut self, input: &InternedInput<RopeSlice>) {
+ imara_diff::diff(ALGORITHM, input, |before: Range<u32>, after: Range<u32>| {
+ self.new_hunks.push(Hunk { before, after })
+ })
+ }
+}
+
+struct EventAccumulator {
+ diff_base: Option<Rope>,
+ doc: Option<Rope>,
+ render_lock: Option<RenderLock>,
+}
+
+impl<'a> EventAccumulator {
+ fn new() -> EventAccumulator {
+ EventAccumulator {
+ diff_base: None,
+ doc: None,
+ render_lock: None,
+ }
+ }
+
+ async fn handle_event(&mut self, event: Event) {
+ let dst = if event.is_base {
+ &mut self.diff_base
+ } else {
+ &mut self.doc
+ };
+
+ *dst = Some(event.text);
+
+ // always prefer the most synchronous requested render mode
+ if let Some(render_lock) = event.render_lock {
+ match &mut self.render_lock {
+ Some(RenderLock { timeout, .. }) => {
+ // A timeout of `None` means that the render should
+ // always wait for the diff to complete (so no timeout)
+ // remove the existing timeout, otherwise keep the previous timeout
+ // because it will be shorter than the current timeout
+ if render_lock.timeout.is_none() {
+ timeout.take();
+ }
+ }
+ None => self.render_lock = Some(render_lock),
+ }
+ }
+ }
+
+ async fn accumulate_debounced_events(
+ &mut self,
+ channel: &mut UnboundedReceiver<Event>,
+ redraw_notify: Arc<Notify>,
+ diff_finished_notify: Arc<Notify>,
+ ) {
+ let async_debounce = Duration::from_millis(DIFF_DEBOUNCE_TIME_ASYNC);
+ let sync_debounce = Duration::from_millis(DIFF_DEBOUNCE_TIME_SYNC);
+ loop {
+ // if we are not blocking rendering use a much longer timeout
+ let debounce = if self.render_lock.is_none() {
+ async_debounce
+ } else {
+ sync_debounce
+ };
+
+ if let Ok(Some(event)) = timeout(debounce, channel.recv()).await {
+ self.handle_event(event).await;
+ } else {
+ break;
+ }
+ }
+
+ // setup task to trigger the rendering
+ match self.render_lock.take() {
+ // diff is performed outside of the rendering loop
+ // request a redraw after the diff is done
+ None => {
+ tokio::spawn(async move {
+ diff_finished_notify.notified().await;
+ redraw_notify.notify_one();
+ });
+ }
+ // diff is performed inside the rendering loop
+ // block redraw until the diff is done or the timeout is expired
+ Some(RenderLock {
+ lock,
+ timeout: Some(timeout),
+ }) => {
+ tokio::spawn(async move {
+ let res = {
+ // The render lock is already held by this task at this point.
+ // The lock will block the rendering from occurring while held.
+ // The rendering waits for the diff if it doesn't time out
+ timeout_at(timeout, diff_finished_notify.notified()).await
+ };
+ // we either reached the timeout or the diff is finished, release the render lock
+ drop(lock);
+ if res.is_ok() {
+ // Diff finished in time we are done.
+ return;
+ }
+ // Diff failed to complete in time log the event
+ // and wait until the diff occurs to trigger an async redraw
+ log::warn!("Diff computation timed out, update of diffs might appear delayed");
+ diff_finished_notify.notified().await;
+ redraw_notify.notify_one();
+ });
+ }
+ // a blocking diff is performed inside the rendering loop
+ // block redraw until the diff is done
+ Some(RenderLock {
+ lock,
+ timeout: None,
+ }) => {
+ tokio::spawn(async move {
+ diff_finished_notify.notified().await;
+ // diff is done release the lock
+ drop(lock)
+ });
+ }
+ };
+ }
+}
diff --git a/helix-vcs/src/diff/worker/test.rs b/helix-vcs/src/diff/worker/test.rs
new file mode 100644
index 00000000..14442426
--- /dev/null
+++ b/helix-vcs/src/diff/worker/test.rs
@@ -0,0 +1,149 @@
+use helix_core::Rope;
+use tokio::task::JoinHandle;
+
+use crate::diff::{DiffHandle, Hunk};
+
+impl DiffHandle {
+ fn new_test(diff_base: &str, doc: &str) -> (DiffHandle, JoinHandle<()>) {
+ DiffHandle::new_with_handle(
+ Rope::from_str(diff_base),
+ Rope::from_str(doc),
+ Default::default(),
+ )
+ }
+ async fn into_diff(self, handle: JoinHandle<()>) -> Vec<Hunk> {
+ let hunks = self.hunks;
+ // dropping the channel terminates the task
+ drop(self.channel);
+ handle.await.unwrap();
+ let hunks = hunks.lock();
+ Vec::clone(&*hunks)
+ }
+}
+
+#[tokio::test]
+async fn append_line() {
+ let (differ, handle) = DiffHandle::new_test("foo\n", "foo\nbar\n");
+ let line_diffs = differ.into_diff(handle).await;
+ assert_eq!(
+ &line_diffs,
+ &[Hunk {
+ before: 1..1,
+ after: 1..2
+ }]
+ )
+}
+
+#[tokio::test]
+async fn prepend_line() {
+ let (differ, handle) = DiffHandle::new_test("foo\n", "bar\nfoo\n");
+ let line_diffs = differ.into_diff(handle).await;
+ assert_eq!(
+ &line_diffs,
+ &[Hunk {
+ before: 0..0,
+ after: 0..1
+ }]
+ )
+}
+
+#[tokio::test]
+async fn modify() {
+ let (differ, handle) = DiffHandle::new_test("foo\nbar\n", "foo bar\nbar\n");
+ let line_diffs = differ.into_diff(handle).await;
+ assert_eq!(
+ &line_diffs,
+ &[Hunk {
+ before: 0..1,
+ after: 0..1
+ }]
+ )
+}
+
+#[tokio::test]
+async fn delete_line() {
+ let (differ, handle) = DiffHandle::new_test("foo\nfoo bar\nbar\n", "foo\nbar\n");
+ let line_diffs = differ.into_diff(handle).await;
+ assert_eq!(
+ &line_diffs,
+ &[Hunk {
+ before: 1..2,
+ after: 1..1
+ }]
+ )
+}
+
+#[tokio::test]
+async fn delete_line_and_modify() {
+ let (differ, handle) = DiffHandle::new_test("foo\nbar\ntest\nfoo", "foo\ntest\nfoo bar");
+ let line_diffs = differ.into_diff(handle).await;
+ assert_eq!(
+ &line_diffs,
+ &[
+ Hunk {
+ before: 1..2,
+ after: 1..1
+ },
+ Hunk {
+ before: 3..4,
+ after: 2..3
+ },
+ ]
+ )
+}
+
+#[tokio::test]
+async fn add_use() {
+ let (differ, handle) = DiffHandle::new_test(
+ "use ropey::Rope;\nuse tokio::task::JoinHandle;\n",
+ "use ropey::Rope;\nuse ropey::RopeSlice;\nuse tokio::task::JoinHandle;\n",
+ );
+ let line_diffs = differ.into_diff(handle).await;
+ assert_eq!(
+ &line_diffs,
+ &[Hunk {
+ before: 1..1,
+ after: 1..2
+ },]
+ )
+}
+
+#[tokio::test]
+async fn update_document() {
+ let (differ, handle) = DiffHandle::new_test("foo\nbar\ntest\nfoo", "foo\nbar\ntest\nfoo");
+ differ.update_document(Rope::from_str("foo\ntest\nfoo bar"), false);
+ let line_diffs = differ.into_diff(handle).await;
+ assert_eq!(
+ &line_diffs,
+ &[
+ Hunk {
+ before: 1..2,
+ after: 1..1
+ },
+ Hunk {
+ before: 3..4,
+ after: 2..3
+ },
+ ]
+ )
+}
+
+#[tokio::test]
+async fn update_base() {
+ let (differ, handle) = DiffHandle::new_test("foo\ntest\nfoo bar", "foo\ntest\nfoo bar");
+ differ.update_diff_base(Rope::from_str("foo\nbar\ntest\nfoo"));
+ let line_diffs = differ.into_diff(handle).await;
+ assert_eq!(
+ &line_diffs,
+ &[
+ Hunk {
+ before: 1..2,
+ after: 1..1
+ },
+ Hunk {
+ before: 3..4,
+ after: 2..3
+ },
+ ]
+ )
+}
diff --git a/helix-vcs/src/git.rs b/helix-vcs/src/git.rs
new file mode 100644
index 00000000..82b2b558
--- /dev/null
+++ b/helix-vcs/src/git.rs
@@ -0,0 +1,80 @@
+use std::path::Path;
+
+use git::objs::tree::EntryMode;
+use git::sec::trust::DefaultForLevel;
+use git::{Commit, ObjectId, Repository, ThreadSafeRepository};
+use git_repository as git;
+
+use crate::DiffProvider;
+
+#[cfg(test)]
+mod test;
+
+pub struct Git;
+
+impl Git {
+ fn open_repo(path: &Path, ceiling_dir: Option<&Path>) -> Option<ThreadSafeRepository> {
+ // custom open options
+ let mut git_open_opts_map = git::sec::trust::Mapping::<git::open::Options>::default();
+
+ // don't use the global git configs (not needed)
+ let config = git::permissions::Config {
+ system: false,
+ git: false,
+ user: false,
+ env: true,
+ includes: true,
+ git_binary: false,
+ };
+ // change options for config permissions without touching anything else
+ git_open_opts_map.reduced = git_open_opts_map.reduced.permissions(git::Permissions {
+ config,
+ ..git::Permissions::default_for_level(git::sec::Trust::Reduced)
+ });
+ git_open_opts_map.full = git_open_opts_map.full.permissions(git::Permissions {
+ config,
+ ..git::Permissions::default_for_level(git::sec::Trust::Full)
+ });
+
+ let mut open_options = git::discover::upwards::Options::default();
+ if let Some(ceiling_dir) = ceiling_dir {
+ open_options.ceiling_dirs = vec![ceiling_dir.to_owned()];
+ }
+
+ ThreadSafeRepository::discover_with_environment_overrides_opts(
+ path,
+ open_options,
+ git_open_opts_map,
+ )
+ .ok()
+ }
+}
+
+impl DiffProvider for Git {
+ fn get_diff_base(&self, file: &Path) -> Option<Vec<u8>> {
+ debug_assert!(!file.exists() || file.is_file());
+ debug_assert!(file.is_absolute());
+
+ // TODO cache repository lookup
+ let repo = Git::open_repo(file.parent()?, None)?.to_thread_local();
+ let head = repo.head_commit().ok()?;
+ let file_oid = find_file_in_commit(&repo, &head, file)?;
+
+ let file_object = repo.find_object(file_oid).ok()?;
+ Some(file_object.detach().data)
+ }
+}
+
+/// Finds the object that contains the contents of a file at a specific commit.
+fn find_file_in_commit(repo: &Repository, commit: &Commit, file: &Path) -> Option<ObjectId> {
+ let repo_dir = repo.work_dir()?;
+ let rel_path = file.strip_prefix(repo_dir).ok()?;
+ let tree = commit.tree().ok()?;
+ let tree_entry = tree.lookup_entry_by_path(rel_path).ok()??;
+ match tree_entry.mode() {
+ // not a file, everything is new, do not show diff
+ EntryMode::Tree | EntryMode::Commit | EntryMode::Link => None,
+ // found a file
+ EntryMode::Blob | EntryMode::BlobExecutable => Some(tree_entry.object_id()),
+ }
+}
diff --git a/helix-vcs/src/git/test.rs b/helix-vcs/src/git/test.rs
new file mode 100644
index 00000000..d6e9af08
--- /dev/null
+++ b/helix-vcs/src/git/test.rs
@@ -0,0 +1,121 @@
+use std::{fs::File, io::Write, path::Path, process::Command};
+
+use tempfile::TempDir;
+
+use crate::{DiffProvider, Git};
+
+fn exec_git_cmd(args: &str, git_dir: &Path) {
+ let res = Command::new("git")
+ .arg("-C")
+ .arg(git_dir) // execute the git command in this directory
+ .args(args.split_whitespace())
+ .env_remove("GIT_DIR")
+ .env_remove("GIT_ASKPASS")
+ .env_remove("SSH_ASKPASS")
+ .env("GIT_TERMINAL_PROMPT", "false")
+ .env("GIT_AUTHOR_DATE", "2000-01-01 00:00:00 +0000")
+ .env("GIT_AUTHOR_EMAIL", "author@example.com")
+ .env("GIT_AUTHOR_NAME", "author")
+ .env("GIT_COMMITTER_DATE", "2000-01-02 00:00:00 +0000")
+ .env("GIT_COMMITTER_EMAIL", "committer@example.com")
+ .env("GIT_COMMITTER_NAME", "committer")
+ .env("GIT_CONFIG_COUNT", "2")
+ .env("GIT_CONFIG_KEY_0", "commit.gpgsign")
+ .env("GIT_CONFIG_VALUE_0", "false")
+ .env("GIT_CONFIG_KEY_1", "init.defaultBranch")
+ .env("GIT_CONFIG_VALUE_1", "main")
+ .output()
+ .unwrap_or_else(|_| panic!("`git {args}` failed"));
+ if !res.status.success() {
+ println!("{}", String::from_utf8_lossy(&res.stdout));
+ eprintln!("{}", String::from_utf8_lossy(&res.stderr));
+ panic!("`git {args}` failed (see output above)")
+ }
+}
+
+fn create_commit(repo: &Path, add_modified: bool) {
+ if add_modified {
+ exec_git_cmd("add -A", repo);
+ }
+ exec_git_cmd("commit -m message", repo);
+}
+
+fn empty_git_repo() -> TempDir {
+ let tmp = tempfile::tempdir().expect("create temp dir for git testing");
+ exec_git_cmd("init", tmp.path());
+ exec_git_cmd("config user.email test@helix.org", tmp.path());
+ exec_git_cmd("config user.name helix-test", tmp.path());
+ tmp
+}
+
+#[test]
+fn missing_file() {
+ let temp_git = empty_git_repo();
+ let file = temp_git.path().join("file.txt");
+ File::create(&file).unwrap().write_all(b"foo").unwrap();
+
+ assert_eq!(Git.get_diff_base(&file), None);
+}
+
+#[test]
+fn unmodified_file() {
+ let temp_git = empty_git_repo();
+ let file = temp_git.path().join("file.txt");
+ let contents = b"foo".as_slice();
+ File::create(&file).unwrap().write_all(contents).unwrap();
+ create_commit(temp_git.path(), true);
+ assert_eq!(Git.get_diff_base(&file), Some(Vec::from(contents)));
+}
+
+#[test]
+fn modified_file() {
+ let temp_git = empty_git_repo();
+ let file = temp_git.path().join("file.txt");
+ let contents = b"foo".as_slice();
+ File::create(&file).unwrap().write_all(contents).unwrap();
+ create_commit(temp_git.path(), true);
+ File::create(&file).unwrap().write_all(b"bar").unwrap();
+
+ assert_eq!(Git.get_diff_base(&file), Some(Vec::from(contents)));
+}
+
+/// Test that `get_diff_base` does not return content for a directory.
+/// This is important to correctly cover cases where a directory is removed and replaced by a file.
+/// If the contents of the directory object were returned a diff between a path and the directory children would be produced.
+#[test]
+fn directory() {
+ let temp_git = empty_git_repo();
+ let dir = temp_git.path().join("file.txt");
+ std::fs::create_dir(&dir).expect("");
+ let file = dir.join("file.txt");
+ let contents = b"foo".as_slice();
+ File::create(&file).unwrap().write_all(contents).unwrap();
+
+ create_commit(temp_git.path(), true);
+
+ std::fs::remove_dir_all(&dir).unwrap();
+ File::create(&dir).unwrap().write_all(b"bar").unwrap();
+ assert_eq!(Git.get_diff_base(&dir), None);
+}
+
+/// Test that `get_diff_base` does not return content for a symlink.
+/// This is important to correctly cover cases where a symlink is removed and replaced by a file.
+/// If the contents of the symlink object were returned a diff between a path and the actual file would be produced (bad ui).
+#[cfg(any(unix, windows))]
+#[test]
+fn symlink() {
+ #[cfg(unix)]
+ use std::os::unix::fs::symlink;
+ #[cfg(not(unix))]
+ use std::os::windows::fs::symlink_file as symlink;
+ let temp_git = empty_git_repo();
+ let file = temp_git.path().join("file.txt");
+ let contents = b"foo".as_slice();
+ File::create(&file).unwrap().write_all(contents).unwrap();
+ let file_link = temp_git.path().join("file_link.txt");
+ symlink("file.txt", &file_link).unwrap();
+
+ create_commit(temp_git.path(), true);
+ assert_eq!(Git.get_diff_base(&file_link), None);
+ assert_eq!(Git.get_diff_base(&file), Some(Vec::from(contents)));
+}
diff --git a/helix-vcs/src/lib.rs b/helix-vcs/src/lib.rs
new file mode 100644
index 00000000..97320d32
--- /dev/null
+++ b/helix-vcs/src/lib.rs
@@ -0,0 +1,51 @@
+use std::path::Path;
+
+#[cfg(feature = "git")]
+pub use git::Git;
+#[cfg(not(feature = "git"))]
+pub use Dummy as Git;
+
+#[cfg(feature = "git")]
+mod git;
+
+mod diff;
+
+pub use diff::{DiffHandle, Hunk};
+
+pub trait DiffProvider {
+ /// Returns the data that a diff should be computed against
+ /// if this provider is used.
+ /// The data is returned as raw bytes without any decoding or encoding performed
+ /// to ensure all file encodings are handled correctly.
+ fn get_diff_base(&self, file: &Path) -> Option<Vec<u8>>;
+}
+
+#[doc(hidden)]
+pub struct Dummy;
+impl DiffProvider for Dummy {
+ fn get_diff_base(&self, _file: &Path) -> Option<Vec<u8>> {
+ None
+ }
+}
+
+pub struct DiffProviderRegistry {
+ providers: Vec<Box<dyn DiffProvider>>,
+}
+
+impl DiffProviderRegistry {
+ pub fn get_diff_base(&self, file: &Path) -> Option<Vec<u8>> {
+ self.providers
+ .iter()
+ .find_map(|provider| provider.get_diff_base(file))
+ }
+}
+
+impl Default for DiffProviderRegistry {
+ fn default() -> Self {
+ // currently only git is supported
+ // TODO make this configurable when more providers are added
+ let git: Box<dyn DiffProvider> = Box::new(Git);
+ let providers = vec![git];
+ DiffProviderRegistry { providers }
+ }
+}