aboutsummaryrefslogtreecommitdiff
path: root/helix-vcs
diff options
context:
space:
mode:
authorPascal Kuthe2022-12-01 08:35:23 +0000
committerGitHub2022-12-01 08:35:23 +0000
commit5a3ff742218aac32c3af08993f0edb623631fc72 (patch)
tree55c09d58aef9284daf63224e1d3afaac6da26ee8 /helix-vcs
parent67415e096ea70173d30550803559eb2347ed04d6 (diff)
Show (git) diff signs in gutter (#3890)
* Show (git) diff signs in gutter (#3890) Avoid string allocation when git diffing Incrementally diff using changesets refactor diffs to be provider indepndent and improve git implementation remove dependency on zlib-ng switch to asynchronus diffing with similar Update helix-vcs/Cargo.toml fix toml formatting Co-authored-by: Ivan Tham <pickfire@riseup.net> fix typo in documentation use ropey reexpors from helix-core fix crash when creating new file remove useless use if io::Cursor fix spelling mistakes implement suggested improvement to repository loading improve git test isolation remove lefover comments Co-authored-by: univerz <univerz@fu-solution.com> fixed spelling mistake minor cosmetic changes fix: set self.differ to None if decoding the diff_base fails fixup formatting Co-authored-by: Ivan Tham <pickfire@riseup.net> reload diff_base when file is reloaded from disk switch to imara-diff Fixup formatting Co-authored-by: Blaž Hrastnik <blaz@mxxn.io> Redraw buffer whenever a diff is updated. Only store hunks instead of changes for individual lines to easily allow jumping between them Update to latest gitoxide version Change default diff gutter position Only update gutter after timeout * update diff gutter synchronously, with a timeout * Apply suggestions from code review Co-authored-by: Blaž Hrastnik <blaz@mxxn.io> Co-authored-by: Michael Davis <mcarsondavis@gmail.com> * address review comments and ensure lock is always aquired * remove configuration for redraw timeout Co-authored-by: Blaž Hrastnik <blaz@mxxn.io> Co-authored-by: Michael Davis <mcarsondavis@gmail.com>
Diffstat (limited to 'helix-vcs')
-rw-r--r--helix-vcs/Cargo.toml28
-rw-r--r--helix-vcs/src/diff.rs198
-rw-r--r--helix-vcs/src/diff/line_cache.rs130
-rw-r--r--helix-vcs/src/diff/worker.rs207
-rw-r--r--helix-vcs/src/diff/worker/test.rs149
-rw-r--r--helix-vcs/src/git.rs80
-rw-r--r--helix-vcs/src/git/test.rs121
-rw-r--r--helix-vcs/src/lib.rs51
8 files changed, 964 insertions, 0 deletions
diff --git a/helix-vcs/Cargo.toml b/helix-vcs/Cargo.toml
new file mode 100644
index 00000000..c114666d
--- /dev/null
+++ b/helix-vcs/Cargo.toml
@@ -0,0 +1,28 @@
+[package]
+name = "helix-vcs"
+version = "0.6.0"
+authors = ["Blaž Hrastnik <blaz@mxxn.io>"]
+edition = "2021"
+license = "MPL-2.0"
+categories = ["editor"]
+repository = "https://github.com/helix-editor/helix"
+homepage = "https://helix-editor.com"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+helix-core = { version = "0.6", path = "../helix-core" }
+
+tokio = { version = "1", features = ["rt", "rt-multi-thread", "time", "sync", "parking_lot", "macros"] }
+parking_lot = "0.12"
+
+git-repository = { version = "0.26", default-features = false , optional = true }
+imara-diff = "0.1.5"
+
+log = "0.4"
+
+[features]
+git = ["git-repository"]
+
+[dev-dependencies]
+tempfile = "3.3" \ No newline at end of file
diff --git a/helix-vcs/src/diff.rs b/helix-vcs/src/diff.rs
new file mode 100644
index 00000000..b1acd1f2
--- /dev/null
+++ b/helix-vcs/src/diff.rs
@@ -0,0 +1,198 @@
+use std::ops::Range;
+use std::sync::Arc;
+
+use helix_core::Rope;
+use imara_diff::Algorithm;
+use parking_lot::{Mutex, MutexGuard};
+use tokio::sync::mpsc::{unbounded_channel, UnboundedSender};
+use tokio::sync::{Notify, OwnedRwLockReadGuard, RwLock};
+use tokio::task::JoinHandle;
+use tokio::time::Instant;
+
+use crate::diff::worker::DiffWorker;
+
+mod line_cache;
+mod worker;
+
+type RedrawHandle = (Arc<Notify>, Arc<RwLock<()>>);
+
+/// A rendering lock passed to the differ that prevents redraws from occurring
+struct RenderLock {
+ pub lock: OwnedRwLockReadGuard<()>,
+ pub timeout: Option<Instant>,
+}
+
+struct Event {
+ text: Rope,
+ is_base: bool,
+ render_lock: Option<RenderLock>,
+}
+
+#[derive(Clone, Debug)]
+pub struct DiffHandle {
+ channel: UnboundedSender<Event>,
+ render_lock: Arc<RwLock<()>>,
+ hunks: Arc<Mutex<Vec<Hunk>>>,
+ inverted: bool,
+}
+
+impl DiffHandle {
+ pub fn new(diff_base: Rope, doc: Rope, redraw_handle: RedrawHandle) -> DiffHandle {
+ DiffHandle::new_with_handle(diff_base, doc, redraw_handle).0
+ }
+
+ fn new_with_handle(
+ diff_base: Rope,
+ doc: Rope,
+ redraw_handle: RedrawHandle,
+ ) -> (DiffHandle, JoinHandle<()>) {
+ let (sender, receiver) = unbounded_channel();
+ let hunks: Arc<Mutex<Vec<Hunk>>> = Arc::default();
+ let worker = DiffWorker {
+ channel: receiver,
+ hunks: hunks.clone(),
+ new_hunks: Vec::default(),
+ redraw_notify: redraw_handle.0,
+ diff_finished_notify: Arc::default(),
+ };
+ let handle = tokio::spawn(worker.run(diff_base, doc));
+ let differ = DiffHandle {
+ channel: sender,
+ hunks,
+ inverted: false,
+ render_lock: redraw_handle.1,
+ };
+ (differ, handle)
+ }
+
+ pub fn invert(&mut self) {
+ self.inverted = !self.inverted;
+ }
+
+ pub fn hunks(&self) -> FileHunks {
+ FileHunks {
+ hunks: self.hunks.lock(),
+ inverted: self.inverted,
+ }
+ }
+
+ /// Updates the document associated with this redraw handle
+ /// This function is only intended to be called from within the rendering loop
+ /// if called from elsewhere it may fail to acquire the render lock and panic
+ pub fn update_document(&self, doc: Rope, block: bool) -> bool {
+ // unwrap is ok here because the rendering lock is
+ // only exclusively locked during redraw.
+ // This function is only intended to be called
+ // from the core rendering loop where no redraw can happen in parallel
+ let lock = self.render_lock.clone().try_read_owned().unwrap();
+ let timeout = if block {
+ None
+ } else {
+ Some(Instant::now() + tokio::time::Duration::from_millis(SYNC_DIFF_TIMEOUT))
+ };
+ self.update_document_impl(doc, self.inverted, Some(RenderLock { lock, timeout }))
+ }
+
+ pub fn update_diff_base(&self, diff_base: Rope) -> bool {
+ self.update_document_impl(diff_base, !self.inverted, None)
+ }
+
+ fn update_document_impl(
+ &self,
+ text: Rope,
+ is_base: bool,
+ render_lock: Option<RenderLock>,
+ ) -> bool {
+ let event = Event {
+ text,
+ is_base,
+ render_lock,
+ };
+ self.channel.send(event).is_ok()
+ }
+}
+
+/// synchronous debounce value should be low
+/// so we can update synchronously most of the time
+const DIFF_DEBOUNCE_TIME_SYNC: u64 = 1;
+/// maximum time that rendering should be blocked until the diff finishes
+const SYNC_DIFF_TIMEOUT: u64 = 12;
+const DIFF_DEBOUNCE_TIME_ASYNC: u64 = 96;
+const ALGORITHM: Algorithm = Algorithm::Histogram;
+const MAX_DIFF_LINES: usize = 64 * u16::MAX as usize;
+// cap average line length to 128 for files with MAX_DIFF_LINES
+const MAX_DIFF_BYTES: usize = MAX_DIFF_LINES * 128;
+
+/// A single change in a file potentially spanning multiple lines
+/// Hunks produced by the differs are always ordered by their position
+/// in the file and non-overlapping.
+/// Specifically for any two hunks `x` and `y` the following properties hold:
+///
+/// ``` no_compile
+/// assert!(x.before.end <= y.before.start);
+/// assert!(x.after.end <= y.after.start);
+/// ```
+#[derive(PartialEq, Eq, Clone, Debug)]
+pub struct Hunk {
+ pub before: Range<u32>,
+ pub after: Range<u32>,
+}
+
+impl Hunk {
+ /// Can be used instead of `Option::None` for better performance
+ /// because lines larger than `i32::MAX` are not supported by `imara-diff` anyways.
+ /// Has some nice properties where it usually is not necessary to check for `None` separately:
+ /// Empty ranges fail contains checks and also fail smaller than checks.
+ pub const NONE: Hunk = Hunk {
+ before: u32::MAX..u32::MAX,
+ after: u32::MAX..u32::MAX,
+ };
+
+ /// Inverts a change so that `before` and `after` are swapped
+ pub fn invert(&self) -> Hunk {
+ Hunk {
+ before: self.after.clone(),
+ after: self.before.clone(),
+ }
+ }
+
+ pub fn is_pure_insertion(&self) -> bool {
+ self.before.is_empty()
+ }
+
+ pub fn is_pure_removal(&self) -> bool {
+ self.after.is_empty()
+ }
+}
+
+/// A list of changes in a file sorted in ascending
+/// non-overlapping order
+#[derive(Debug)]
+pub struct FileHunks<'a> {
+ hunks: MutexGuard<'a, Vec<Hunk>>,
+ inverted: bool,
+}
+
+impl FileHunks<'_> {
+ pub fn is_inverted(&self) -> bool {
+ self.inverted
+ }
+
+ /// Returns the `Hunk` for the `n`th change in this file.
+ /// if there is no `n`th change `Hunk::NONE` is returned instead.
+ pub fn nth_hunk(&self, n: u32) -> Hunk {
+ match self.hunks.get(n as usize) {
+ Some(hunk) if self.inverted => hunk.invert(),
+ Some(hunk) => hunk.clone(),
+ None => Hunk::NONE,
+ }
+ }
+
+ pub fn len(&self) -> u32 {
+ self.hunks.len() as u32
+ }
+
+ pub fn is_empty(&self) -> bool {
+ self.len() == 0
+ }
+}
diff --git a/helix-vcs/src/diff/line_cache.rs b/helix-vcs/src/diff/line_cache.rs
new file mode 100644
index 00000000..c3ee5daa
--- /dev/null
+++ b/helix-vcs/src/diff/line_cache.rs
@@ -0,0 +1,130 @@
+//! This module encapsulates a tiny bit of unsafe code that
+//! makes diffing significantly faster and more ergonomic to implement.
+//! This code is necessary because diffing requires quick random
+//! access to the lines of the text that is being diffed.
+//!
+//! Therefore it is best to collect the `Rope::lines` iterator into a vec
+//! first because access to the vec is `O(1)` where `Rope::line` is `O(log N)`.
+//! However this process can allocate a (potentially quite large) vector.
+//!
+//! To avoid reallocation for every diff, the vector is reused.
+//! However the RopeSlice references the original rope and therefore forms a self-referential data structure.
+//! A transmute is used to change the lifetime of the slice to static to circumvent that problem.
+use std::mem::transmute;
+
+use helix_core::{Rope, RopeSlice};
+use imara_diff::intern::{InternedInput, Interner};
+
+use super::{MAX_DIFF_BYTES, MAX_DIFF_LINES};
+
+/// A cache that stores the `lines` of a rope as a vector.
+/// It allows safely reusing the allocation of the vec when updating the rope
+pub(crate) struct InternedRopeLines {
+ diff_base: Rope,
+ doc: Rope,
+ num_tokens_diff_base: u32,
+ interned: InternedInput<RopeSlice<'static>>,
+}
+
+impl InternedRopeLines {
+ pub fn new(diff_base: Rope, doc: Rope) -> InternedRopeLines {
+ let mut res = InternedRopeLines {
+ interned: InternedInput {
+ before: Vec::with_capacity(diff_base.len_lines()),
+ after: Vec::with_capacity(doc.len_lines()),
+ interner: Interner::new(diff_base.len_lines() + doc.len_lines()),
+ },
+ diff_base,
+ doc,
+ // will be populated by update_diff_base_impl
+ num_tokens_diff_base: 0,
+ };
+ res.update_diff_base_impl();
+ res
+ }
+
+ /// Updates the `diff_base` and optionally the document if `doc` is not None
+ pub fn update_diff_base(&mut self, diff_base: Rope, doc: Option<Rope>) {
+ self.interned.clear();
+ self.diff_base = diff_base;
+ if let Some(doc) = doc {
+ self.doc = doc
+ }
+ if !self.is_too_large() {
+ self.update_diff_base_impl();
+ }
+ }
+
+ /// Updates the `doc` without reinterning the `diff_base`, this function
+ /// is therefore significantly faster than `update_diff_base` when only the document changes.
+ pub fn update_doc(&mut self, doc: Rope) {
+ // Safety: we clear any tokens that were added after
+ // the interning of `self.diff_base` finished so
+ // all lines that refer to `self.doc` have been purged.
+
+ self.interned
+ .interner
+ .erase_tokens_after(self.num_tokens_diff_base.into());
+
+ self.doc = doc;
+ if self.is_too_large() {
+ self.interned.after.clear();
+ } else {
+ self.update_doc_impl();
+ }
+ }
+
+ fn update_diff_base_impl(&mut self) {
+ // Safety: This transmute is safe because it only transmutes a lifetime, which has no effect.
+ // The backing storage for the RopeSlices referred to by the lifetime is stored in `self.diff_base`.
+ // Therefore as long as `self.diff_base` is not dropped/replaced this memory remains valid.
+ // `self.diff_base` is only changed in `self.update_diff_base`, which clears the interner.
+ // When the interned lines are exposed to consumer in `self.diff_input`, the lifetime is bounded to a reference to self.
+ // That means that on calls to update there exist no references to `self.interned`.
+ let before = self
+ .diff_base
+ .lines()
+ .map(|line: RopeSlice| -> RopeSlice<'static> { unsafe { transmute(line) } });
+ self.interned.update_before(before);
+ self.num_tokens_diff_base = self.interned.interner.num_tokens();
+ // the doc has to be interned again because the interner was fully cleared
+ self.update_doc_impl()
+ }
+
+ fn update_doc_impl(&mut self) {
+ // Safety: This transmute is safe because it only transmutes a lifetime, which has no effect.
+ // The backing storage for the RopeSlices referred to by the lifetime is stored in `self.doc`.
+ // Therefore as long as `self.doc` is not dropped/replaced this memory remains valid.
+ // `self.doc` is only changed in `self.update_doc`, which clears the interner.
+ // When the interned lines are exposed to consumer in `self.diff_input`, the lifetime is bounded to a reference to self.
+ // That means that on calls to update there exist no references to `self.interned`.
+ let after = self
+ .doc
+ .lines()
+ .map(|line: RopeSlice| -> RopeSlice<'static> { unsafe { transmute(line) } });
+ self.interned.update_after(after);
+ }
+
+ fn is_too_large(&self) -> bool {
+ // bound both lines and bytes to avoid huge files with few (but huge) lines
+ // or huge file with tiny lines. While this makes no difference to
+ // diff itself (the diff performance only depends on the number of tokens)
+ // the interning runtime depends mostly on filesize and is actually dominant
+ // for large files
+ self.doc.len_lines() > MAX_DIFF_LINES
+ || self.diff_base.len_lines() > MAX_DIFF_LINES
+ || self.doc.len_bytes() > MAX_DIFF_BYTES
+ || self.diff_base.len_bytes() > MAX_DIFF_BYTES
+ }
+
+ /// Returns the `InternedInput` for performing the diff.
+ /// If `diff_base` or `doc` is so large that performing a diff could slow the editor
+ /// this function returns `None`.
+ pub fn interned_lines(&self) -> Option<&InternedInput<RopeSlice>> {
+ if self.is_too_large() {
+ None
+ } else {
+ Some(&self.interned)
+ }
+ }
+}
diff --git a/helix-vcs/src/diff/worker.rs b/helix-vcs/src/diff/worker.rs
new file mode 100644
index 00000000..b8659c9b
--- /dev/null
+++ b/helix-vcs/src/diff/worker.rs
@@ -0,0 +1,207 @@
+use std::mem::swap;
+use std::ops::Range;
+use std::sync::Arc;
+
+use helix_core::{Rope, RopeSlice};
+use imara_diff::intern::InternedInput;
+use parking_lot::Mutex;
+use tokio::sync::mpsc::UnboundedReceiver;
+use tokio::sync::Notify;
+use tokio::time::{timeout, timeout_at, Duration};
+
+use crate::diff::{
+ Event, RenderLock, ALGORITHM, DIFF_DEBOUNCE_TIME_ASYNC, DIFF_DEBOUNCE_TIME_SYNC,
+};
+
+use super::line_cache::InternedRopeLines;
+use super::Hunk;
+
+#[cfg(test)]
+mod test;
+
+pub(super) struct DiffWorker {
+ pub channel: UnboundedReceiver<Event>,
+ pub hunks: Arc<Mutex<Vec<Hunk>>>,
+ pub new_hunks: Vec<Hunk>,
+ pub redraw_notify: Arc<Notify>,
+ pub diff_finished_notify: Arc<Notify>,
+}
+
+impl DiffWorker {
+ async fn accumulate_events(&mut self, event: Event) -> (Option<Rope>, Option<Rope>) {
+ let mut accumulator = EventAccumulator::new();
+ accumulator.handle_event(event).await;
+ accumulator
+ .accumulate_debounced_events(
+ &mut self.channel,
+ self.redraw_notify.clone(),
+ self.diff_finished_notify.clone(),
+ )
+ .await;
+ (accumulator.doc, accumulator.diff_base)
+ }
+
+ pub async fn run(mut self, diff_base: Rope, doc: Rope) {
+ let mut interner = InternedRopeLines::new(diff_base, doc);
+ if let Some(lines) = interner.interned_lines() {
+ self.perform_diff(lines);
+ }
+ self.apply_hunks();
+ while let Some(event) = self.channel.recv().await {
+ let (doc, diff_base) = self.accumulate_events(event).await;
+
+ let process_accumulated_events = || {
+ if let Some(new_base) = diff_base {
+ interner.update_diff_base(new_base, doc)
+ } else {
+ interner.update_doc(doc.unwrap())
+ }
+
+ if let Some(lines) = interner.interned_lines() {
+ self.perform_diff(lines)
+ }
+ };
+
+ // Calculating diffs is computationally expensive and should
+ // not run inside an async function to avoid blocking other futures.
+ // Note: tokio::task::block_in_place does not work during tests
+ #[cfg(test)]
+ process_accumulated_events();
+ #[cfg(not(test))]
+ tokio::task::block_in_place(process_accumulated_events);
+
+ self.apply_hunks();
+ }
+ }
+
+ /// update the hunks (used by the gutter) by replacing it with `self.new_hunks`.
+ /// `self.new_hunks` is always empty after this function runs.
+ /// To improve performance this function tries to reuse the allocation of the old diff previously stored in `self.hunks`
+ fn apply_hunks(&mut self) {
+ swap(&mut *self.hunks.lock(), &mut self.new_hunks);
+ self.diff_finished_notify.notify_waiters();
+ self.new_hunks.clear();
+ }
+
+ fn perform_diff(&mut self, input: &InternedInput<RopeSlice>) {
+ imara_diff::diff(ALGORITHM, input, |before: Range<u32>, after: Range<u32>| {
+ self.new_hunks.push(Hunk { before, after })
+ })
+ }
+}
+
+struct EventAccumulator {
+ diff_base: Option<Rope>,
+ doc: Option<Rope>,
+ render_lock: Option<RenderLock>,
+}
+
+impl<'a> EventAccumulator {
+ fn new() -> EventAccumulator {
+ EventAccumulator {
+ diff_base: None,
+ doc: None,
+ render_lock: None,
+ }
+ }
+
+ async fn handle_event(&mut self, event: Event) {
+ let dst = if event.is_base {
+ &mut self.diff_base
+ } else {
+ &mut self.doc
+ };
+
+ *dst = Some(event.text);
+
+ // always prefer the most synchronous requested render mode
+ if let Some(render_lock) = event.render_lock {
+ match &mut self.render_lock {
+ Some(RenderLock { timeout, .. }) => {
+ // A timeout of `None` means that the render should
+ // always wait for the diff to complete (so no timeout)
+ // remove the existing timeout, otherwise keep the previous timeout
+ // because it will be shorter than the current timeout
+ if render_lock.timeout.is_none() {
+ timeout.take();
+ }
+ }
+ None => self.render_lock = Some(render_lock),
+ }
+ }
+ }
+
+ async fn accumulate_debounced_events(
+ &mut self,
+ channel: &mut UnboundedReceiver<Event>,
+ redraw_notify: Arc<Notify>,
+ diff_finished_notify: Arc<Notify>,
+ ) {
+ let async_debounce = Duration::from_millis(DIFF_DEBOUNCE_TIME_ASYNC);
+ let sync_debounce = Duration::from_millis(DIFF_DEBOUNCE_TIME_SYNC);
+ loop {
+ // if we are not blocking rendering use a much longer timeout
+ let debounce = if self.render_lock.is_none() {
+ async_debounce
+ } else {
+ sync_debounce
+ };
+
+ if let Ok(Some(event)) = timeout(debounce, channel.recv()).await {
+ self.handle_event(event).await;
+ } else {
+ break;
+ }
+ }
+
+ // setup task to trigger the rendering
+ match self.render_lock.take() {
+ // diff is performed outside of the rendering loop
+ // request a redraw after the diff is done
+ None => {
+ tokio::spawn(async move {
+ diff_finished_notify.notified().await;
+ redraw_notify.notify_one();
+ });
+ }
+ // diff is performed inside the rendering loop
+ // block redraw until the diff is done or the timeout is expired
+ Some(RenderLock {
+ lock,
+ timeout: Some(timeout),
+ }) => {
+ tokio::spawn(async move {
+ let res = {
+ // Acquire a lock on the redraw handle.
+ // The lock will block the rendering from occurring while held.
+ // The rendering waits for the diff if it doesn't time out
+ timeout_at(timeout, diff_finished_notify.notified()).await
+ };
+ // we either reached the timeout or the diff is finished, release the render lock
+ drop(lock);
+ if res.is_ok() {
+ // Diff finished in time we are done.
+ return;
+ }
+ // Diff failed to complete in time: log the event
+ // and wait until the diff occurs to trigger an async redraw
+ log::warn!("Diff computation timed out, update of diffs might appear delayed");
+ diff_finished_notify.notified().await;
+ redraw_notify.notify_one();
+ });
+ }
+ // a blocking diff is performed inside the rendering loop
+ // block redraw until the diff is done
+ Some(RenderLock {
+ lock,
+ timeout: None,
+ }) => {
+ tokio::spawn(async move {
+ diff_finished_notify.notified().await;
+ // diff is done release the lock
+ drop(lock)
+ });
+ }
+ };
+ }
+}
diff --git a/helix-vcs/src/diff/worker/test.rs b/helix-vcs/src/diff/worker/test.rs
new file mode 100644
index 00000000..14442426
--- /dev/null
+++ b/helix-vcs/src/diff/worker/test.rs
@@ -0,0 +1,149 @@
+use helix_core::Rope;
+use tokio::task::JoinHandle;
+
+use crate::diff::{DiffHandle, Hunk};
+
+impl DiffHandle {
+ fn new_test(diff_base: &str, doc: &str) -> (DiffHandle, JoinHandle<()>) {
+ DiffHandle::new_with_handle(
+ Rope::from_str(diff_base),
+ Rope::from_str(doc),
+ Default::default(),
+ )
+ }
+ async fn into_diff(self, handle: JoinHandle<()>) -> Vec<Hunk> {
+ let hunks = self.hunks;
+ // dropping the channel terminates the task
+ drop(self.channel);
+ handle.await.unwrap();
+ let hunks = hunks.lock();
+ Vec::clone(&*hunks)
+ }
+}
+
+#[tokio::test]
+async fn append_line() {
+ let (differ, handle) = DiffHandle::new_test("foo\n", "foo\nbar\n");
+ let line_diffs = differ.into_diff(handle).await;
+ assert_eq!(
+ &line_diffs,
+ &[Hunk {
+ before: 1..1,
+ after: 1..2
+ }]
+ )
+}
+
+#[tokio::test]
+async fn prepend_line() {
+ let (differ, handle) = DiffHandle::new_test("foo\n", "bar\nfoo\n");
+ let line_diffs = differ.into_diff(handle).await;
+ assert_eq!(
+ &line_diffs,
+ &[Hunk {
+ before: 0..0,
+ after: 0..1
+ }]
+ )
+}
+
+#[tokio::test]
+async fn modify() {
+ let (differ, handle) = DiffHandle::new_test("foo\nbar\n", "foo bar\nbar\n");
+ let line_diffs = differ.into_diff(handle).await;
+ assert_eq!(
+ &line_diffs,
+ &[Hunk {
+ before: 0..1,
+ after: 0..1
+ }]
+ )
+}
+
+#[tokio::test]
+async fn delete_line() {
+ let (differ, handle) = DiffHandle::new_test("foo\nfoo bar\nbar\n", "foo\nbar\n");
+ let line_diffs = differ.into_diff(handle).await;
+ assert_eq!(
+ &line_diffs,
+ &[Hunk {
+ before: 1..2,
+ after: 1..1
+ }]
+ )
+}
+
+#[tokio::test]
+async fn delete_line_and_modify() {
+ let (differ, handle) = DiffHandle::new_test("foo\nbar\ntest\nfoo", "foo\ntest\nfoo bar");
+ let line_diffs = differ.into_diff(handle).await;
+ assert_eq!(
+ &line_diffs,
+ &[
+ Hunk {
+ before: 1..2,
+ after: 1..1
+ },
+ Hunk {
+ before: 3..4,
+ after: 2..3
+ },
+ ]
+ )
+}
+
+#[tokio::test]
+async fn add_use() {
+ let (differ, handle) = DiffHandle::new_test(
+ "use ropey::Rope;\nuse tokio::task::JoinHandle;\n",
+ "use ropey::Rope;\nuse ropey::RopeSlice;\nuse tokio::task::JoinHandle;\n",
+ );
+ let line_diffs = differ.into_diff(handle).await;
+ assert_eq!(
+ &line_diffs,
+ &[Hunk {
+ before: 1..1,
+ after: 1..2
+ },]
+ )
+}
+
+#[tokio::test]
+async fn update_document() {
+ let (differ, handle) = DiffHandle::new_test("foo\nbar\ntest\nfoo", "foo\nbar\ntest\nfoo");
+ differ.update_document(Rope::from_str("foo\ntest\nfoo bar"), false);
+ let line_diffs = differ.into_diff(handle).await;
+ assert_eq!(
+ &line_diffs,
+ &[
+ Hunk {
+ before: 1..2,
+ after: 1..1
+ },
+ Hunk {
+ before: 3..4,
+ after: 2..3
+ },
+ ]
+ )
+}
+
+#[tokio::test]
+async fn update_base() {
+ let (differ, handle) = DiffHandle::new_test("foo\ntest\nfoo bar", "foo\ntest\nfoo bar");
+ differ.update_diff_base(Rope::from_str("foo\nbar\ntest\nfoo"));
+ let line_diffs = differ.into_diff(handle).await;
+ assert_eq!(
+ &line_diffs,
+ &[
+ Hunk {
+ before: 1..2,
+ after: 1..1
+ },
+ Hunk {
+ before: 3..4,
+ after: 2..3
+ },
+ ]
+ )
+}
diff --git a/helix-vcs/src/git.rs b/helix-vcs/src/git.rs
new file mode 100644
index 00000000..82b2b558
--- /dev/null
+++ b/helix-vcs/src/git.rs
@@ -0,0 +1,80 @@
+use std::path::Path;
+
+use git::objs::tree::EntryMode;
+use git::sec::trust::DefaultForLevel;
+use git::{Commit, ObjectId, Repository, ThreadSafeRepository};
+use git_repository as git;
+
+use crate::DiffProvider;
+
+#[cfg(test)]
+mod test;
+
+pub struct Git;
+
+impl Git {
+ fn open_repo(path: &Path, ceiling_dir: Option<&Path>) -> Option<ThreadSafeRepository> {
+ // custom open options
+ let mut git_open_opts_map = git::sec::trust::Mapping::<git::open::Options>::default();
+
+ // don't use the global git configs (not needed)
+ let config = git::permissions::Config {
+ system: false,
+ git: false,
+ user: false,
+ env: true,
+ includes: true,
+ git_binary: false,
+ };
+ // change options for config permissions without touching anything else
+ git_open_opts_map.reduced = git_open_opts_map.reduced.permissions(git::Permissions {
+ config,
+ ..git::Permissions::default_for_level(git::sec::Trust::Reduced)
+ });
+ git_open_opts_map.full = git_open_opts_map.full.permissions(git::Permissions {
+ config,
+ ..git::Permissions::default_for_level(git::sec::Trust::Full)
+ });
+
+ let mut open_options = git::discover::upwards::Options::default();
+ if let Some(ceiling_dir) = ceiling_dir {
+ open_options.ceiling_dirs = vec![ceiling_dir.to_owned()];
+ }
+
+ ThreadSafeRepository::discover_with_environment_overrides_opts(
+ path,
+ open_options,
+ git_open_opts_map,
+ )
+ .ok()
+ }
+}
+
+impl DiffProvider for Git {
+ fn get_diff_base(&self, file: &Path) -> Option<Vec<u8>> {
+ debug_assert!(!file.exists() || file.is_file());
+ debug_assert!(file.is_absolute());
+
+ // TODO cache repository lookup
+ let repo = Git::open_repo(file.parent()?, None)?.to_thread_local();
+ let head = repo.head_commit().ok()?;
+ let file_oid = find_file_in_commit(&repo, &head, file)?;
+
+ let file_object = repo.find_object(file_oid).ok()?;
+ Some(file_object.detach().data)
+ }
+}
+
+/// Finds the object that contains the contents of a file at a specific commit.
+fn find_file_in_commit(repo: &Repository, commit: &Commit, file: &Path) -> Option<ObjectId> {
+ let repo_dir = repo.work_dir()?;
+ let rel_path = file.strip_prefix(repo_dir).ok()?;
+ let tree = commit.tree().ok()?;
+ let tree_entry = tree.lookup_entry_by_path(rel_path).ok()??;
+ match tree_entry.mode() {
+ // not a file, everything is new, do not show diff
+ EntryMode::Tree | EntryMode::Commit | EntryMode::Link => None,
+ // found a file
+ EntryMode::Blob | EntryMode::BlobExecutable => Some(tree_entry.object_id()),
+ }
+}
diff --git a/helix-vcs/src/git/test.rs b/helix-vcs/src/git/test.rs
new file mode 100644
index 00000000..d6e9af08
--- /dev/null
+++ b/helix-vcs/src/git/test.rs
@@ -0,0 +1,121 @@
+use std::{fs::File, io::Write, path::Path, process::Command};
+
+use tempfile::TempDir;
+
+use crate::{DiffProvider, Git};
+
+fn exec_git_cmd(args: &str, git_dir: &Path) {
+ let res = Command::new("git")
+ .arg("-C")
+ .arg(git_dir) // execute the git command in this directory
+ .args(args.split_whitespace())
+ .env_remove("GIT_DIR")
+ .env_remove("GIT_ASKPASS")
+ .env_remove("SSH_ASKPASS")
+ .env("GIT_TERMINAL_PROMPT", "false")
+ .env("GIT_AUTHOR_DATE", "2000-01-01 00:00:00 +0000")
+ .env("GIT_AUTHOR_EMAIL", "author@example.com")
+ .env("GIT_AUTHOR_NAME", "author")
+ .env("GIT_COMMITTER_DATE", "2000-01-02 00:00:00 +0000")
+ .env("GIT_COMMITTER_EMAIL", "committer@example.com")
+ .env("GIT_COMMITTER_NAME", "committer")
+ .env("GIT_CONFIG_COUNT", "2")
+ .env("GIT_CONFIG_KEY_0", "commit.gpgsign")
+ .env("GIT_CONFIG_VALUE_0", "false")
+ .env("GIT_CONFIG_KEY_1", "init.defaultBranch")
+ .env("GIT_CONFIG_VALUE_1", "main")
+ .output()
+ .unwrap_or_else(|_| panic!("`git {args}` failed"));
+ if !res.status.success() {
+ println!("{}", String::from_utf8_lossy(&res.stdout));
+ eprintln!("{}", String::from_utf8_lossy(&res.stderr));
+ panic!("`git {args}` failed (see output above)")
+ }
+}
+
+fn create_commit(repo: &Path, add_modified: bool) {
+ if add_modified {
+ exec_git_cmd("add -A", repo);
+ }
+ exec_git_cmd("commit -m message", repo);
+}
+
+fn empty_git_repo() -> TempDir {
+ let tmp = tempfile::tempdir().expect("create temp dir for git testing");
+ exec_git_cmd("init", tmp.path());
+ exec_git_cmd("config user.email test@helix.org", tmp.path());
+ exec_git_cmd("config user.name helix-test", tmp.path());
+ tmp
+}
+
+#[test]
+fn missing_file() {
+ let temp_git = empty_git_repo();
+ let file = temp_git.path().join("file.txt");
+ File::create(&file).unwrap().write_all(b"foo").unwrap();
+
+ assert_eq!(Git.get_diff_base(&file), None);
+}
+
+#[test]
+fn unmodified_file() {
+ let temp_git = empty_git_repo();
+ let file = temp_git.path().join("file.txt");
+ let contents = b"foo".as_slice();
+ File::create(&file).unwrap().write_all(contents).unwrap();
+ create_commit(temp_git.path(), true);
+ assert_eq!(Git.get_diff_base(&file), Some(Vec::from(contents)));
+}
+
+#[test]
+fn modified_file() {
+ let temp_git = empty_git_repo();
+ let file = temp_git.path().join("file.txt");
+ let contents = b"foo".as_slice();
+ File::create(&file).unwrap().write_all(contents).unwrap();
+ create_commit(temp_git.path(), true);
+ File::create(&file).unwrap().write_all(b"bar").unwrap();
+
+ assert_eq!(Git.get_diff_base(&file), Some(Vec::from(contents)));
+}
+
+/// Test that `get_diff_base` does not return content for a directory.
+/// This is important to correctly cover cases where a directory is removed and replaced by a file.
+/// If the contents of the directory object were returned a diff between a path and the directory children would be produced.
+#[test]
+fn directory() {
+ let temp_git = empty_git_repo();
+ let dir = temp_git.path().join("file.txt");
+ std::fs::create_dir(&dir).expect("");
+ let file = dir.join("file.txt");
+ let contents = b"foo".as_slice();
+ File::create(&file).unwrap().write_all(contents).unwrap();
+
+ create_commit(temp_git.path(), true);
+
+ std::fs::remove_dir_all(&dir).unwrap();
+ File::create(&dir).unwrap().write_all(b"bar").unwrap();
+ assert_eq!(Git.get_diff_base(&dir), None);
+}
+
+/// Test that `get_diff_base` does not return content for a symlink.
+/// This is important to correctly cover cases where a symlink is removed and replaced by a file.
+/// If the contents of the symlink object were returned a diff between a path and the actual file would be produced (bad ui).
+#[cfg(any(unix, windows))]
+#[test]
+fn symlink() {
+ #[cfg(unix)]
+ use std::os::unix::fs::symlink;
+ #[cfg(not(unix))]
+ use std::os::windows::fs::symlink_file as symlink;
+ let temp_git = empty_git_repo();
+ let file = temp_git.path().join("file.txt");
+ let contents = b"foo".as_slice();
+ File::create(&file).unwrap().write_all(contents).unwrap();
+ let file_link = temp_git.path().join("file_link.txt");
+ symlink("file.txt", &file_link).unwrap();
+
+ create_commit(temp_git.path(), true);
+ assert_eq!(Git.get_diff_base(&file_link), None);
+ assert_eq!(Git.get_diff_base(&file), Some(Vec::from(contents)));
+}
diff --git a/helix-vcs/src/lib.rs b/helix-vcs/src/lib.rs
new file mode 100644
index 00000000..97320d32
--- /dev/null
+++ b/helix-vcs/src/lib.rs
@@ -0,0 +1,51 @@
+use std::path::Path;
+
+#[cfg(feature = "git")]
+pub use git::Git;
+#[cfg(not(feature = "git"))]
+pub use Dummy as Git;
+
+#[cfg(feature = "git")]
+mod git;
+
+mod diff;
+
+pub use diff::{DiffHandle, Hunk};
+
+pub trait DiffProvider {
+ /// Returns the data that a diff should be computed against
+ /// if this provider is used.
+ /// The data is returned as raw bytes without any decoding or encoding performed
+ /// to ensure all file encodings are handled correctly.
+ fn get_diff_base(&self, file: &Path) -> Option<Vec<u8>>;
+}
+
+#[doc(hidden)]
+pub struct Dummy;
+impl DiffProvider for Dummy {
+ fn get_diff_base(&self, _file: &Path) -> Option<Vec<u8>> {
+ None
+ }
+}
+
+pub struct DiffProviderRegistry {
+ providers: Vec<Box<dyn DiffProvider>>,
+}
+
+impl DiffProviderRegistry {
+ pub fn get_diff_base(&self, file: &Path) -> Option<Vec<u8>> {
+ self.providers
+ .iter()
+ .find_map(|provider| provider.get_diff_base(file))
+ }
+}
+
+impl Default for DiffProviderRegistry {
+ fn default() -> Self {
+ // currently only git is supported
+ // TODO make this configurable when more providers are added
+ let git: Box<dyn DiffProvider> = Box::new(Git);
+ let providers = vec![git];
+ DiffProviderRegistry { providers }
+ }
+}