diff options
Diffstat (limited to 'helix-vcs/src')
-rw-r--r-- | helix-vcs/src/diff.rs | 198 | ||||
-rw-r--r-- | helix-vcs/src/diff/line_cache.rs | 130 | ||||
-rw-r--r-- | helix-vcs/src/diff/worker.rs | 207 | ||||
-rw-r--r-- | helix-vcs/src/diff/worker/test.rs | 149 | ||||
-rw-r--r-- | helix-vcs/src/git.rs | 80 | ||||
-rw-r--r-- | helix-vcs/src/git/test.rs | 121 | ||||
-rw-r--r-- | helix-vcs/src/lib.rs | 51 |
7 files changed, 936 insertions, 0 deletions
diff --git a/helix-vcs/src/diff.rs b/helix-vcs/src/diff.rs new file mode 100644 index 00000000..b1acd1f2 --- /dev/null +++ b/helix-vcs/src/diff.rs @@ -0,0 +1,198 @@ +use std::ops::Range; +use std::sync::Arc; + +use helix_core::Rope; +use imara_diff::Algorithm; +use parking_lot::{Mutex, MutexGuard}; +use tokio::sync::mpsc::{unbounded_channel, UnboundedSender}; +use tokio::sync::{Notify, OwnedRwLockReadGuard, RwLock}; +use tokio::task::JoinHandle; +use tokio::time::Instant; + +use crate::diff::worker::DiffWorker; + +mod line_cache; +mod worker; + +type RedrawHandle = (Arc<Notify>, Arc<RwLock<()>>); + +/// A rendering lock passed to the differ the prevents redraws from occurring +struct RenderLock { + pub lock: OwnedRwLockReadGuard<()>, + pub timeout: Option<Instant>, +} + +struct Event { + text: Rope, + is_base: bool, + render_lock: Option<RenderLock>, +} + +#[derive(Clone, Debug)] +pub struct DiffHandle { + channel: UnboundedSender<Event>, + render_lock: Arc<RwLock<()>>, + hunks: Arc<Mutex<Vec<Hunk>>>, + inverted: bool, +} + +impl DiffHandle { + pub fn new(diff_base: Rope, doc: Rope, redraw_handle: RedrawHandle) -> DiffHandle { + DiffHandle::new_with_handle(diff_base, doc, redraw_handle).0 + } + + fn new_with_handle( + diff_base: Rope, + doc: Rope, + redraw_handle: RedrawHandle, + ) -> (DiffHandle, JoinHandle<()>) { + let (sender, receiver) = unbounded_channel(); + let hunks: Arc<Mutex<Vec<Hunk>>> = Arc::default(); + let worker = DiffWorker { + channel: receiver, + hunks: hunks.clone(), + new_hunks: Vec::default(), + redraw_notify: redraw_handle.0, + diff_finished_notify: Arc::default(), + }; + let handle = tokio::spawn(worker.run(diff_base, doc)); + let differ = DiffHandle { + channel: sender, + hunks, + inverted: false, + render_lock: redraw_handle.1, + }; + (differ, handle) + } + + pub fn invert(&mut self) { + self.inverted = !self.inverted; + } + + pub fn hunks(&self) -> FileHunks { + FileHunks { + hunks: self.hunks.lock(), + inverted: self.inverted, + } + } + + 
/// Updates the document associated with this redraw handle + /// This function is only intended to be called from within the rendering loop + /// if called from elsewhere it may fail to acquire the render lock and panic + pub fn update_document(&self, doc: Rope, block: bool) -> bool { + // unwrap is ok here because the rendering lock is + // only exclusively locked during redraw. + // This function is only intended to be called + // from the core rendering loop where no redraw can happen in parallel + let lock = self.render_lock.clone().try_read_owned().unwrap(); + let timeout = if block { + None + } else { + Some(Instant::now() + tokio::time::Duration::from_millis(SYNC_DIFF_TIMEOUT)) + }; + self.update_document_impl(doc, self.inverted, Some(RenderLock { lock, timeout })) + } + + pub fn update_diff_base(&self, diff_base: Rope) -> bool { + self.update_document_impl(diff_base, !self.inverted, None) + } + + fn update_document_impl( + &self, + text: Rope, + is_base: bool, + render_lock: Option<RenderLock>, + ) -> bool { + let event = Event { + text, + is_base, + render_lock, + }; + self.channel.send(event).is_ok() + } +} + +/// synchronous debounce value should be low +/// so we can update synchronously most of the time +const DIFF_DEBOUNCE_TIME_SYNC: u64 = 1; +/// maximum time that rendering should be blocked until the diff finishes +const SYNC_DIFF_TIMEOUT: u64 = 12; +const DIFF_DEBOUNCE_TIME_ASYNC: u64 = 96; +const ALGORITHM: Algorithm = Algorithm::Histogram; +const MAX_DIFF_LINES: usize = 64 * u16::MAX as usize; +// cap average line length to 128 for files with MAX_DIFF_LINES +const MAX_DIFF_BYTES: usize = MAX_DIFF_LINES * 128; + +/// A single change in a file potentially spanning multiple lines +/// Hunks produced by the differs are always ordered by their position +/// in the file and non-overlapping. 
+/// Specifically for any two hunks `x` and `y` the following properties hold: +/// +/// ``` no_compile +/// assert!(x.before.end <= y.before.start); +/// assert!(x.after.end <= y.after.start); +/// ``` +#[derive(PartialEq, Eq, Clone, Debug)] +pub struct Hunk { + pub before: Range<u32>, + pub after: Range<u32>, +} + +impl Hunk { + /// Can be used instead of `Option::None` for better performance + /// because lines larger than `i32::MAX` are not supported by `imara-diff` anyways. + /// Has some nice properties where it usually is not necessary to check for `None` separately: + /// Empty ranges fail contains checks and also fail smaller-than checks. + pub const NONE: Hunk = Hunk { + before: u32::MAX..u32::MAX, + after: u32::MAX..u32::MAX, + }; + + /// Inverts a change so that `before` and `after` are swapped + pub fn invert(&self) -> Hunk { + Hunk { + before: self.after.clone(), + after: self.before.clone(), + } + } + + pub fn is_pure_insertion(&self) -> bool { + self.before.is_empty() + } + + pub fn is_pure_removal(&self) -> bool { + self.after.is_empty() + } +} + +/// A list of changes in a file sorted in ascending +/// non-overlapping order +#[derive(Debug)] +pub struct FileHunks<'a> { + hunks: MutexGuard<'a, Vec<Hunk>>, + inverted: bool, +} + +impl FileHunks<'_> { + pub fn is_inverted(&self) -> bool { + self.inverted + } + + /// Returns the `Hunk` for the `n`th change in this file. + /// if there is no `n`th change `Hunk::NONE` is returned instead. + pub fn nth_hunk(&self, n: u32) -> Hunk { + match self.hunks.get(n as usize) { + Some(hunk) if self.inverted => hunk.invert(), + Some(hunk) => hunk.clone(), + None => Hunk::NONE, + } + } + + pub fn len(&self) -> u32 { + self.hunks.len() as u32 + } + + pub fn is_empty(&self) -> bool { + self.len() == 0 + } +} diff --git a/helix-vcs/src/diff/line_cache.rs b/helix-vcs/src/diff/line_cache.rs new file mode 100644 index 00000000..c3ee5daa --- /dev/null +++ b/helix-vcs/src/diff/line_cache.rs @@ -0,0 +1,130 @@ +//! 
This module encapsulates a tiny bit of unsafe code that +//! makes diffing significantly faster and more ergonomic to implement. +//! This code is necessary because diffing requires quick random +//! access to the lines of the text that is being diffed. +//! +//! Therefore it is best to collect the `Rope::lines` iterator into a vec +//! first because access to the vec is `O(1)` where `Rope::line` is `O(log N)`. +//! However this process can allocate a (potentially quite large) vector. +//! +//! To avoid reallocation for every diff, the vector is reused. +//! However the RopeSlice references the original rope and therefore forms a self-referential data structure. +//! A transmute is used to change the lifetime of the slice to static to circumvent that problem. +use std::mem::transmute; + +use helix_core::{Rope, RopeSlice}; +use imara_diff::intern::{InternedInput, Interner}; + +use super::{MAX_DIFF_BYTES, MAX_DIFF_LINES}; + +/// A cache that stores the `lines` of a rope as a vector. +/// It allows safely reusing the allocation of the vec when updating the rope +pub(crate) struct InternedRopeLines { + diff_base: Rope, + doc: Rope, + num_tokens_diff_base: u32, + interned: InternedInput<RopeSlice<'static>>, +} + +impl InternedRopeLines { + pub fn new(diff_base: Rope, doc: Rope) -> InternedRopeLines { + let mut res = InternedRopeLines { + interned: InternedInput { + before: Vec::with_capacity(diff_base.len_lines()), + after: Vec::with_capacity(doc.len_lines()), + interner: Interner::new(diff_base.len_lines() + doc.len_lines()), + }, + diff_base, + doc, + // will be populated by update_diff_base_impl + num_tokens_diff_base: 0, + }; + res.update_diff_base_impl(); + res + } + + /// Updates the `diff_base` and optionally the document if `doc` is not None + pub fn update_diff_base(&mut self, diff_base: Rope, doc: Option<Rope>) { + self.interned.clear(); + self.diff_base = diff_base; + if let Some(doc) = doc { + self.doc = doc + } + if !self.is_too_large() { + 
self.update_diff_base_impl(); + } + } + + /// Updates the `doc` without reinterning the `diff_base`, this function + /// is therefore significantly faster than `update_diff_base` when only the document changes. + pub fn update_doc(&mut self, doc: Rope) { + // Safety: we clear any tokens that were added after + // the interning of `self.diff_base` finished so + // all lines that refer to `self.doc` have been purged. + + self.interned + .interner + .erase_tokens_after(self.num_tokens_diff_base.into()); + + self.doc = doc; + if self.is_too_large() { + self.interned.after.clear(); + } else { + self.update_doc_impl(); + } + } + + fn update_diff_base_impl(&mut self) { + // Safety: This transmute is safe because it only transmutes a lifetime, which has no effect. + // The backing storage for the RopeSlices referred to by the lifetime is stored in `self.diff_base`. + // Therefore as long as `self.diff_base` is not dropped/replaced this memory remains valid. + // `self.diff_base` is only changed in `self.update_diff_base`, which clears the interner. + // When the interned lines are exposed to consumers in `self.interned_lines`, the lifetime is bounded to a reference to self. + // That means that on calls to update there exist no references to `self.interned`. + let before = self + .diff_base + .lines() + .map(|line: RopeSlice| -> RopeSlice<'static> { unsafe { transmute(line) } }); + self.interned.update_before(before); + self.num_tokens_diff_base = self.interned.interner.num_tokens(); + // the doc has to be interned again because the interner was fully cleared + self.update_doc_impl() + } + + fn update_doc_impl(&mut self) { + // Safety: This transmute is safe because it only transmutes a lifetime, which has no effect. + // The backing storage for the RopeSlices referred to by the lifetime is stored in `self.doc`. + // Therefore as long as `self.doc` is not dropped/replaced this memory remains valid. + // `self.doc` is only changed in `self.update_doc`, which clears the interner. 
+ // When the interned lines are exposed to consumers in `self.interned_lines`, the lifetime is bounded to a reference to self. + // That means that on calls to update there exist no references to `self.interned`. + let after = self + .doc + .lines() + .map(|line: RopeSlice| -> RopeSlice<'static> { unsafe { transmute(line) } }); + self.interned.update_after(after); + } + + fn is_too_large(&self) -> bool { + // bound both lines and bytes to avoid huge files with few (but huge) lines + // or huge files with tiny lines. While this makes no difference to + // diff itself (the diff performance only depends on the number of tokens) + // the interning runtime depends mostly on filesize and is actually dominant + // for large files + self.doc.len_lines() > MAX_DIFF_LINES + || self.diff_base.len_lines() > MAX_DIFF_LINES + || self.doc.len_bytes() > MAX_DIFF_BYTES + || self.diff_base.len_bytes() > MAX_DIFF_BYTES + } + + /// Returns the `InternedInput` for performing the diff. + /// If `diff_base` or `doc` is so large that performing a diff could slow the editor + /// this function returns `None`. 
+ pub fn interned_lines(&self) -> Option<&InternedInput<RopeSlice>> { + if self.is_too_large() { + None + } else { + Some(&self.interned) + } + } +} diff --git a/helix-vcs/src/diff/worker.rs b/helix-vcs/src/diff/worker.rs new file mode 100644 index 00000000..b8659c9b --- /dev/null +++ b/helix-vcs/src/diff/worker.rs @@ -0,0 +1,207 @@ +use std::mem::swap; +use std::ops::Range; +use std::sync::Arc; + +use helix_core::{Rope, RopeSlice}; +use imara_diff::intern::InternedInput; +use parking_lot::Mutex; +use tokio::sync::mpsc::UnboundedReceiver; +use tokio::sync::Notify; +use tokio::time::{timeout, timeout_at, Duration}; + +use crate::diff::{ + Event, RenderLock, ALGORITHM, DIFF_DEBOUNCE_TIME_ASYNC, DIFF_DEBOUNCE_TIME_SYNC, +}; + +use super::line_cache::InternedRopeLines; +use super::Hunk; + +#[cfg(test)] +mod test; + +pub(super) struct DiffWorker { + pub channel: UnboundedReceiver<Event>, + pub hunks: Arc<Mutex<Vec<Hunk>>>, + pub new_hunks: Vec<Hunk>, + pub redraw_notify: Arc<Notify>, + pub diff_finished_notify: Arc<Notify>, +} + +impl DiffWorker { + async fn accumulate_events(&mut self, event: Event) -> (Option<Rope>, Option<Rope>) { + let mut accumulator = EventAccumulator::new(); + accumulator.handle_event(event).await; + accumulator + .accumulate_debounced_events( + &mut self.channel, + self.redraw_notify.clone(), + self.diff_finished_notify.clone(), + ) + .await; + (accumulator.doc, accumulator.diff_base) + } + + pub async fn run(mut self, diff_base: Rope, doc: Rope) { + let mut interner = InternedRopeLines::new(diff_base, doc); + if let Some(lines) = interner.interned_lines() { + self.perform_diff(lines); + } + self.apply_hunks(); + while let Some(event) = self.channel.recv().await { + let (doc, diff_base) = self.accumulate_events(event).await; + + let process_accumulated_events = || { + if let Some(new_base) = diff_base { + interner.update_diff_base(new_base, doc) + } else { + interner.update_doc(doc.unwrap()) + } + + if let Some(lines) = interner.interned_lines() 
{ + self.perform_diff(lines) + } + }; + + // Calculating diffs is computationally expensive and should + // not run inside an async function to avoid blocking other futures. + // Note: tokio::task::block_in_place does not work during tests + #[cfg(test)] + process_accumulated_events(); + #[cfg(not(test))] + tokio::task::block_in_place(process_accumulated_events); + + self.apply_hunks(); + } + } + + /// Updates the hunks (used by the gutter) by replacing them with `self.new_hunks`. + /// `self.new_hunks` is always empty after this function runs. + /// To improve performance this function tries to reuse the allocation of the old diff previously stored in `self.hunks` + fn apply_hunks(&mut self) { + swap(&mut *self.hunks.lock(), &mut self.new_hunks); + self.diff_finished_notify.notify_waiters(); + self.new_hunks.clear(); + } + + fn perform_diff(&mut self, input: &InternedInput<RopeSlice>) { + imara_diff::diff(ALGORITHM, input, |before: Range<u32>, after: Range<u32>| { + self.new_hunks.push(Hunk { before, after }) + }) + } +} + +struct EventAccumulator { + diff_base: Option<Rope>, + doc: Option<Rope>, + render_lock: Option<RenderLock>, +} + +impl<'a> EventAccumulator { + fn new() -> EventAccumulator { + EventAccumulator { + diff_base: None, + doc: None, + render_lock: None, + } + } + + async fn handle_event(&mut self, event: Event) { + let dst = if event.is_base { + &mut self.diff_base + } else { + &mut self.doc + }; + + *dst = Some(event.text); + + // always prefer the most synchronous requested render mode + if let Some(render_lock) = event.render_lock { + match &mut self.render_lock { + Some(RenderLock { timeout, .. 
}) => { + // A timeout of `None` means that the render should + // always wait for the diff to complete (so no timeout) + // remove the existing timeout, otherwise keep the previous timeout + // because it will be shorter then the current timeout + if render_lock.timeout.is_none() { + timeout.take(); + } + } + None => self.render_lock = Some(render_lock), + } + } + } + + async fn accumulate_debounced_events( + &mut self, + channel: &mut UnboundedReceiver<Event>, + redraw_notify: Arc<Notify>, + diff_finished_notify: Arc<Notify>, + ) { + let async_debounce = Duration::from_millis(DIFF_DEBOUNCE_TIME_ASYNC); + let sync_debounce = Duration::from_millis(DIFF_DEBOUNCE_TIME_SYNC); + loop { + // if we are not blocking rendering use a much longer timeout + let debounce = if self.render_lock.is_none() { + async_debounce + } else { + sync_debounce + }; + + if let Ok(Some(event)) = timeout(debounce, channel.recv()).await { + self.handle_event(event).await; + } else { + break; + } + } + + // setup task to trigger the rendering + match self.render_lock.take() { + // diff is performed outside of the rendering loop + // request a redraw after the diff is done + None => { + tokio::spawn(async move { + diff_finished_notify.notified().await; + redraw_notify.notify_one(); + }); + } + // diff is performed inside the rendering loop + // block redraw until the diff is done or the timeout is expired + Some(RenderLock { + lock, + timeout: Some(timeout), + }) => { + tokio::spawn(async move { + let res = { + // Acquire a lock on the redraw handle. + // The lock will block the rendering from occurring while held. + // The rendering waits for the diff if it doesn't time out + timeout_at(timeout, diff_finished_notify.notified()).await + }; + // we either reached the timeout or the diff is finished, release the render lock + drop(lock); + if res.is_ok() { + // Diff finished in time we are done. 
+ return; + } + // Diff failed to complete in time log the event + // and wait until the diff occurs to trigger an async redraw + log::warn!("Diff computation timed out, update of diffs might appear delayed"); + diff_finished_notify.notified().await; + redraw_notify.notify_one(); + }); + } + // a blocking diff is performed inside the rendering loop + // block redraw until the diff is done + Some(RenderLock { + lock, + timeout: None, + }) => { + tokio::spawn(async move { + diff_finished_notify.notified().await; + // diff is done release the lock + drop(lock) + }); + } + }; + } +} diff --git a/helix-vcs/src/diff/worker/test.rs b/helix-vcs/src/diff/worker/test.rs new file mode 100644 index 00000000..14442426 --- /dev/null +++ b/helix-vcs/src/diff/worker/test.rs @@ -0,0 +1,149 @@ +use helix_core::Rope; +use tokio::task::JoinHandle; + +use crate::diff::{DiffHandle, Hunk}; + +impl DiffHandle { + fn new_test(diff_base: &str, doc: &str) -> (DiffHandle, JoinHandle<()>) { + DiffHandle::new_with_handle( + Rope::from_str(diff_base), + Rope::from_str(doc), + Default::default(), + ) + } + async fn into_diff(self, handle: JoinHandle<()>) -> Vec<Hunk> { + let hunks = self.hunks; + // dropping the channel terminates the task + drop(self.channel); + handle.await.unwrap(); + let hunks = hunks.lock(); + Vec::clone(&*hunks) + } +} + +#[tokio::test] +async fn append_line() { + let (differ, handle) = DiffHandle::new_test("foo\n", "foo\nbar\n"); + let line_diffs = differ.into_diff(handle).await; + assert_eq!( + &line_diffs, + &[Hunk { + before: 1..1, + after: 1..2 + }] + ) +} + +#[tokio::test] +async fn prepend_line() { + let (differ, handle) = DiffHandle::new_test("foo\n", "bar\nfoo\n"); + let line_diffs = differ.into_diff(handle).await; + assert_eq!( + &line_diffs, + &[Hunk { + before: 0..0, + after: 0..1 + }] + ) +} + +#[tokio::test] +async fn modify() { + let (differ, handle) = DiffHandle::new_test("foo\nbar\n", "foo bar\nbar\n"); + let line_diffs = differ.into_diff(handle).await; + 
assert_eq!( + &line_diffs, + &[Hunk { + before: 0..1, + after: 0..1 + }] + ) +} + +#[tokio::test] +async fn delete_line() { + let (differ, handle) = DiffHandle::new_test("foo\nfoo bar\nbar\n", "foo\nbar\n"); + let line_diffs = differ.into_diff(handle).await; + assert_eq!( + &line_diffs, + &[Hunk { + before: 1..2, + after: 1..1 + }] + ) +} + +#[tokio::test] +async fn delete_line_and_modify() { + let (differ, handle) = DiffHandle::new_test("foo\nbar\ntest\nfoo", "foo\ntest\nfoo bar"); + let line_diffs = differ.into_diff(handle).await; + assert_eq!( + &line_diffs, + &[ + Hunk { + before: 1..2, + after: 1..1 + }, + Hunk { + before: 3..4, + after: 2..3 + }, + ] + ) +} + +#[tokio::test] +async fn add_use() { + let (differ, handle) = DiffHandle::new_test( + "use ropey::Rope;\nuse tokio::task::JoinHandle;\n", + "use ropey::Rope;\nuse ropey::RopeSlice;\nuse tokio::task::JoinHandle;\n", + ); + let line_diffs = differ.into_diff(handle).await; + assert_eq!( + &line_diffs, + &[Hunk { + before: 1..1, + after: 1..2 + },] + ) +} + +#[tokio::test] +async fn update_document() { + let (differ, handle) = DiffHandle::new_test("foo\nbar\ntest\nfoo", "foo\nbar\ntest\nfoo"); + differ.update_document(Rope::from_str("foo\ntest\nfoo bar"), false); + let line_diffs = differ.into_diff(handle).await; + assert_eq!( + &line_diffs, + &[ + Hunk { + before: 1..2, + after: 1..1 + }, + Hunk { + before: 3..4, + after: 2..3 + }, + ] + ) +} + +#[tokio::test] +async fn update_base() { + let (differ, handle) = DiffHandle::new_test("foo\ntest\nfoo bar", "foo\ntest\nfoo bar"); + differ.update_diff_base(Rope::from_str("foo\nbar\ntest\nfoo")); + let line_diffs = differ.into_diff(handle).await; + assert_eq!( + &line_diffs, + &[ + Hunk { + before: 1..2, + after: 1..1 + }, + Hunk { + before: 3..4, + after: 2..3 + }, + ] + ) +} diff --git a/helix-vcs/src/git.rs b/helix-vcs/src/git.rs new file mode 100644 index 00000000..82b2b558 --- /dev/null +++ b/helix-vcs/src/git.rs @@ -0,0 +1,80 @@ +use std::path::Path; + +use 
git::objs::tree::EntryMode; +use git::sec::trust::DefaultForLevel; +use git::{Commit, ObjectId, Repository, ThreadSafeRepository}; +use git_repository as git; + +use crate::DiffProvider; + +#[cfg(test)] +mod test; + +pub struct Git; + +impl Git { + fn open_repo(path: &Path, ceiling_dir: Option<&Path>) -> Option<ThreadSafeRepository> { + // custom open options + let mut git_open_opts_map = git::sec::trust::Mapping::<git::open::Options>::default(); + + // don't use the global git configs (not needed) + let config = git::permissions::Config { + system: false, + git: false, + user: false, + env: true, + includes: true, + git_binary: false, + }; + // change options for config permissions without touching anything else + git_open_opts_map.reduced = git_open_opts_map.reduced.permissions(git::Permissions { + config, + ..git::Permissions::default_for_level(git::sec::Trust::Reduced) + }); + git_open_opts_map.full = git_open_opts_map.full.permissions(git::Permissions { + config, + ..git::Permissions::default_for_level(git::sec::Trust::Full) + }); + + let mut open_options = git::discover::upwards::Options::default(); + if let Some(ceiling_dir) = ceiling_dir { + open_options.ceiling_dirs = vec![ceiling_dir.to_owned()]; + } + + ThreadSafeRepository::discover_with_environment_overrides_opts( + path, + open_options, + git_open_opts_map, + ) + .ok() + } +} + +impl DiffProvider for Git { + fn get_diff_base(&self, file: &Path) -> Option<Vec<u8>> { + debug_assert!(!file.exists() || file.is_file()); + debug_assert!(file.is_absolute()); + + // TODO cache repository lookup + let repo = Git::open_repo(file.parent()?, None)?.to_thread_local(); + let head = repo.head_commit().ok()?; + let file_oid = find_file_in_commit(&repo, &head, file)?; + + let file_object = repo.find_object(file_oid).ok()?; + Some(file_object.detach().data) + } +} + +/// Finds the object that contains the contents of a file at a specific commit. 
+fn find_file_in_commit(repo: &Repository, commit: &Commit, file: &Path) -> Option<ObjectId> { + let repo_dir = repo.work_dir()?; + let rel_path = file.strip_prefix(repo_dir).ok()?; + let tree = commit.tree().ok()?; + let tree_entry = tree.lookup_entry_by_path(rel_path).ok()??; + match tree_entry.mode() { + // not a file, everything is new, do not show diff + EntryMode::Tree | EntryMode::Commit | EntryMode::Link => None, + // found a file + EntryMode::Blob | EntryMode::BlobExecutable => Some(tree_entry.object_id()), + } +} diff --git a/helix-vcs/src/git/test.rs b/helix-vcs/src/git/test.rs new file mode 100644 index 00000000..d6e9af08 --- /dev/null +++ b/helix-vcs/src/git/test.rs @@ -0,0 +1,121 @@ +use std::{fs::File, io::Write, path::Path, process::Command}; + +use tempfile::TempDir; + +use crate::{DiffProvider, Git}; + +fn exec_git_cmd(args: &str, git_dir: &Path) { + let res = Command::new("git") + .arg("-C") + .arg(git_dir) // execute the git command in this directory + .args(args.split_whitespace()) + .env_remove("GIT_DIR") + .env_remove("GIT_ASKPASS") + .env_remove("SSH_ASKPASS") + .env("GIT_TERMINAL_PROMPT", "false") + .env("GIT_AUTHOR_DATE", "2000-01-01 00:00:00 +0000") + .env("GIT_AUTHOR_EMAIL", "author@example.com") + .env("GIT_AUTHOR_NAME", "author") + .env("GIT_COMMITTER_DATE", "2000-01-02 00:00:00 +0000") + .env("GIT_COMMITTER_EMAIL", "committer@example.com") + .env("GIT_COMMITTER_NAME", "committer") + .env("GIT_CONFIG_COUNT", "2") + .env("GIT_CONFIG_KEY_0", "commit.gpgsign") + .env("GIT_CONFIG_VALUE_0", "false") + .env("GIT_CONFIG_KEY_1", "init.defaultBranch") + .env("GIT_CONFIG_VALUE_1", "main") + .output() + .unwrap_or_else(|_| panic!("`git {args}` failed")); + if !res.status.success() { + println!("{}", String::from_utf8_lossy(&res.stdout)); + eprintln!("{}", String::from_utf8_lossy(&res.stderr)); + panic!("`git {args}` failed (see output above)") + } +} + +fn create_commit(repo: &Path, add_modified: bool) { + if add_modified { + exec_git_cmd("add 
-A", repo); + } + exec_git_cmd("commit -m message", repo); +} + +fn empty_git_repo() -> TempDir { + let tmp = tempfile::tempdir().expect("create temp dir for git testing"); + exec_git_cmd("init", tmp.path()); + exec_git_cmd("config user.email test@helix.org", tmp.path()); + exec_git_cmd("config user.name helix-test", tmp.path()); + tmp +} + +#[test] +fn missing_file() { + let temp_git = empty_git_repo(); + let file = temp_git.path().join("file.txt"); + File::create(&file).unwrap().write_all(b"foo").unwrap(); + + assert_eq!(Git.get_diff_base(&file), None); +} + +#[test] +fn unmodified_file() { + let temp_git = empty_git_repo(); + let file = temp_git.path().join("file.txt"); + let contents = b"foo".as_slice(); + File::create(&file).unwrap().write_all(contents).unwrap(); + create_commit(temp_git.path(), true); + assert_eq!(Git.get_diff_base(&file), Some(Vec::from(contents))); +} + +#[test] +fn modified_file() { + let temp_git = empty_git_repo(); + let file = temp_git.path().join("file.txt"); + let contents = b"foo".as_slice(); + File::create(&file).unwrap().write_all(contents).unwrap(); + create_commit(temp_git.path(), true); + File::create(&file).unwrap().write_all(b"bar").unwrap(); + + assert_eq!(Git.get_diff_base(&file), Some(Vec::from(contents))); +} + +/// Test that `get_file_head` does not return content for a directory. +/// This is important to correctly cover cases where a directory is removed and replaced by a file. +/// If the contents of the directory object were returned a diff between a path and the directory children would be produced. 
+#[test] +fn directory() { + let temp_git = empty_git_repo(); + let dir = temp_git.path().join("file.txt"); + std::fs::create_dir(&dir).expect(""); + let file = dir.join("file.txt"); + let contents = b"foo".as_slice(); + File::create(&file).unwrap().write_all(contents).unwrap(); + + create_commit(temp_git.path(), true); + + std::fs::remove_dir_all(&dir).unwrap(); + File::create(&dir).unwrap().write_all(b"bar").unwrap(); + assert_eq!(Git.get_diff_base(&dir), None); +} + +/// Test that `get_file_head` does not return content for a symlink. +/// This is important to correctly cover cases where a symlink is removed and replaced by a file. +/// If the contents of the symlink object were returned a diff between a path and the actual file would be produced (bad ui). +#[cfg(any(unix, windows))] +#[test] +fn symlink() { + #[cfg(unix)] + use std::os::unix::fs::symlink; + #[cfg(not(unix))] + use std::os::windows::fs::symlink_file as symlink; + let temp_git = empty_git_repo(); + let file = temp_git.path().join("file.txt"); + let contents = b"foo".as_slice(); + File::create(&file).unwrap().write_all(contents).unwrap(); + let file_link = temp_git.path().join("file_link.txt"); + symlink("file.txt", &file_link).unwrap(); + + create_commit(temp_git.path(), true); + assert_eq!(Git.get_diff_base(&file_link), None); + assert_eq!(Git.get_diff_base(&file), Some(Vec::from(contents))); +} diff --git a/helix-vcs/src/lib.rs b/helix-vcs/src/lib.rs new file mode 100644 index 00000000..97320d32 --- /dev/null +++ b/helix-vcs/src/lib.rs @@ -0,0 +1,51 @@ +use std::path::Path; + +#[cfg(feature = "git")] +pub use git::Git; +#[cfg(not(feature = "git"))] +pub use Dummy as Git; + +#[cfg(feature = "git")] +mod git; + +mod diff; + +pub use diff::{DiffHandle, Hunk}; + +pub trait DiffProvider { + /// Returns the data that a diff should be computed against + /// if this provider is used. 
+ /// The data is returned as raw bytes without any decoding or encoding performed + /// to ensure all file encodings are handled correctly. + fn get_diff_base(&self, file: &Path) -> Option<Vec<u8>>; +} + +#[doc(hidden)] +pub struct Dummy; +impl DiffProvider for Dummy { + fn get_diff_base(&self, _file: &Path) -> Option<Vec<u8>> { + None + } +} + +pub struct DiffProviderRegistry { + providers: Vec<Box<dyn DiffProvider>>, +} + +impl DiffProviderRegistry { + pub fn get_diff_base(&self, file: &Path) -> Option<Vec<u8>> { + self.providers + .iter() + .find_map(|provider| provider.get_diff_base(file)) + } +} + +impl Default for DiffProviderRegistry { + fn default() -> Self { + // currently only git is supported + // TODO make this configurable when more providers are added + let git: Box<dyn DiffProvider> = Box::new(Git); + let providers = vec![git]; + DiffProviderRegistry { providers } + } +} |