Skip to content

Commit 66c3d03

Browse files
committed
feat: diff between worktree and index
1 parent deabba6 commit 66c3d03

File tree

5 files changed

+376
-0
lines changed

5 files changed

+376
-0
lines changed

gix-worktree/src/diff.rs

Lines changed: 292 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,292 @@
1+
use std::io::{self, ErrorKind};
2+
use std::path::{Path, PathBuf};
3+
use std::time::{Duration, SystemTimeError};
4+
5+
use bstr::BString;
6+
use gix_features::hash;
7+
use gix_hash::ObjectId;
8+
use gix_index as index;
9+
use gix_object::encode::loose_header;
10+
use gix_path as path;
11+
12+
use crate::fs;
13+
use crate::read::{self, read_blob_to_buf_with_meta};
14+
15+
/// The way the mode of a tracked entry differs between the index and the worktree.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum ModeChange {
    /// The kind of the entry changed, for example a regular file was replaced by a symlink.
    ///
    /// `git status` shows this as `typechange`.
    /// Note that, submodules aside, the index only holds files and symlinks, which is why an
    /// entry that turned into a directory counts as a removal instead of a type change.
    TypeChange,
    /// The executable bit of a file was set or cleared.
    ExecutableChange,
}
26+
27+
/// How a worktree file changed compared to an index entry
28+
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)]
29+
pub struct FileModification {
30+
/// How the mode has changed
31+
pub mode_change: Option<ModeChange>,
32+
/// mtime/ctime changed. If this is false then we can assume
33+
/// that the file is uncahged (with the exception of racy timestamps).
34+
/// If this is true however the file might still be unchaged. We need
35+
/// to read the file from disk and compare it to the object in
36+
/// index.
37+
pub stat_changed: bool,
38+
/// The data of this entry has changed. This can be quickly
39+
/// determined if the size of the stat data is mismatched.
40+
/// Otherwise a data change must be detected by reading the file
41+
/// from disk and comparing it to the file stored in the index
42+
/// (only needs to be done if `self.stat_changed` is true)
43+
pub data_changed: bool,
44+
}
45+
46+
impl FileModification {
47+
/// Computes the status of an entry by comparing its stat to `symlink_metadata()`
48+
pub fn from_stat(
49+
entry: &index::Entry,
50+
fs_stat: &std::fs::Metadata,
51+
capabilites: &fs::Capabilities,
52+
) -> Result<FileModification, SystemTimeError> {
53+
#[cfg(unix)]
54+
use std::os::unix::fs::MetadataExt;
55+
56+
let mode_change = match entry.mode {
57+
index::entry::Mode::FILE if !fs_stat.is_file() => Some(ModeChange::TypeChange),
58+
#[cfg(unix)]
59+
index::entry::Mode::FILE if capabilites.executable_bit && fs_stat.mode() & 0o111 != 0 => {
60+
Some(ModeChange::ExecutableChange)
61+
}
62+
#[cfg(unix)]
63+
index::entry::Mode::FILE_EXECUTABLE if capabilites.executable_bit && fs_stat.mode() & 0o111 == 0 => {
64+
Some(ModeChange::ExecutableChange)
65+
}
66+
index::entry::Mode::SYMLINK if capabilites.symlink && !fs_stat.is_symlink() => Some(ModeChange::TypeChange),
67+
index::entry::Mode::SYMLINK if !capabilites.symlink && !fs_stat.is_file() => Some(ModeChange::TypeChange),
68+
index::entry::Mode::COMMIT if !fs_stat.is_dir() => Some(ModeChange::TypeChange),
69+
_ => None, // TODO: log/errror invalid file type
70+
};
71+
72+
let data_changed = entry.stat.size as u64 != fs_stat.len();
73+
74+
let ctime = fs_stat
75+
.created()
76+
.map_or(Ok(Duration::default()), |x| x.duration_since(std::time::UNIX_EPOCH))?;
77+
let mtime = fs_stat
78+
.modified()
79+
.map_or(Ok(Duration::default()), |x| x.duration_since(std::time::UNIX_EPOCH))?;
80+
81+
let stat = &entry.stat;
82+
let stat_changed = stat.mtime.secs
83+
!= mtime
84+
.as_secs()
85+
.try_into()
86+
.expect("by 2038 we found a solution for this")
87+
|| stat.mtime.nsecs != mtime.subsec_nanos()
88+
|| stat.ctime.secs
89+
!= ctime
90+
.as_secs()
91+
.try_into()
92+
.expect("by 2038 we found a solution for this")
93+
|| stat.ctime.nsecs != ctime.subsec_nanos();
94+
95+
Ok(Self {
96+
mode_change,
97+
stat_changed,
98+
data_changed,
99+
})
100+
}
101+
102+
/// Marks this entries stats as changed if there is a potential fs race condition
103+
pub fn detect_racy_stat(&mut self, index: &index::State, index_entry: &index::Entry) {
104+
self.stat_changed = self.stat_changed || index_entry.stat.mtime >= index.timestamp()
105+
}
106+
107+
/// returns true if this entry has any changes
108+
/// usually `detect_racy_stat` should be called first to avoid race condition
109+
pub fn changed(&self) -> bool {
110+
self.mode_change.is_some() || self.stat_changed || self.data_changed
111+
}
112+
113+
/// Reads the worktree file from the disk and compares it to
114+
/// the index entries oid to check if the actual data of the file is changed
115+
/// and sets [`Entry::data_changed`] accordingly
116+
pub fn compare_data(
117+
&mut self,
118+
worktree_path: &Path,
119+
index_entry: &index::Entry,
120+
buf: &mut Vec<u8>,
121+
capabilities: &fs::Capabilities,
122+
) -> Result<(), read::Error> {
123+
if self.mode_change.is_some() || !self.stat_changed || self.data_changed {
124+
return Ok(());
125+
}
126+
let data = read_blob_to_buf_with_meta(
127+
worktree_path,
128+
index_entry.mode.contains(index::entry::Mode::SYMLINK),
129+
buf,
130+
capabilities,
131+
)?;
132+
let header = loose_header(gix_object::Kind::Blob, data.len());
133+
let hash_changed = match index_entry.id {
134+
ObjectId::Sha1(entry_hash) => {
135+
let mut file_hash = hash::Sha1::default();
136+
file_hash.update(&header);
137+
file_hash.update(&data);
138+
let file_hash = file_hash.digest();
139+
entry_hash != file_hash
140+
}
141+
};
142+
self.data_changed = hash_changed;
143+
Ok(())
144+
}
145+
}
146+
147+
#[allow(missing_docs)]
148+
#[derive(Debug, thiserror::Error)]
149+
pub enum Error {
150+
#[error("Could not convert path to UTF8 {path}")]
151+
IllformedUtf8 { path: BString },
152+
#[error("The clock was off when reading file related metadata after updating a file on disk")]
153+
Time(#[from] std::time::SystemTimeError),
154+
#[error("IO error while writing blob or reading file metadata or changing filetype")]
155+
Io(#[from] io::Error),
156+
}
157+
158+
#[derive(Clone, Debug)]
159+
/// A change between the index and the worktree computed by [`compate_to_index`]
160+
pub struct Change<'a> {
161+
/// The index entry that changed
162+
pub index_entry: &'a index::Entry,
163+
/// The on-disk worktree path corresponding to this entry
164+
pub worktree_path: PathBuf,
165+
/// How this index entry changed
166+
pub kind: ChangeKind,
167+
/// file metadata that can be reused (optimization)
168+
pub fstat: Option<std::fs::Metadata>,
169+
}
170+
171+
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)]
172+
///
173+
pub enum ChangeKind {
174+
/// An index entry has no corresponding file in the worktree
175+
Removed,
176+
/// Ar new files that has been marked with git add but has not yet been
177+
/// checked in yet. No diff is computed for these files because whatever is
178+
/// on disk at commit time will be used
179+
Added,
180+
/// Called for files that may have changed in some form as indicated by `change`.
181+
/// Note that this doesn't necessarily mean that the *content* of the file changed
182+
/// see [`FileStatus`] for details
183+
Modified {
184+
/// How the file was modified exactly
185+
modification: FileModification,
186+
/// Whether this (changed) file also has an unresolved merge conflict
187+
conflict: bool,
188+
},
189+
/// There are unresolved merge conflicts for this file
190+
/// but it has not changed on disk
191+
Conflict,
192+
}
193+
194+
/// Computes the changes between the index and the worktree
195+
pub fn compare_to_index<'a: 'b, 'b>(
196+
index: &'a index::State,
197+
// TODO: use worktree cache instead
198+
worktree: &'b Path,
199+
capabilities: &'b fs::Capabilities,
200+
) -> impl Iterator<Item = Result<Change<'a>, Error>> + 'b {
201+
// TODO: parallel with rayon
202+
index.entries().iter().filter_map(|index_entry| {
203+
let conflict = match index_entry.stage() {
204+
0 => false,
205+
1 => true,
206+
_ => return None,
207+
};
208+
let git_path = index_entry.path(index);
209+
if index_entry.flags.intersects(
210+
index::entry::Flags::UPTODATE
211+
| index::entry::Flags::SKIP_WORKTREE
212+
| index::entry::Flags::ASSUME_VALID
213+
| index::entry::Flags::FSMONITOR_VALID,
214+
) {
215+
return None;
216+
}
217+
218+
let path = if let Ok(path) = path::try_from_bstr(git_path) {
219+
path
220+
} else {
221+
return Some(Err(Error::IllformedUtf8 {
222+
path: git_path.to_owned(),
223+
}));
224+
};
225+
226+
let worktree_path = worktree.join(path);
227+
let metadata = match worktree_path.symlink_metadata() {
228+
// TODO: check if any parent directory is a symlink
229+
// we need to use fs::Cache for that
230+
Ok(metadata) if metadata.is_dir() => {
231+
// index entries are normally only for files/symlinks
232+
// if a file turned into a directory it was removed
233+
// the only exception here are submodules which are
234+
// part of the index despite being directories
235+
//
236+
// TODO: submodules:
237+
// if entry.mode.contains(Mode::COMMIT) &&
238+
// resolve_gitlink_ref(ce->name, "HEAD", &sub))
239+
return Some(Ok(Change {
240+
kind: ChangeKind::Removed,
241+
index_entry,
242+
worktree_path,
243+
fstat: Some(metadata),
244+
}));
245+
}
246+
Ok(metdata) => metdata,
247+
Err(err) if err.kind() == ErrorKind::NotFound => {
248+
return Some(Ok(Change {
249+
kind: ChangeKind::Removed,
250+
index_entry,
251+
worktree_path,
252+
fstat: None,
253+
}))
254+
}
255+
Err(err) => {
256+
// TODO: strict mode?
257+
return Some(Err(err.into()));
258+
}
259+
};
260+
if index_entry.flags.contains(index::entry::Flags::INTENT_TO_ADD) {
261+
return Some(Ok(Change {
262+
kind: ChangeKind::Added,
263+
index_entry,
264+
worktree_path,
265+
fstat: None,
266+
}));
267+
}
268+
let mut change = match FileModification::from_stat(index_entry, &metadata, capabilities) {
269+
Ok(change) => change,
270+
Err(err) => return Some(Err(err.into())),
271+
};
272+
change.detect_racy_stat(index, index_entry);
273+
274+
let kind = if change.changed() {
275+
ChangeKind::Modified {
276+
modification: change,
277+
conflict,
278+
}
279+
} else if conflict {
280+
ChangeKind::Conflict
281+
} else {
282+
return None;
283+
};
284+
285+
Some(Ok(Change {
286+
kind,
287+
index_entry,
288+
worktree_path,
289+
fstat: Some(metadata),
290+
}))
291+
})
292+
}

gix-worktree/src/lib.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,5 +13,12 @@ pub mod fs;
1313
pub mod index;
1414

1515
pub(crate) mod os;
16+
17+
///
18+
pub mod diff;
19+
20+
///
21+
pub mod untracked;
22+
1623
///
1724
pub mod read;

gix-worktree/src/untracked.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
// TODO: untracked file detection, needs fs::Cache
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
use std::fs::{self};
2+
use std::path::Path;
3+
4+
use bstr::BString;
5+
use gix_worktree as worktree;
6+
use worktree::diff::{ChangeKind, FileModification};
7+
8+
fn compute_diff(name: &str, make_worktree_dirty: impl FnOnce(&Path)) -> Vec<(ChangeKind, BString)> {
9+
let work_tree =
10+
gix_testtools::scripted_fixture_writable(Path::new(name).with_extension("sh")).expect("script works");
11+
let git_dir = work_tree.path().join(".git");
12+
make_worktree_dirty(work_tree.path());
13+
let index = gix_index::File::at(git_dir.join("index"), gix_hash::Kind::Sha1, Default::default()).unwrap();
14+
let capapilites = worktree::fs::Capabilities::probe(git_dir);
15+
let mut buf = Vec::with_capacity(8 * 1024);
16+
worktree::diff::compare_to_index(&index, work_tree.path(), &capapilites)
17+
.filter_map(|change| {
18+
let mut change = change.unwrap();
19+
if let ChangeKind::Modified {
20+
ref mut modification, ..
21+
} = &mut change.kind
22+
{
23+
modification
24+
.compare_data(&change.worktree_path, change.index_entry, &mut buf, &capapilites)
25+
.unwrap();
26+
if modification.mode_change.is_none() && !modification.data_changed {
27+
return None;
28+
}
29+
}
30+
Some((change.kind, change.index_entry.path(&index).to_owned()))
31+
})
32+
.collect()
33+
}
34+
35+
/// Files deleted from the worktree are reported as removed.
#[test]
fn removed() {
    let diff = compute_diff("make_mixed_without_submodules", |worktree_root| {
        for deleted in ["executable", "dir/content", "dir/sub-dir/symlink"].iter() {
            fs::remove_file(worktree_root.join(deleted)).unwrap();
        }
    });

    let expected: Vec<_> = ["dir/content", "dir/sub-dir/symlink", "executable"]
        .iter()
        .map(|path| (ChangeKind::Removed, BString::from(*path)))
        .collect();
    assert_eq!(diff, expected)
}
52+
53+
/// Only files with different content are reported, rewriting the same content is not a change.
#[test]
fn changed() {
    let diff = compute_diff("make_mixed_without_submodules", |worktree_root| {
        fs::write(worktree_root.join("dir/content"), "hello_world").unwrap();
        // simulate a touch command by writing the content the file already has
        fs::write(worktree_root.join("executable"), "content").unwrap();
    });

    let content_changed = ChangeKind::Modified {
        modification: FileModification {
            mode_change: None,
            stat_changed: true,
            data_changed: true,
        },
        conflict: false,
    };
    assert_eq!(diff, vec![(content_changed, BString::from("dir/content"))])
}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
mod checkout;
2+
mod diff;

0 commit comments

Comments
 (0)