|
| 1 | +use std::io::{self, ErrorKind}; |
| 2 | +use std::path::{Path, PathBuf}; |
| 3 | +use std::time::{Duration, SystemTimeError}; |
| 4 | + |
| 5 | +use bstr::BString; |
| 6 | +use gix_features::hash; |
| 7 | +use gix_hash::ObjectId; |
| 8 | +use gix_index as index; |
| 9 | +use gix_object::encode::loose_header; |
| 10 | +use gix_path as path; |
| 11 | + |
| 12 | +use crate::fs; |
| 13 | +use crate::read::{self, read_blob_to_buf_with_meta}; |
| 14 | + |
/// Describes how the mode of an index entry differs from what is found on disk.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)]
pub enum ModeChange {
    /// The kind of filesystem object changed, for example a normal file
    /// was replaced with a symlink. `git status` shows this as `typechange`.
    ///
    /// Note: except for submodules, only files and symlinks are present in
    /// the index, so anything turning into a directory is counted as a removal.
    TypeChange,
    /// The executable bit of a file changed.
    ExecutableChange,
}
| 26 | + |
| 27 | +/// How a worktree file changed compared to an index entry |
| 28 | +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)] |
| 29 | +pub struct FileModification { |
| 30 | + /// How the mode has changed |
| 31 | + pub mode_change: Option<ModeChange>, |
| 32 | + /// mtime/ctime changed. If this is false then we can assume |
| 33 | + /// that the file is uncahged (with the exception of racy timestamps). |
| 34 | + /// If this is true however the file might still be unchaged. We need |
| 35 | + /// to read the file from disk and compare it to the object in |
| 36 | + /// index. |
| 37 | + pub stat_changed: bool, |
| 38 | + /// The data of this entry has changed. This can be quickly |
| 39 | + /// determined if the size of the stat data is mismatched. |
| 40 | + /// Otherwise a data change must be detected by reading the file |
| 41 | + /// from disk and comparing it to the file stored in the index |
| 42 | + /// (only needs to be done if `self.stat_changed` is true) |
| 43 | + pub data_changed: bool, |
| 44 | +} |
| 45 | + |
| 46 | +impl FileModification { |
| 47 | + /// Computes the status of an entry by comparing its stat to `symlink_metadata()` |
| 48 | + pub fn from_stat( |
| 49 | + entry: &index::Entry, |
| 50 | + fs_stat: &std::fs::Metadata, |
| 51 | + capabilites: &fs::Capabilities, |
| 52 | + ) -> Result<FileModification, SystemTimeError> { |
| 53 | + #[cfg(unix)] |
| 54 | + use std::os::unix::fs::MetadataExt; |
| 55 | + |
| 56 | + let mode_change = match entry.mode { |
| 57 | + index::entry::Mode::FILE if !fs_stat.is_file() => Some(ModeChange::TypeChange), |
| 58 | + #[cfg(unix)] |
| 59 | + index::entry::Mode::FILE if capabilites.executable_bit && fs_stat.mode() & 0o111 != 0 => { |
| 60 | + Some(ModeChange::ExecutableChange) |
| 61 | + } |
| 62 | + #[cfg(unix)] |
| 63 | + index::entry::Mode::FILE_EXECUTABLE if capabilites.executable_bit && fs_stat.mode() & 0o111 == 0 => { |
| 64 | + Some(ModeChange::ExecutableChange) |
| 65 | + } |
| 66 | + index::entry::Mode::SYMLINK if capabilites.symlink && !fs_stat.is_symlink() => Some(ModeChange::TypeChange), |
| 67 | + index::entry::Mode::SYMLINK if !capabilites.symlink && !fs_stat.is_file() => Some(ModeChange::TypeChange), |
| 68 | + index::entry::Mode::COMMIT if !fs_stat.is_dir() => Some(ModeChange::TypeChange), |
| 69 | + _ => None, // TODO: log/errror invalid file type |
| 70 | + }; |
| 71 | + |
| 72 | + let data_changed = entry.stat.size as u64 != fs_stat.len(); |
| 73 | + |
| 74 | + let ctime = fs_stat |
| 75 | + .created() |
| 76 | + .map_or(Ok(Duration::default()), |x| x.duration_since(std::time::UNIX_EPOCH))?; |
| 77 | + let mtime = fs_stat |
| 78 | + .modified() |
| 79 | + .map_or(Ok(Duration::default()), |x| x.duration_since(std::time::UNIX_EPOCH))?; |
| 80 | + |
| 81 | + let stat = &entry.stat; |
| 82 | + let stat_changed = stat.mtime.secs |
| 83 | + != mtime |
| 84 | + .as_secs() |
| 85 | + .try_into() |
| 86 | + .expect("by 2038 we found a solution for this") |
| 87 | + || stat.mtime.nsecs != mtime.subsec_nanos() |
| 88 | + || stat.ctime.secs |
| 89 | + != ctime |
| 90 | + .as_secs() |
| 91 | + .try_into() |
| 92 | + .expect("by 2038 we found a solution for this") |
| 93 | + || stat.ctime.nsecs != ctime.subsec_nanos(); |
| 94 | + |
| 95 | + Ok(Self { |
| 96 | + mode_change, |
| 97 | + stat_changed, |
| 98 | + data_changed, |
| 99 | + }) |
| 100 | + } |
| 101 | + |
| 102 | + /// Marks this entries stats as changed if there is a potential fs race condition |
| 103 | + pub fn detect_racy_stat(&mut self, index: &index::State, index_entry: &index::Entry) { |
| 104 | + self.stat_changed = self.stat_changed || index_entry.stat.mtime >= index.timestamp() |
| 105 | + } |
| 106 | + |
| 107 | + /// returns true if this entry has any changes |
| 108 | + /// usually `detect_racy_stat` should be called first to avoid race condition |
| 109 | + pub fn changed(&self) -> bool { |
| 110 | + self.mode_change.is_some() || self.stat_changed || self.data_changed |
| 111 | + } |
| 112 | + |
| 113 | + /// Reads the worktree file from the disk and compares it to |
| 114 | + /// the index entries oid to check if the actual data of the file is changed |
| 115 | + /// and sets [`Entry::data_changed`] accordingly |
| 116 | + pub fn compare_data( |
| 117 | + &mut self, |
| 118 | + worktree_path: &Path, |
| 119 | + index_entry: &index::Entry, |
| 120 | + buf: &mut Vec<u8>, |
| 121 | + capabilities: &fs::Capabilities, |
| 122 | + ) -> Result<(), read::Error> { |
| 123 | + if self.mode_change.is_some() || !self.stat_changed || self.data_changed { |
| 124 | + return Ok(()); |
| 125 | + } |
| 126 | + let data = read_blob_to_buf_with_meta( |
| 127 | + worktree_path, |
| 128 | + index_entry.mode.contains(index::entry::Mode::SYMLINK), |
| 129 | + buf, |
| 130 | + capabilities, |
| 131 | + )?; |
| 132 | + let header = loose_header(gix_object::Kind::Blob, data.len()); |
| 133 | + let hash_changed = match index_entry.id { |
| 134 | + ObjectId::Sha1(entry_hash) => { |
| 135 | + let mut file_hash = hash::Sha1::default(); |
| 136 | + file_hash.update(&header); |
| 137 | + file_hash.update(&data); |
| 138 | + let file_hash = file_hash.digest(); |
| 139 | + entry_hash != file_hash |
| 140 | + } |
| 141 | + }; |
| 142 | + self.data_changed = hash_changed; |
| 143 | + Ok(()) |
| 144 | + } |
| 145 | +} |
| 146 | + |
| 147 | +#[allow(missing_docs)] |
| 148 | +#[derive(Debug, thiserror::Error)] |
| 149 | +pub enum Error { |
| 150 | + #[error("Could not convert path to UTF8 {path}")] |
| 151 | + IllformedUtf8 { path: BString }, |
| 152 | + #[error("The clock was off when reading file related metadata after updating a file on disk")] |
| 153 | + Time(#[from] std::time::SystemTimeError), |
| 154 | + #[error("IO error while writing blob or reading file metadata or changing filetype")] |
| 155 | + Io(#[from] io::Error), |
| 156 | +} |
| 157 | + |
| 158 | +#[derive(Clone, Debug)] |
| 159 | +/// A change between the index and the worktree computed by [`compate_to_index`] |
| 160 | +pub struct Change<'a> { |
| 161 | + /// The index entry that changed |
| 162 | + pub index_entry: &'a index::Entry, |
| 163 | + /// The on-disk worktree path corresponding to this entry |
| 164 | + pub worktree_path: PathBuf, |
| 165 | + /// How this index entry changed |
| 166 | + pub kind: ChangeKind, |
| 167 | + /// file metadata that can be reused (optimization) |
| 168 | + pub fstat: Option<std::fs::Metadata>, |
| 169 | +} |
| 170 | + |
| 171 | +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)] |
| 172 | +/// |
| 173 | +pub enum ChangeKind { |
| 174 | + /// An index entry has no corresponding file in the worktree |
| 175 | + Removed, |
| 176 | + /// Ar new files that has been marked with git add but has not yet been |
| 177 | + /// checked in yet. No diff is computed for these files because whatever is |
| 178 | + /// on disk at commit time will be used |
| 179 | + Added, |
| 180 | + /// Called for files that may have changed in some form as indicated by `change`. |
| 181 | + /// Note that this doesn't necessarily mean that the *content* of the file changed |
| 182 | + /// see [`FileStatus`] for details |
| 183 | + Modified { |
| 184 | + /// How the file was modified exactly |
| 185 | + modification: FileModification, |
| 186 | + /// Whether this (changed) file also has an unresolved merge conflict |
| 187 | + conflict: bool, |
| 188 | + }, |
| 189 | + /// There are unresolved merge conflicts for this file |
| 190 | + /// but it has not changed on disk |
| 191 | + Conflict, |
| 192 | +} |
| 193 | + |
| 194 | +/// Computes the changes between the index and the worktree |
| 195 | +pub fn compare_to_index<'a: 'b, 'b>( |
| 196 | + index: &'a index::State, |
| 197 | + // TODO: use worktree cache instead |
| 198 | + worktree: &'b Path, |
| 199 | + capabilities: &'b fs::Capabilities, |
| 200 | +) -> impl Iterator<Item = Result<Change<'a>, Error>> + 'b { |
| 201 | + // TODO: parallel with rayon |
| 202 | + index.entries().iter().filter_map(|index_entry| { |
| 203 | + let conflict = match index_entry.stage() { |
| 204 | + 0 => false, |
| 205 | + 1 => true, |
| 206 | + _ => return None, |
| 207 | + }; |
| 208 | + let git_path = index_entry.path(index); |
| 209 | + if index_entry.flags.intersects( |
| 210 | + index::entry::Flags::UPTODATE |
| 211 | + | index::entry::Flags::SKIP_WORKTREE |
| 212 | + | index::entry::Flags::ASSUME_VALID |
| 213 | + | index::entry::Flags::FSMONITOR_VALID, |
| 214 | + ) { |
| 215 | + return None; |
| 216 | + } |
| 217 | + |
| 218 | + let path = if let Ok(path) = path::try_from_bstr(git_path) { |
| 219 | + path |
| 220 | + } else { |
| 221 | + return Some(Err(Error::IllformedUtf8 { |
| 222 | + path: git_path.to_owned(), |
| 223 | + })); |
| 224 | + }; |
| 225 | + |
| 226 | + let worktree_path = worktree.join(path); |
| 227 | + let metadata = match worktree_path.symlink_metadata() { |
| 228 | + // TODO: check if any parent directory is a symlink |
| 229 | + // we need to use fs::Cache for that |
| 230 | + Ok(metadata) if metadata.is_dir() => { |
| 231 | + // index entries are normally only for files/symlinks |
| 232 | + // if a file turned into a directory it was removed |
| 233 | + // the only exception here are submodules which are |
| 234 | + // part of the index despite being directories |
| 235 | + // |
| 236 | + // TODO: submodules: |
| 237 | + // if entry.mode.contains(Mode::COMMIT) && |
| 238 | + // resolve_gitlink_ref(ce->name, "HEAD", &sub)) |
| 239 | + return Some(Ok(Change { |
| 240 | + kind: ChangeKind::Removed, |
| 241 | + index_entry, |
| 242 | + worktree_path, |
| 243 | + fstat: Some(metadata), |
| 244 | + })); |
| 245 | + } |
| 246 | + Ok(metdata) => metdata, |
| 247 | + Err(err) if err.kind() == ErrorKind::NotFound => { |
| 248 | + return Some(Ok(Change { |
| 249 | + kind: ChangeKind::Removed, |
| 250 | + index_entry, |
| 251 | + worktree_path, |
| 252 | + fstat: None, |
| 253 | + })) |
| 254 | + } |
| 255 | + Err(err) => { |
| 256 | + // TODO: strict mode? |
| 257 | + return Some(Err(err.into())); |
| 258 | + } |
| 259 | + }; |
| 260 | + if index_entry.flags.contains(index::entry::Flags::INTENT_TO_ADD) { |
| 261 | + return Some(Ok(Change { |
| 262 | + kind: ChangeKind::Added, |
| 263 | + index_entry, |
| 264 | + worktree_path, |
| 265 | + fstat: None, |
| 266 | + })); |
| 267 | + } |
| 268 | + let mut change = match FileModification::from_stat(index_entry, &metadata, capabilities) { |
| 269 | + Ok(change) => change, |
| 270 | + Err(err) => return Some(Err(err.into())), |
| 271 | + }; |
| 272 | + change.detect_racy_stat(index, index_entry); |
| 273 | + |
| 274 | + let kind = if change.changed() { |
| 275 | + ChangeKind::Modified { |
| 276 | + modification: change, |
| 277 | + conflict, |
| 278 | + } |
| 279 | + } else if conflict { |
| 280 | + ChangeKind::Conflict |
| 281 | + } else { |
| 282 | + return None; |
| 283 | + }; |
| 284 | + |
| 285 | + Some(Ok(Change { |
| 286 | + kind, |
| 287 | + index_entry, |
| 288 | + worktree_path, |
| 289 | + fstat: Some(metadata), |
| 290 | + })) |
| 291 | + }) |
| 292 | +} |
0 commit comments