|
| 1 | +use crate::{ |
| 2 | + file::{self, File}, |
| 3 | + GENERATION_NUMBER_INFINITY, GENERATION_NUMBER_MAX, |
| 4 | +}; |
| 5 | +use bstr::ByteSlice; |
| 6 | +use git_object::{borrowed, owned, SHA1_SIZE}; |
| 7 | +use std::cmp::{max, min}; |
| 8 | +use std::collections::HashMap; |
| 9 | +use std::convert::TryFrom; |
| 10 | +use std::path::Path; |
| 11 | + |
| 12 | +#[derive(thiserror::Error, Debug)] |
| 13 | +pub enum Error { |
| 14 | + #[error(transparent)] |
| 15 | + Commit(#[from] file::commit::Error), |
| 16 | + #[error("commit at file position {pos} has invalid ID {id}")] |
| 17 | + CommitId { id: owned::Id, pos: file::Position }, |
| 18 | + #[error("commit at file position {pos} with ID {id} is out of order relative to its predecessor with ID {predecessor_id}")] |
| 19 | + CommitsOutOfOrder { |
| 20 | + id: owned::Id, |
| 21 | + pos: file::Position, |
| 22 | + predecessor_id: owned::Id, |
| 23 | + }, |
| 24 | + #[error("commit-graph filename should be {0}")] |
| 25 | + Filename(String), |
| 26 | + #[error("commit {id} has invalid generation {generation}")] |
| 27 | + Generation { generation: u32, id: owned::Id }, |
| 28 | + #[error("checksum mismatch: expected {expected}, got {actual}")] |
| 29 | + Mismatch { expected: owned::Id, actual: owned::Id }, |
| 30 | + #[error("commit {id} has invalid root tree ID {root_tree_id}")] |
| 31 | + RootTreeId { id: owned::Id, root_tree_id: owned::Id }, |
| 32 | +} |
| 33 | + |
| 34 | +// This is a separate type to let `traverse`'s caller use the same error type for its result and its |
| 35 | +// processor error type while also letting that error type contain file::verify::Error values. |
| 36 | +// Is there a better way? Should the caller's error type just use boxes to avoid recursive type |
| 37 | +// errors? |
| 38 | +#[derive(thiserror::Error, Debug)] |
| 39 | +pub enum EitherError<E1: std::error::Error + 'static, E2: std::error::Error + 'static> { |
| 40 | + #[error(transparent)] |
| 41 | + Internal(#[from] E1), |
| 42 | + // Why can't I use #[from] here? Boo! |
| 43 | + #[error("{0}")] |
| 44 | + Processor(#[source] E2), |
| 45 | +} |
| 46 | + |
/// Statistics gathered by `File::traverse` while walking all commits of a commit-graph file.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde1", derive(serde::Deserialize, serde::Serialize))]
pub struct Outcome {
    /// The greatest generation number of any traversed commit, or 0 if there were none.
    pub max_generation: u32,
    /// The greatest number of parents found on a single commit.
    // NOTE(review): confirm that `File::traverse` maintains this value.
    pub max_parents: u32,
    /// The smallest generation number of any traversed commit, or 0 if there were none.
    pub min_generation: u32,
    /// The number of commits the file reports to contain.
    pub num_commits: u32,
    /// Maps a parent count to the number of commits that have exactly that many parents.
    pub parent_counts: HashMap<u32, u32>,
}
| 56 | + |
| 57 | +impl File { |
| 58 | + pub fn checksum(&self) -> borrowed::Id<'_> { |
| 59 | + borrowed::Id::try_from(&self.data[self.data.len() - SHA1_SIZE..]).expect("file to be large enough for a hash") |
| 60 | + } |
| 61 | + |
| 62 | + pub fn traverse<'a, E, Processor>(&'a self, mut processor: Processor) -> Result<Outcome, EitherError<Error, E>> |
| 63 | + where |
| 64 | + E: std::error::Error + 'static, |
| 65 | + Processor: FnMut(&file::Commit<'a>) -> Result<(), E>, |
| 66 | + { |
| 67 | + self.verify_checksum()?; |
| 68 | + verify_split_chain_filename_hash(&self.path, self.checksum())?; |
| 69 | + |
| 70 | + // This probably belongs in borrowed::Id itself? |
| 71 | + let null_id = borrowed::Id::from(&[0u8; SHA1_SIZE]); |
| 72 | + |
| 73 | + let mut stats = Outcome { |
| 74 | + max_generation: 0, |
| 75 | + max_parents: 0, |
| 76 | + min_generation: GENERATION_NUMBER_INFINITY, |
| 77 | + num_commits: self.num_commits(), |
| 78 | + parent_counts: HashMap::new(), |
| 79 | + }; |
| 80 | + |
| 81 | + // TODO: Verify self.fan values as we go. |
| 82 | + let mut prev_id: borrowed::Id<'a> = null_id; |
| 83 | + for commit in self.iter_commits() { |
| 84 | + if commit.id() <= prev_id { |
| 85 | + if commit.id() == null_id { |
| 86 | + return Err(Error::CommitId { |
| 87 | + pos: commit.position(), |
| 88 | + id: commit.id().into(), |
| 89 | + } |
| 90 | + .into()); |
| 91 | + } |
| 92 | + return Err(Error::CommitsOutOfOrder { |
| 93 | + pos: commit.position(), |
| 94 | + id: commit.id().into(), |
| 95 | + predecessor_id: prev_id.into(), |
| 96 | + } |
| 97 | + .into()); |
| 98 | + } |
| 99 | + if commit.root_tree_id() == null_id { |
| 100 | + return Err(Error::RootTreeId { |
| 101 | + id: commit.id().into(), |
| 102 | + root_tree_id: commit.root_tree_id().into(), |
| 103 | + } |
| 104 | + .into()); |
| 105 | + } |
| 106 | + if commit.generation() > GENERATION_NUMBER_MAX { |
| 107 | + return Err(Error::Generation { |
| 108 | + generation: commit.generation(), |
| 109 | + id: commit.id().into(), |
| 110 | + } |
| 111 | + .into()); |
| 112 | + } |
| 113 | + |
| 114 | + processor(&commit).map_err(EitherError::Processor)?; |
| 115 | + |
| 116 | + stats.max_generation = max(stats.max_generation, commit.generation()); |
| 117 | + stats.min_generation = min(stats.min_generation, commit.generation()); |
| 118 | + let parent_count = commit |
| 119 | + .iter_parents() |
| 120 | + .try_fold(0u32, |acc, pos| pos.map(|_| acc + 1)) |
| 121 | + .map_err(Error::Commit)?; |
| 122 | + *stats.parent_counts.entry(parent_count).or_insert(0) += 1; |
| 123 | + prev_id = commit.id(); |
| 124 | + } |
| 125 | + |
| 126 | + if stats.min_generation == GENERATION_NUMBER_INFINITY { |
| 127 | + stats.min_generation = 0; |
| 128 | + } |
| 129 | + |
| 130 | + Ok(stats) |
| 131 | + } |
| 132 | + |
| 133 | + pub fn verify_checksum(&self) -> Result<owned::Id, Error> { |
| 134 | + // TODO: Use/copy git_odb::hash::bytes_of_file. |
| 135 | + let data_len_without_trailer = self.data.len() - SHA1_SIZE; |
| 136 | + let mut hasher = git_features::hash::Sha1::default(); |
| 137 | + hasher.update(&self.data[..data_len_without_trailer]); |
| 138 | + let actual = owned::Id::new_sha1(hasher.digest()); |
| 139 | + |
| 140 | + let expected = self.checksum(); |
| 141 | + if actual.to_borrowed() == expected { |
| 142 | + Ok(actual) |
| 143 | + } else { |
| 144 | + Err(Error::Mismatch { |
| 145 | + actual, |
| 146 | + expected: expected.into(), |
| 147 | + }) |
| 148 | + } |
| 149 | + } |
| 150 | +} |
| 151 | + |
| 152 | +/// If the given path's filename matches "graph-{hash}.graph", check that `hash` matches the |
| 153 | +/// expected hash. |
| 154 | +fn verify_split_chain_filename_hash(path: impl AsRef<Path>, expected: borrowed::Id<'_>) -> Result<(), Error> { |
| 155 | + let path = path.as_ref(); |
| 156 | + path.file_name() |
| 157 | + .and_then(|filename| filename.to_str()) |
| 158 | + .and_then(|filename| filename.strip_suffix(".graph")) |
| 159 | + .and_then(|stem| stem.strip_prefix("graph-")) |
| 160 | + .map_or(Ok(()), |hex| match owned::Id::from_40_bytes_in_hex(hex.as_bytes()) { |
| 161 | + Ok(actual) if actual.to_borrowed() == expected => Ok(()), |
| 162 | + _ => Err(Error::Filename(format!( |
| 163 | + "graph-{}.graph", |
| 164 | + expected.to_sha1_hex().as_bstr() |
| 165 | + ))), |
| 166 | + }) |
| 167 | +} |
0 commit comments