Skip to content

Commit 6d1fcca

Browse files
committed
fix!: Tree|TreeRef::write_to() will now assure the most basic sanity of entry names.
Previously, it would allow null-bytes in the name which would corrupt the written tree. Now this is forbidden. For some reason, it disallowed newlines, but that is now allowed as validation is should be handled on a higher level.
1 parent 1bf08c8 commit 6d1fcca

File tree

5 files changed

+173
-42
lines changed

5 files changed

+173
-42
lines changed

gix-object/src/tree/editor.rs

Lines changed: 63 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ use bstr::{BStr, BString, ByteSlice, ByteVec};
44
use gix_hash::ObjectId;
55
use gix_hashtable::hash_map::Entry;
66
use std::cmp::Ordering;
7+
use std::fmt::Formatter;
78

89
/// A way to constrain all [tree-edits](Editor) to a given subtree.
910
pub struct Cursor<'a, 'find> {
@@ -14,6 +15,26 @@ pub struct Cursor<'a, 'find> {
1415
prefix: BString,
1516
}
1617

18+
impl std::fmt::Debug for Editor<'_> {
19+
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
20+
f.debug_struct("Editor")
21+
.field("object_hash", &self.object_hash)
22+
.field("path_buf", &self.path_buf)
23+
.field("trees", &self.trees)
24+
.finish()
25+
}
26+
}
27+
28+
/// The error returned by [Editor] or [Cursor] edit operation.
29+
#[derive(Debug, thiserror::Error)]
30+
#[allow(missing_docs)]
31+
pub enum Error {
32+
#[error("Empty path components are not allowed")]
33+
EmptyPathComponent,
34+
#[error(transparent)]
35+
FindExistingObject(#[from] crate::find::existing_object::Error),
36+
}
37+
1738
/// Lifecycle
1839
impl<'a> Editor<'a> {
1940
/// Create a new editor that uses `root` as base for all edits. Use `find` to lookup existing
@@ -44,14 +65,22 @@ impl<'a> Editor<'a> {
4465
/// Future calls to [`upsert`](Self::upsert) or similar will keep working on the last seen state of the
4566
/// just-written root-tree.
4667
/// If this is not desired, use [set_root()](Self::set_root()).
68+
///
69+
/// ### Validation
70+
///
71+
/// Note that no additional validation is performed to assure correctness of entry-names.
72+
/// It is absolutely and intentionally possible to write out invalid trees with this method.
73+
/// Higher layers are expected to perform detailed validation.
4774
pub fn write<E>(&mut self, out: impl FnMut(&Tree) -> Result<ObjectId, E>) -> Result<ObjectId, E> {
4875
self.path_buf.clear();
4976
self.write_at_pathbuf(out, WriteMode::Normal)
5077
}
5178

5279
/// Remove the entry at `rela_path`, loading all trees on the path accordingly.
5380
/// It's no error if the entry doesn't exist, or if `rela_path` doesn't lead to an existing entry at all.
54-
pub fn remove<I, C>(&mut self, rela_path: I) -> Result<&mut Self, crate::find::existing_object::Error>
81+
///
82+
/// Note that trying to remove a path with an empty component is also forbidden.
83+
pub fn remove<I, C>(&mut self, rela_path: I) -> Result<&mut Self, Error>
5584
where
5685
I: IntoIterator<Item = C>,
5786
C: AsRef<BStr>,
@@ -74,12 +103,7 @@ impl<'a> Editor<'a> {
74103
///
75104
/// `id` can also be an empty tree, along with [the respective `kind`](EntryKind::Tree), even though that's normally not allowed
76105
/// in Git trees.
77-
pub fn upsert<I, C>(
78-
&mut self,
79-
rela_path: I,
80-
kind: EntryKind,
81-
id: ObjectId,
82-
) -> Result<&mut Self, crate::find::existing_object::Error>
106+
pub fn upsert<I, C>(&mut self, rela_path: I, kind: EntryKind, id: ObjectId) -> Result<&mut Self, Error>
83107
where
84108
I: IntoIterator<Item = C>,
85109
C: AsRef<BStr>,
@@ -130,22 +154,39 @@ impl<'a> Editor<'a> {
130154
if tree.entries.is_empty() {
131155
parent_to_adjust.entries.remove(entry_idx);
132156
} else {
133-
parent_to_adjust.entries[entry_idx].oid = out(&tree)?;
157+
match out(&tree) {
158+
Ok(id) => {
159+
parent_to_adjust.entries[entry_idx].oid = id;
160+
}
161+
Err(err) => {
162+
let root_tree = parents.into_iter().next().expect("root wasn't consumed yet");
163+
self.trees.insert(path_hash(&root_tree.1), root_tree.2);
164+
return Err(err);
165+
}
166+
}
134167
}
135168
} else if parents.is_empty() {
136169
debug_assert!(children.is_empty(), "we consume children before parents");
137170
debug_assert_eq!(rela_path, self.path_buf, "this should always be the root tree");
138171

139172
// There may be left-over trees if they are replaced with blobs for example.
140-
let root_tree_id = out(&tree)?;
141-
match mode {
142-
WriteMode::Normal => {
143-
self.trees.clear();
173+
match out(&tree) {
174+
Ok(id) => {
175+
let root_tree_id = id;
176+
match mode {
177+
WriteMode::Normal => {
178+
self.trees.clear();
179+
}
180+
WriteMode::FromCursor => {}
181+
}
182+
self.trees.insert(path_hash(&rela_path), tree);
183+
return Ok(root_tree_id);
184+
}
185+
Err(err) => {
186+
self.trees.insert(path_hash(&rela_path), tree);
187+
return Err(err);
144188
}
145-
WriteMode::FromCursor => {}
146189
}
147-
self.trees.insert(path_hash(&self.path_buf), tree);
148-
return Ok(root_tree_id);
149190
} else if !tree.entries.is_empty() {
150191
out(&tree)?;
151192
}
@@ -161,7 +202,7 @@ impl<'a> Editor<'a> {
161202
&mut self,
162203
rela_path: I,
163204
kind_and_id: Option<(EntryKind, ObjectId, UpsertMode)>,
164-
) -> Result<&mut Self, crate::find::existing_object::Error>
205+
) -> Result<&mut Self, Error>
165206
where
166207
I: IntoIterator<Item = C>,
167208
C: AsRef<BStr>,
@@ -174,6 +215,9 @@ impl<'a> Editor<'a> {
174215
let new_kind_is_tree = kind_and_id.map_or(false, |(kind, _, _)| kind == EntryKind::Tree);
175216
while let Some(name) = rela_path.next() {
176217
let name = name.as_ref();
218+
if name.is_empty() {
219+
return Err(Error::EmptyPathComponent);
220+
}
177221
let is_last = rela_path.peek().is_none();
178222
let mut needs_sorting = false;
179223
let current_level_must_be_tree = !is_last || new_kind_is_tree;
@@ -301,7 +345,7 @@ mod cursor {
301345
///
302346
/// The returned cursor will then allow applying edits to the tree at `rela_path` as root.
303347
/// If `rela_path` is a single empty string, it is equivalent to using the current instance itself.
304-
pub fn cursor_at<I, C>(&mut self, rela_path: I) -> Result<Cursor<'_, 'a>, crate::find::existing_object::Error>
348+
pub fn cursor_at<I, C>(&mut self, rela_path: I) -> Result<Cursor<'_, 'a>, super::Error>
305349
where
306350
I: IntoIterator<Item = C>,
307351
C: AsRef<BStr>,
@@ -320,12 +364,7 @@ mod cursor {
320364

321365
impl<'a, 'find> Cursor<'a, 'find> {
322366
/// Like [`Editor::upsert()`], but with the constraint of only editing in this cursor's tree.
323-
pub fn upsert<I, C>(
324-
&mut self,
325-
rela_path: I,
326-
kind: EntryKind,
327-
id: ObjectId,
328-
) -> Result<&mut Self, crate::find::existing_object::Error>
367+
pub fn upsert<I, C>(&mut self, rela_path: I, kind: EntryKind, id: ObjectId) -> Result<&mut Self, super::Error>
329368
where
330369
I: IntoIterator<Item = C>,
331370
C: AsRef<BStr>,
@@ -337,7 +376,7 @@ mod cursor {
337376
}
338377

339378
/// Like [`Editor::remove()`], but with the constraint of only editing in this cursor's tree.
340-
pub fn remove<I, C>(&mut self, rela_path: I) -> Result<&mut Self, crate::find::existing_object::Error>
379+
pub fn remove<I, C>(&mut self, rela_path: I) -> Result<&mut Self, super::Error>
341380
where
342381
I: IntoIterator<Item = C>,
343382
C: AsRef<BStr>,

gix-object/src/tree/write.rs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@ use crate::{
1212
#[derive(Debug, thiserror::Error)]
1313
#[allow(missing_docs)]
1414
pub enum Error {
15-
#[error("Newlines are invalid in file paths: {name:?}")]
16-
NewlineInFilename { name: BString },
15+
#[error("Nullbytes are invalid in file paths as they are separators: {name:?}")]
16+
NullbyteInFilename { name: BString },
1717
}
1818

1919
impl From<Error> for io::Error {
@@ -40,8 +40,8 @@ impl crate::WriteTo for Tree {
4040
out.write_all(mode.as_bytes(&mut buf))?;
4141
out.write_all(SPACE)?;
4242

43-
if filename.find_byte(b'\n').is_some() {
44-
return Err(Error::NewlineInFilename {
43+
if filename.find_byte(0).is_some() {
44+
return Err(Error::NullbyteInFilename {
4545
name: (*filename).to_owned(),
4646
}
4747
.into());
@@ -87,8 +87,8 @@ impl<'a> crate::WriteTo for TreeRef<'a> {
8787
out.write_all(mode.as_bytes(&mut buf))?;
8888
out.write_all(SPACE)?;
8989

90-
if filename.find_byte(b'\n').is_some() {
91-
return Err(Error::NewlineInFilename {
90+
if filename.find_byte(0).is_some() {
91+
return Err(Error::NullbyteInFilename {
9292
name: (*filename).to_owned(),
9393
}
9494
.into());

gix-object/tests/encode/mod.rs

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,41 @@ mod commit {
8888
}
8989

9090
mod tree {
91+
use gix_object::tree::EntryKind;
92+
use gix_object::{tree, WriteTo};
93+
94+
#[test]
95+
fn write_to_does_not_validate() {
96+
let mut tree = gix_object::Tree::empty();
97+
tree.entries.push(tree::Entry {
98+
mode: EntryKind::Blob.into(),
99+
filename: "".into(),
100+
oid: gix_hash::Kind::Sha1.null(),
101+
});
102+
tree.entries.push(tree::Entry {
103+
mode: EntryKind::Tree.into(),
104+
filename: "something\nwith\newlines\n".into(),
105+
oid: gix_hash::ObjectId::empty_tree(gix_hash::Kind::Sha1),
106+
});
107+
tree.write_to(&mut std::io::sink())
108+
.expect("write succeeds, no validation is performed");
109+
}
110+
111+
#[test]
112+
fn write_to_does_not_allow_separator() {
113+
let mut tree = gix_object::Tree::empty();
114+
tree.entries.push(tree::Entry {
115+
mode: EntryKind::Blob.into(),
116+
filename: "hi\0ho".into(),
117+
oid: gix_hash::Kind::Sha1.null(),
118+
});
119+
let err = tree.write_to(&mut std::io::sink()).unwrap_err();
120+
assert_eq!(
121+
err.to_string(),
122+
"Nullbytes are invalid in file paths as they are separators: \"hi\\0ho\""
123+
);
124+
}
125+
91126
round_trip!(gix_object::Tree, gix_object::TreeRef, "tree/everything.tree");
92127
}
93128

gix-object/tests/tree/editor.rs

Lines changed: 53 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ fn from_existing_cursor() -> crate::Result {
111111
odb.access_count_and_clear();
112112
let mut edit = gix_object::tree::Editor::new(root_tree.clone(), &odb, gix_hash::Kind::Sha1);
113113

114-
let mut cursor = edit.cursor_at(Some(""))?;
114+
let mut cursor = edit.to_cursor();
115115
let actual = cursor
116116
.remove(Some("bin"))?
117117
.remove(Some("bin.d"))?
@@ -326,6 +326,56 @@ fn from_existing_remove() -> crate::Result {
326326
Ok(())
327327
}
328328

329+
#[test]
330+
fn from_empty_invalid_write() -> crate::Result {
331+
let (storage, mut write, _num_writes_and_clear) = new_inmemory_writes();
332+
let odb = StorageOdb::new(storage.clone());
333+
let mut edit = gix_object::tree::Editor::new(Tree::default(), &odb, gix_hash::Kind::Sha1);
334+
335+
let actual = edit
336+
.upsert(["a", "\n"], EntryKind::Blob, any_blob())?
337+
.write(&mut write)
338+
.expect("no validation is performed");
339+
assert_eq!(
340+
display_tree(actual, &storage),
341+
"d23290ea39c284156731188dce62c17ac6b71bda
342+
└── a
343+
└── \n bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb.100644
344+
"
345+
);
346+
347+
let err = edit
348+
.upsert(Some("with\0null"), EntryKind::Blob, any_blob())?
349+
.write(&mut write)
350+
.unwrap_err();
351+
assert_eq!(
352+
err.to_string(),
353+
"Nullbytes are invalid in file paths as they are separators: \"with\\0null\""
354+
);
355+
356+
let err = edit.upsert(Some(""), EntryKind::Blob, any_blob()).unwrap_err();
357+
let expected_msg = "Empty path components are not allowed";
358+
assert_eq!(err.to_string(), expected_msg);
359+
let err = edit
360+
.upsert(["fine", "", "previous is not fine"], EntryKind::Blob, any_blob())
361+
.unwrap_err();
362+
assert_eq!(err.to_string(), expected_msg);
363+
364+
let actual = edit
365+
.remove(Some("a"))?
366+
.remove(Some("with\0null"))?
367+
.upsert(Some("works"), EntryKind::Blob, any_blob())?
368+
.write(&mut write)?;
369+
assert_eq!(
370+
display_tree(actual, &storage),
371+
"d5b913c39b06507c7c64adb16c268ce1102ef5c1
372+
└── works bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb.100644
373+
",
374+
"after removing invalid entries, it can write again"
375+
);
376+
Ok(())
377+
}
378+
329379
#[test]
330380
fn from_empty_add() -> crate::Result {
331381
let (storage, mut write, num_writes_and_clear) = new_inmemory_writes();
@@ -656,7 +706,7 @@ mod utils {
656706

657707
pub(super) fn new_inmemory_writes() -> (
658708
TreeStore,
659-
impl FnMut(&Tree) -> Result<ObjectId, std::convert::Infallible>,
709+
impl FnMut(&Tree) -> Result<ObjectId, std::io::Error>,
660710
impl Fn() -> usize,
661711
) {
662712
let store = TreeStore::default();
@@ -667,8 +717,7 @@ mod utils {
667717
let mut buf = Vec::with_capacity(512);
668718
move |tree: &Tree| {
669719
buf.clear();
670-
tree.write_to(&mut buf)
671-
.expect("write to memory can't fail and tree is valid");
720+
tree.write_to(&mut buf)?;
672721
let header = gix_object::encode::loose_header(gix_object::Kind::Tree, buf.len() as u64);
673722
let mut hasher = gix_features::hash::hasher(gix_hash::Kind::Sha1);
674723
hasher.update(&header);

gix-object/tests/tree/from_bytes.rs

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,32 @@
1-
use gix_object::{bstr::ByteSlice, tree, tree::EntryRef, TreeRef, TreeRefIter};
1+
use gix_object::{bstr::ByteSlice, tree, tree::EntryRef, Tree, TreeRef, TreeRefIter, WriteTo};
22

33
use crate::{fixture_name, hex_to_id};
44

55
#[test]
66
fn empty() -> crate::Result {
7+
let tree_ref = TreeRef::from_bytes(&[])?;
78
assert_eq!(
8-
TreeRef::from_bytes(&[])?,
9+
tree_ref,
910
TreeRef { entries: vec![] },
1011
"empty trees are valid despite usually rare in the wild"
1112
);
13+
14+
let mut buf = Vec::new();
15+
tree_ref.write_to(&mut buf)?;
16+
assert!(buf.is_empty());
17+
18+
buf.clear();
19+
Tree::from(tree_ref).write_to(&mut buf)?;
20+
assert!(buf.is_empty());
1221
Ok(())
1322
}
1423

1524
#[test]
1625
fn everything() -> crate::Result {
26+
let fixture = fixture_name("tree", "everything.tree");
27+
let tree_ref = TreeRef::from_bytes(&fixture)?;
1728
assert_eq!(
18-
TreeRef::from_bytes(&fixture_name("tree", "everything.tree"))?,
29+
tree_ref,
1930
TreeRef {
2031
entries: vec![
2132
EntryRef {
@@ -83,11 +94,8 @@ fn special_trees() -> crate::Result {
8394
("special-5", 17),
8495
] {
8596
let fixture = fixture_name("tree", &format!("{name}.tree"));
86-
assert_eq!(
87-
TreeRef::from_bytes(&fixture)?.entries.len(),
88-
expected_entry_count,
89-
"{name}"
90-
);
97+
let actual = TreeRef::from_bytes(&fixture)?;
98+
assert_eq!(actual.entries.len(), expected_entry_count, "{name}");
9199
assert_eq!(
92100
TreeRefIter::from_bytes(&fixture).map(Result::unwrap).count(),
93101
expected_entry_count,

0 commit comments

Comments
 (0)