Skip to content

Commit d51f330

Browse files
committed
Merge branch 'sanitization'
2 parents 8077f3e + b9cf0f4 commit d51f330

File tree

6 files changed

+418
-65
lines changed

6 files changed

+418
-65
lines changed

gix-ref/tests/namespace/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ mod expand {
6363
fn repeated_slashes_are_invalid() {
6464
assert!(matches!(
6565
gix_ref::namespace::expand("foo//bar").expect_err("empty invalid"),
66-
gix_validate::reference::name::Error::RepeatedSlash
66+
gix_validate::reference::name::Error::Tag(gix_validate::tag::name::Error::RepeatedSlash)
6767
));
6868
}
6969
}

gix-refspec/tests/parse/invalid.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,9 @@ fn empty() {
1111
fn empty_component() {
1212
assert!(matches!(
1313
try_parse("refs/heads/test:refs/remotes//test", Operation::Fetch).unwrap_err(),
14-
Error::ReferenceName(gix_validate::reference::name::Error::RepeatedSlash)
14+
Error::ReferenceName(gix_validate::reference::name::Error::Tag(
15+
gix_validate::tag::name::Error::RepeatedSlash
16+
))
1517
));
1618
}
1719

gix-validate/src/reference.rs

Lines changed: 38 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
use bstr::{BStr, BString, ByteSlice};
2+
13
///
24
#[allow(clippy::empty_docs)]
35
pub mod name {
@@ -11,12 +13,6 @@ pub mod name {
1113
Tag(#[from] crate::tag::name::Error),
1214
#[error("Standalone references must be all uppercased, like 'HEAD'")]
1315
SomeLowercase,
14-
#[error("A reference name must not start with a slash '/'")]
15-
StartsWithSlash,
16-
#[error("Multiple slashes in a row are not allowed as they may change the reference's meaning")]
17-
RepeatedSlash,
18-
#[error("Path components must not start with '.'")]
19-
StartsWithDot,
2016
}
2117

2218
impl From<Infallible> for Error {
@@ -26,49 +22,59 @@ pub mod name {
2622
}
2723
}
2824

29-
use bstr::BStr;
30-
3125
/// Validate a reference name running all the tests in the book. This disallows lower-case references like `lower`, but also allows
3226
/// ones like `HEAD`, and `refs/lower`.
3327
pub fn name(path: &BStr) -> Result<&BStr, name::Error> {
34-
validate(path, Mode::Complete)
28+
match validate(path, Mode::Complete)? {
29+
None => Ok(path),
30+
Some(_) => {
31+
unreachable!("Without sanitization, there is no chance a sanitized version is returned.")
32+
}
33+
}
3534
}
3635

3736
/// Validate a partial reference name. As it is assumed to be partial, names like `some-name` are allowed
3837
/// even though these would be disallowed when using [`name()`].
3938
pub fn name_partial(path: &BStr) -> Result<&BStr, name::Error> {
40-
validate(path, Mode::Partial)
39+
match validate(path, Mode::Partial)? {
40+
None => Ok(path),
41+
Some(_) => {
42+
unreachable!("Without sanitization, there is no chance a sanitized version is returned.")
43+
}
44+
}
45+
}
46+
47+
/// The infallible version of [`name_partial()`] which instead of failing, alters `path` and returns it to be a valid
48+
/// partial name, which would also pass [`name_partial()`].
49+
///
50+
/// Note that an empty `path` is replaced with a `-` in order to be valid.
51+
pub fn name_partial_or_sanitize(path: &BStr) -> BString {
52+
validate(path, Mode::PartialSanitize)
53+
.expect("BUG: errors cannot happen as any issue is fixed instantly")
54+
.expect("we always rebuild the path")
4155
}
4256

4357
enum Mode {
4458
Complete,
4559
Partial,
60+
/// like Partial, but instead of failing, a sanitized version is returned.
61+
PartialSanitize,
4662
}
4763

48-
fn validate(path: &BStr, mode: Mode) -> Result<&BStr, name::Error> {
49-
crate::tag::name(path)?;
50-
if path[0] == b'/' {
51-
return Err(name::Error::StartsWithSlash);
52-
}
53-
let mut previous = 0;
54-
let mut saw_slash = false;
55-
for byte in path.iter() {
56-
match *byte {
57-
b'/' if previous == b'/' => return Err(name::Error::RepeatedSlash),
58-
b'.' if previous == b'/' => return Err(name::Error::StartsWithDot),
59-
_ => {}
60-
}
61-
62-
if *byte == b'/' {
63-
saw_slash = true;
64-
}
65-
previous = *byte;
66-
}
67-
64+
fn validate(path: &BStr, mode: Mode) -> Result<Option<BString>, name::Error> {
65+
let out = crate::tag::name_inner(
66+
path,
67+
match mode {
68+
Mode::Complete | Mode::Partial => crate::tag::Mode::Validate,
69+
Mode::PartialSanitize => crate::tag::Mode::Sanitize,
70+
},
71+
)?;
6872
if let Mode::Complete = mode {
69-
if !saw_slash && !path.iter().all(|c| c.is_ascii_uppercase() || *c == b'_') {
73+
let input = out.as_ref().map_or(path, |b| b.as_bstr());
74+
let saw_slash = input.find_byte(b'/').is_some();
75+
if !saw_slash && !input.iter().all(|c| c.is_ascii_uppercase() || *c == b'_') {
7076
return Err(name::Error::SomeLowercase);
7177
}
7278
}
73-
Ok(path)
79+
Ok(out)
7480
}

gix-validate/src/tag.rs

Lines changed: 113 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use bstr::BStr;
1+
use bstr::{BStr, BString};
22

33
///
44
#[allow(clippy::empty_docs)]
@@ -11,8 +11,12 @@ pub mod name {
1111
pub enum Error {
1212
#[error("A ref must not contain invalid bytes or ascii control characters: {byte:?}")]
1313
InvalidByte { byte: BString },
14+
#[error("A reference name must not start with a slash '/'")]
15+
StartsWithSlash,
16+
#[error("Multiple slashes in a row are not allowed as they may change the reference's meaning")]
17+
RepeatedSlash,
1418
#[error("A ref must not contain '..' as it may be mistaken for a range")]
15-
DoubleDot,
19+
RepeatedDot,
1620
#[error("A ref must not end with '.lock'")]
1721
LockFileSuffix,
1822
#[error("A ref must not contain '@{{' which is a part of a ref-log")]
@@ -33,36 +37,130 @@ pub mod name {
3337
/// Assure the given `input` resembles a valid git tag name, which is returned unchanged on success.
3438
/// Tag names are provided as names, like `v1.0` or `alpha-1`, without paths.
3539
pub fn name(input: &BStr) -> Result<&BStr, name::Error> {
40+
match name_inner(input, Mode::Validate)? {
41+
None => Ok(input),
42+
Some(_) => {
43+
unreachable!("When validating, the input isn't changed")
44+
}
45+
}
46+
}
47+
48+
#[derive(Eq, PartialEq)]
49+
pub(crate) enum Mode {
50+
Sanitize,
51+
Validate,
52+
}
53+
54+
pub(crate) fn name_inner(input: &BStr, mode: Mode) -> Result<Option<BString>, name::Error> {
55+
let mut out: Option<BString> =
56+
matches!(mode, Mode::Sanitize).then(|| BString::from(Vec::with_capacity(input.len())));
3657
if input.is_empty() {
37-
return Err(name::Error::Empty);
58+
return if let Some(mut out) = out {
59+
out.push(b'-');
60+
Ok(Some(out))
61+
} else {
62+
Err(name::Error::Empty)
63+
};
3864
}
39-
if *input.last().expect("non-empty") == b'/' {
65+
if *input.last().expect("non-empty") == b'/' && out.is_none() {
4066
return Err(name::Error::EndsWithSlash);
4167
}
68+
if input.first() == Some(&b'/') && out.is_none() {
69+
return Err(name::Error::StartsWithSlash);
70+
}
4271

4372
let mut previous = 0;
4473
for byte in input.iter() {
4574
match byte {
4675
b'\\' | b'^' | b':' | b'[' | b'?' | b' ' | b'~' | b'\0'..=b'\x1F' | b'\x7F' => {
47-
return Err(name::Error::InvalidByte {
48-
byte: (&[*byte][..]).into(),
49-
})
76+
if let Some(out) = out.as_mut() {
77+
out.push(b'-');
78+
} else {
79+
return Err(name::Error::InvalidByte {
80+
byte: (&[*byte][..]).into(),
81+
});
82+
}
83+
}
84+
b'*' => {
85+
if let Some(out) = out.as_mut() {
86+
out.push(b'-');
87+
} else {
88+
return Err(name::Error::Asterisk);
89+
}
90+
}
91+
92+
b'.' if previous == b'.' => {
93+
if out.is_none() {
94+
return Err(name::Error::RepeatedDot);
95+
}
96+
}
97+
b'.' if previous == b'/' => {
98+
if let Some(out) = out.as_mut() {
99+
out.push(b'-');
100+
} else {
101+
return Err(name::Error::StartsWithDot);
102+
}
103+
}
104+
b'{' if previous == b'@' => {
105+
if let Some(out) = out.as_mut() {
106+
out.push(b'-');
107+
} else {
108+
return Err(name::Error::ReflogPortion);
109+
}
110+
}
111+
b'/' if previous == b'/' => {
112+
if out.is_none() {
113+
return Err(name::Error::RepeatedSlash);
114+
}
115+
}
116+
b'.' if previous == b'/' => {
117+
if let Some(out) = out.as_mut() {
118+
out.push(b'-');
119+
} else {
120+
return Err(name::Error::StartsWithDot);
121+
}
122+
}
123+
c => {
124+
if let Some(out) = out.as_mut() {
125+
out.push(*c)
126+
}
50127
}
51-
b'*' => return Err(name::Error::Asterisk),
52-
b'.' if previous == b'.' => return Err(name::Error::DoubleDot),
53-
b'{' if previous == b'@' => return Err(name::Error::ReflogPortion),
54-
_ => {}
55128
}
56129
previous = *byte;
57130
}
131+
132+
if let Some(out) = out.as_mut() {
133+
while out.last() == Some(&b'/') {
134+
out.pop();
135+
}
136+
while out.first() == Some(&b'/') {
137+
out.remove(0);
138+
}
139+
}
58140
if input[0] == b'.' {
59-
return Err(name::Error::StartsWithDot);
141+
if let Some(out) = out.as_mut() {
142+
out[0] = b'-';
143+
} else {
144+
return Err(name::Error::StartsWithDot);
145+
}
60146
}
61147
if input[input.len() - 1] == b'.' {
62-
return Err(name::Error::EndsWithDot);
148+
if let Some(out) = out.as_mut() {
149+
let last = out.len() - 1;
150+
out[last] = b'-';
151+
} else {
152+
return Err(name::Error::EndsWithDot);
153+
}
63154
}
64155
if input.ends_with(b".lock") {
65-
return Err(name::Error::LockFileSuffix);
156+
if let Some(out) = out.as_mut() {
157+
while out.ends_with(b".lock") {
158+
let len_without_suffix = out.len() - b".lock".len();
159+
out.truncate(len_without_suffix);
160+
}
161+
} else {
162+
return Err(name::Error::LockFileSuffix);
163+
}
66164
}
67-
Ok(input)
165+
Ok(out)
68166
}

0 commit comments

Comments
 (0)