Skip to content

Commit 6984d92

Browse files
committed
implement recursive wildstars
A sequence of two consecutive wildstars, `**`, acts like a regular wildstar `*` except that it also matches path separators, making it useful for matching arbitrary subdirectories. For example, the pattern "somedir/**/needle.txt" would match:

* somedir/one/needle.txt
* somedir/one/two/needle.txt
* somedir/other/needle.txt

Patterns such as "somedir/**/*.txt" are also possible.

The previous behavior of collapsing any number of consecutive wildstars into a single one has changed: a sequence of more than two consecutive wildstars is now treated literally.
1 parent 2f29bba commit 6984d92

File tree

2 files changed

+51
-11
lines changed

2 files changed

+51
-11
lines changed

src/lib.rs

Lines changed: 38 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,7 @@ pub struct Pattern {
192192
enum PatternToken {
193193
Char(char),
194194
AnyChar,
195-
AnySequence,
195+
AnySequence(bool),
196196
AnyWithin(Vec<CharSpecifier> ),
197197
AnyExcept(Vec<CharSpecifier> )
198198
}
@@ -220,6 +220,12 @@ impl Pattern {
220220
/// of characters, as ordered by Unicode, so e.g. `[0-9]` specifies any
221221
/// character between 0 and 9 inclusive.
222222
///
223+
/// A sequence of two `*` characters, `**`, acts like a single `*` except
224+
/// that it also matches path separators, making it useful for matching
225+
/// on arbitrary subdirectories.
226+
///
227+
/// A sequence of more than two consecutive `*` characters is treated literally.
228+
///
223229
/// The metacharacters `?`, `*`, `[`, `]` can be matched by using brackets
224230
/// (e.g. `[?]`). When a `]` occurs immediately following `[` or `[!` then
225231
/// it is interpreted as being part of, rather than ending, the character
@@ -242,11 +248,23 @@ impl Pattern {
242248
i += 1;
243249
}
244250
'*' => {
245-
// *, **, ***, ****, ... are all equivalent
246-
while i < chars.len() && chars[i] == '*' {
247-
i += 1;
251+
let old = i;
252+
253+
while i < chars.len() && chars[i] == '*' {
254+
i += 1;
255+
}
256+
257+
let count = i - old;
258+
259+
if count > 2 {
260+
for _ in range(0u, count) {
261+
tokens.push(Char('*'));
248262
}
249-
tokens.push(AnySequence);
263+
} else if count == 2 {
264+
tokens.push(AnySequence(true));
265+
} else {
266+
tokens.push(AnySequence(false));
267+
}
250268
}
251269
'[' => {
252270

@@ -364,7 +382,7 @@ impl Pattern {
364382

365383
for (ti, token) in self.tokens.slice_from(i).iter().enumerate() {
366384
match *token {
367-
AnySequence => {
385+
AnySequence(recursive) => {
368386
loop {
369387
match self.matches_from(prev_char.get(), file, i + ti + 1, options) {
370388
SubPatternDoesntMatch => (), // keep trying
@@ -376,7 +394,7 @@ impl Pattern {
376394
Some(pair) => pair
377395
};
378396

379-
if require_literal(c) {
397+
if !recursive && require_literal(c) {
380398
return SubPatternDoesntMatch;
381399
}
382400
prev_char.set(Some(c));
@@ -408,7 +426,7 @@ impl Pattern {
408426
Char(c2) => {
409427
chars_eq(c, c2, options.case_sensitive)
410428
}
411-
AnySequence => {
429+
AnySequence(_) => {
412430
unreachable!()
413431
}
414432
};
@@ -628,10 +646,9 @@ mod test {
628646
}
629647

630648
#[test]
631-
fn test_wildcard_optimizations() {
632-
assert!(Pattern::new("a*b").matches("a___b"));
649+
fn test_wildcards() {
650+
assert!(Pattern::new("a*b").matches("a_b"));
633651
assert!(Pattern::new("a**b").matches("a___b"));
634-
assert!(Pattern::new("a***b").matches("a___b"));
635652
assert!(Pattern::new("a*b*c").matches("abc"));
636653
assert!(!Pattern::new("a*b*c").matches("abcd"));
637654
assert!(Pattern::new("a*b*c").matches("a_b_c"));
@@ -642,6 +659,16 @@ mod test {
642659
assert!(Pattern::new("a*b[xyz]c*d").matches("abxcdbxcddd"));
643660
}
644661

662+
#[test]
663+
fn test_recursive_wildstars() {
664+
let pat = Pattern::new("some/**/needle.txt");
665+
assert!(pat.matches("some/one/needle.txt"));
666+
assert!(pat.matches("some/one/two/needle.txt"));
667+
assert!(pat.matches("some/other/needle.txt"));
668+
// more than 2 consecutive wildcards and they're all treated literally
669+
assert!(Pattern::new("a***b").matches("a***b"));
670+
}
671+
645672
#[test]
646673
fn test_lots_of_files() {
647674
// this is a good test because it touches lots of differently named files

tests/glob-std.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,19 @@ fn main() {
7171
mk_file("xyz/y", false);
7272
mk_file("xyz/z", false);
7373

74+
mk_file("r", true);
75+
mk_file("r/one", true);
76+
mk_file("r/one/a.md", false);
77+
mk_file("r/two", true);
78+
mk_file("r/two/b.md", false);
79+
mk_file("r/three", true);
80+
mk_file("r/three/c.md", false);
81+
82+
assert_eq!(glob_vec("r/**/*.md"), vec!(
83+
abs_path("r/one/a.md"),
84+
abs_path("r/three/c.md"),
85+
abs_path("r/two/b.md")));
86+
7487
assert_eq!(glob_vec(""), Vec::new());
7588
assert_eq!(glob_vec("."), vec!(os::getcwd().unwrap()));
7689
assert_eq!(glob_vec(".."), vec!(os::getcwd().unwrap().join("..")));

0 commit comments

Comments
 (0)