Skip to content

Commit 3d127b5

Browse files
author
atollk
committed
Added filter_glob and exclude_glob to fs.walk.Walker.
These extend the class with options to include/exclude resources by their entire path, not just the last path component. To do so, fs.wildcard had to undergo a rework to remove the dependency on the `re` module. Unit tests were added for all new/changed code.
1 parent 12cd2f4 commit 3d127b5

File tree

8 files changed

+531
-131
lines changed

8 files changed

+531
-131
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
1010

1111
### Added
1212

13+
- To `fs.walk.Walker`, added parameters `filter_glob` and `exclude_glob`.
14+
Closes [#459](https://github.com/PyFilesystem/pyfilesystem2/issues/459).
1315
- Added `fs.copy.copy_file_if`, `fs.copy.copy_dir_if`, and `fs.copy.copy_fs_if`.
1416
Closes [#458](https://github.com/PyFilesystem/pyfilesystem2/issues/458).
1517
- Added `fs.base.FS.getmodified`.
@@ -22,6 +24,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
2224

2325
### Fixed
2426

27+
- Elaborated documentation of `filter_dirs` and `exclude_dirs` in `fs.walk.Walker`.
28+
Closes [#371](https://github.com/PyFilesystem/pyfilesystem2/issues/371).
2529
- Fixed performance bugs in `fs.copy.copy_dir_if_newer`. Test cases were adapted to catch those bugs in the future.
2630
- Fixed precision bug for timestamps in `fs.OSFS.setinfo`.
2731

fs/base.py

Lines changed: 58 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121

2222
import six
2323

24-
from . import copy, errors, fsencode, iotools, move, tools, walk, wildcard
24+
from . import copy, errors, fsencode, iotools, move, tools, walk, wildcard, glob
2525
from .copy import copy_modified_time
2626
from .glob import BoundGlobber
2727
from .mode import validate_open_mode
@@ -1648,8 +1648,8 @@ def check(self):
16481648
if self.isclosed():
16491649
raise errors.FilesystemClosed()
16501650

1651-
def match(self, patterns, name):
1652-
# type: (Optional[Iterable[Text]], Text) -> bool
1651+
def match(self, patterns, name, accept_prefix=False):
1652+
# type: (Optional[Iterable[Text]], Text, bool) -> bool
16531653
"""Check if a name matches any of a list of wildcards.
16541654
16551655
If a filesystem is case *insensitive* (such as Windows) then
@@ -1691,6 +1691,61 @@ def match(self, patterns, name):
16911691
matcher = wildcard.get_matcher(patterns, case_sensitive)
16921692
return matcher(name)
16931693

1694+
def match_glob(self, patterns, path, accept_prefix=False):
    # type: (Optional[Iterable[Text]], Text, bool) -> bool
    """Check if a path matches any of a list of glob patterns.

    Case sensitivity follows the filesystem: when the filesystem meta
    reports ``case_insensitive`` (such as on Windows), ``*.py`` matches
    the same paths as ``*.PY``; otherwise the comparison is case
    sensitive.

    Arguments:
        patterns (list, optional): A list of glob patterns, e.g.
            ``['*.py']``, or `None` to match everything.
        path (str): A resource path, starting with "/".
        accept_prefix (bool): If ``True``, ``path`` does not have to
            match a pattern itself; it is enough for it to be a
            prefix of a path that would match.

    Returns:
        bool: `True` if ``path`` matches any of the patterns.

    Raises:
        TypeError: If ``patterns`` is a single string instead of
            a list (or `None`).
        ValueError: If ``path`` is empty or does not start with "/".

    Example:
        >>> my_fs.match_glob(['*.py'], '/__init__.py')
        True
        >>> my_fs.match_glob(['*.jpg', '*.png'], '/foo.gif')
        False
        >>> my_fs.match_glob(['dir/file.txt'], '/dir/', accept_prefix=True)
        True
        >>> my_fs.match_glob(['dir/file.txt'], '/dir/gile.txt', accept_prefix=True)
        False

    Note:
        If ``patterns`` is `None`, this method returns `True`
        immediately, before ``path`` is validated.

    """
    # ``None`` means "no filter": accept without inspecting the path.
    if patterns is None:
        return True
    if not (path and path[0] == "/"):
        raise ValueError("%s needs to be a string starting with /" % path)
    # A bare string would otherwise be iterated character by character.
    if isinstance(patterns, six.text_type):
        raise TypeError("patterns must be a list or sequence")

    meta = self.getmeta()
    case_insensitive = typing.cast(bool, meta.get("case_insensitive", False))
    path_matches = glob.get_matcher(
        patterns, not case_insensitive, accept_prefix=accept_prefix
    )
    return path_matches(path)
1748+
16941749
def tree(self, **kwargs):
16951750
# type: (**Any) -> None
16961751
"""Render a tree view of the filesystem to stdout or a file.

fs/errors.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
"OperationFailed",
4343
"OperationTimeout",
4444
"PathError",
45+
"PatternError",
4546
"PermissionDenied",
4647
"RemoteConnectionError",
4748
"RemoveRootError",
@@ -51,6 +52,7 @@
5152
"ResourceNotFound",
5253
"ResourceReadOnly",
5354
"Unsupported",
55+
"UnsupportedHash",
5456
]
5557

5658

@@ -346,3 +348,19 @@ class UnsupportedHash(ValueError):
346348
not supported by hashlib.
347349
348350
"""
351+
352+
353+
class PatternError(ValueError):
    """A string pattern with invalid syntax was given.

    Attributes:
        pattern (str): The offending pattern.
        position (int): Index in ``pattern`` where parsing failed.
        exc (Exception, optional): The underlying exception, if any.

    """

    default_message = "pattern '{pattern}' is invalid at position {position}"

    def __init__(self, pattern, position, exc=None, msg=None):  # noqa: D107
        # type: (Text, int, Optional[Exception], Optional[Text]) -> None
        self.pattern = pattern
        self.position = position
        self.exc = exc
        # ``msg`` is a format template; it may reference ``{pattern}``,
        # ``{position}`` and ``{exc}``. Keep the raw template so that
        # ``__reduce__`` can rebuild an identical instance.
        self._msg = msg or self.default_message
        # Name this class (not ValueError) in super() so ValueError's own
        # initialiser is not skipped, and hand it the rendered message so
        # ``str(error)`` is meaningful.
        super(PatternError, self).__init__(
            self._msg.format(pattern=pattern, position=position, exc=exc)
        )

    def __reduce__(self):
        # Rebuild from the constructor arguments; the default exception
        # reduction would replay ``self.args`` (the rendered message only),
        # which does not match this ``__init__`` signature.
        return type(self), (self.pattern, self.position, self.exc, self._msg)

fs/glob.py

Lines changed: 172 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,19 +6,28 @@
66
from collections import namedtuple
77
import re
88
import typing
9+
from functools import partial
910

1011
from .lrucache import LRUCache
1112
from ._repr import make_repr
1213
from .path import iteratepath
13-
from . import wildcard
1414

1515

1616
GlobMatch = namedtuple("GlobMatch", ["path", "info"])
1717
Counts = namedtuple("Counts", ["files", "directories", "data"])
1818
LineCounts = namedtuple("LineCounts", ["lines", "non_blank"])
1919

2020
if typing.TYPE_CHECKING:
21-
from typing import Iterator, List, Optional, Pattern, Text, Tuple
21+
from typing import (
22+
Iterator,
23+
List,
24+
Optional,
25+
Pattern,
26+
Text,
27+
Tuple,
28+
Iterable,
29+
Callable,
30+
)
2231
from .base import FS
2332

2433

@@ -27,17 +36,87 @@
2736
) # type: LRUCache[Tuple[Text, bool], Tuple[int, bool, Pattern]]
2837

2938

39+
def _split_pattern_by_rec(pattern):
40+
# type: (Text) -> List[Text]
41+
"""Split a glob pattern at its directory seperators (/).
42+
43+
Takes into account escaped cases like [/].
44+
"""
45+
indices = [-1]
46+
bracket_open = False
47+
for i, c in enumerate(pattern):
48+
if c == "/" and not bracket_open:
49+
indices.append(i)
50+
elif c == "[":
51+
bracket_open = True
52+
elif c == "]":
53+
bracket_open = False
54+
55+
indices.append(len(pattern))
56+
return [pattern[i + 1 : j] for i, j in zip(indices[:-1], indices[1:])]
57+
58+
59+
def _translate(pattern, case_sensitive=True):
60+
# type: (Text, bool) -> Text
61+
"""Translate a wildcard pattern to a regular expression.
62+
63+
There is no way to quote meta-characters.
64+
Arguments:
65+
pattern (str): A wildcard pattern.
66+
case_sensitive (bool): Set to `False` to use a case
67+
insensitive regex (default `True`).
68+
69+
Returns:
70+
str: A regex equivalent to the given pattern.
71+
72+
"""
73+
if not case_sensitive:
74+
pattern = pattern.lower()
75+
i, n = 0, len(pattern)
76+
res = []
77+
while i < n:
78+
c = pattern[i]
79+
i = i + 1
80+
if c == "*":
81+
res.append("[^/]*")
82+
elif c == "?":
83+
res.append("[^/]")
84+
elif c == "[":
85+
j = i
86+
if j < n and pattern[j] == "!":
87+
j = j + 1
88+
if j < n and pattern[j] == "]":
89+
j = j + 1
90+
while j < n and pattern[j] != "]":
91+
j = j + 1
92+
if j >= n:
93+
res.append("\\[")
94+
else:
95+
stuff = pattern[i:j].replace("\\", "\\\\")
96+
i = j + 1
97+
if stuff[0] == "!":
98+
stuff = "^" + stuff[1:]
99+
elif stuff[0] == "^":
100+
stuff = "\\" + stuff
101+
res.append("[%s]" % stuff)
102+
else:
103+
res.append(re.escape(c))
104+
return "".join(res)
105+
106+
30107
def _translate_glob(pattern, case_sensitive=True):
31108
levels = 0
32109
recursive = False
33110
re_patterns = [""]
34111
for component in iteratepath(pattern):
35-
if component == "**":
36-
re_patterns.append(".*/?")
112+
if "**" in component:
37113
recursive = True
114+
split = component.split("**")
115+
split_re = [_translate(s, case_sensitive=case_sensitive) for s in split]
116+
re_patterns.append("/?" + ".*/?".join(split_re))
38117
else:
39118
re_patterns.append(
40-
"/" + wildcard._translate(component, case_sensitive=case_sensitive)
119+
"/" + _translate(component, case_sensitive=case_sensitive)
41120
)
42121
levels += 1
43122
re_glob = "(?ms)^" + "".join(re_patterns) + ("/$" if pattern.endswith("/") else "$")
@@ -71,6 +150,8 @@ def match(pattern, path):
71150
except KeyError:
72151
levels, recursive, re_pattern = _translate_glob(pattern, case_sensitive=True)
73152
_PATTERN_CACHE[(pattern, True)] = (levels, recursive, re_pattern)
153+
if path and path[0] != "/":
154+
path = "/" + path
74155
return bool(re_pattern.match(path))
75156

76157

@@ -91,9 +172,95 @@ def imatch(pattern, path):
91172
except KeyError:
92173
levels, recursive, re_pattern = _translate_glob(pattern, case_sensitive=True)
93174
_PATTERN_CACHE[(pattern, False)] = (levels, recursive, re_pattern)
175+
if path and path[0] != "/":
176+
path = "/" + path
94177
return bool(re_pattern.match(path))
95178

96179

180+
def match_any(patterns, path):
    # type: (Iterable[Text], Text) -> bool
    """Test if a path matches any of a list of patterns.

    Will return `True` if ``patterns`` is an empty list.

    Arguments:
        patterns (list): A list of glob patterns, e.g ``["*.py",
            "*.pyc"]``
        path (str): A resource path.

    Returns:
        bool: `True` if the path matches at least one of the patterns.

    """
    if patterns:
        for pattern in patterns:
            if match(pattern, path):
                return True
        return False
    # Nothing to filter by: every path is accepted.
    return True
198+
199+
200+
def imatch_any(patterns, path):
    # type: (Iterable[Text], Text) -> bool
    """Test if a path matches any of a list of patterns (case insensitive).

    Will return `True` if ``patterns`` is an empty list.

    Arguments:
        patterns (list): A list of glob patterns, e.g ``["*.py",
            "*.pyc"]``
        path (str): A resource path.

    Returns:
        bool: `True` if the path matches at least one of the patterns.

    """
    if patterns:
        for pattern in patterns:
            if imatch(pattern, path):
                return True
        return False
    # Nothing to filter by: every path is accepted.
    return True
218+
219+
220+
def get_matcher(patterns, case_sensitive, accept_prefix=False):
    # type: (Iterable[Text], bool, bool) -> Callable[[Text], bool]
    """Get a callable that matches paths against the given patterns.

    Arguments:
        patterns (list): A list of glob patterns. e.g. ``["*.py",
            "*.pyc"]``
        case_sensitive (bool): If ``True``, then the callable will be case
            sensitive, otherwise it will be case insensitive.
        accept_prefix (bool): If ``True``, a path is not required to
            match a pattern itself; it is enough for it to match the
            leading directory components of a pattern.

    Returns:
        callable: a matcher that will return `True` if the path given as
        an argument matches any of the given patterns. If ``patterns``
        is empty (or `None`), the matcher accepts every path.

    Example:
        >>> from fs import glob
        >>> is_python = glob.get_matcher(['*.py'], True)
        >>> is_python('/__init__.py')
        True
        >>> is_python('/foo.txt')
        False

    """
    # Materialise once: the returned matcher iterates the patterns on every
    # call, so a one-shot iterable would be exhausted after the first call
    # (and an empty one would slip past the emptiness check below).
    patterns = list(patterns or ())
    if not patterns:
        return lambda name: True

    if accept_prefix:
        expanded = []
        for pattern in patterns:
            components = _split_pattern_by_rec(pattern)
            # Every proper leading run of components is accepted too,
            # both with and without a trailing separator.
            for depth in range(1, len(components)):
                prefix = "/".join(components[:depth])
                expanded.append(prefix)
                expanded.append(prefix + "/")
            expanded.append(pattern)
        patterns = expanded

    matcher = match_any if case_sensitive else imatch_any
    return partial(matcher, patterns)
262+
263+
97264
class Globber(object):
98265
"""A generator of glob results."""
99266

0 commit comments

Comments
 (0)