Skip to content

Commit 462b03a

Browse files
authored
Merge pull request #654 from martindurant/memdirs
Fix memory directories and add tests
2 parents 5bfd3d8 + b956197 commit 462b03a

File tree

10 files changed

+170
-106
lines changed

10 files changed

+170
-106
lines changed

fsspec/conftest.py

+2
Original file line number | Diff line number | Diff line change
@@ -17,10 +17,12 @@ def m():
1717
"""
1818
m = fsspec.filesystem("memory")
1919
m.store.clear()
20+
m.pseudo_dirs.clear()
2021
try:
2122
yield m
2223
finally:
2324
m.store.clear()
25+
m.pseudo_dirs.clear()
2426

2527

2628
@pytest.fixture

fsspec/fuse.py

+1
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@ def __init__(self, fs, path):
1616
self.cache = {}
1717
self.root = path.rstrip("/") + "/"
1818
self.counter = 0
19+
logger.info("Starting FUSE at %s", path)
1920

2021
def getattr(self, path, fh=None):
2122
logger.debug("getattr %s", path)

fsspec/implementations/memory.py

+109 -70
Original file line number | Diff line number | Diff line change
@@ -18,92 +18,104 @@ class MemoryFileSystem(AbstractFileSystem):
1818
"""
1919

2020
store = {} # global
21-
pseudo_dirs = []
21+
pseudo_dirs = [""]
2222
protocol = "memory"
23-
root_marker = ""
23+
root_marker = "/"
24+
25+
@classmethod
26+
def _strip_protocol(cls, path):
27+
if path.startswith("memory://"):
28+
path = path[len("memory://") :]
29+
if "::" in path or "://" in path:
30+
return path.rstrip("/")
31+
path = path.lstrip("/").rstrip("/")
32+
return "/" + path if path else ""
2433

2534
def ls(self, path, detail=False, **kwargs):
35+
path = self._strip_protocol(path)
2636
if path in self.store:
27-
# there is a key with this exact name, but could also be directory
28-
out = [
37+
# there is a key with this exact name
38+
return [
2939
{
3040
"name": path,
3141
"size": self.store[path].getbuffer().nbytes,
3242
"type": "file",
3343
"created": self.store[path].created,
3444
}
3545
]
36-
else:
37-
out = []
38-
path = path.strip("/").lstrip("/")
3946
paths = set()
47+
starter = path + "/"
48+
out = []
4049
for p2 in self.store:
41-
has_slash = "/" if p2.startswith("/") else ""
42-
p = p2.lstrip("/")
43-
if "/" in p:
44-
root = p.rsplit("/", 1)[0]
45-
else:
46-
root = ""
47-
if root == path:
48-
out.append(
49-
{
50-
"name": has_slash + p,
51-
"size": self.store[p2].getbuffer().nbytes,
52-
"type": "file",
53-
"created": self.store[p2].created,
54-
}
55-
)
56-
elif (
57-
path
58-
and len(path) < len(p.strip("/"))
59-
and all(
60-
(a == b) for a, b in zip(path.split("/"), p.strip("/").split("/"))
61-
)
62-
):
63-
# implicit directory
64-
ppath = "/".join(p.split("/")[: len(path.split("/")) + 1])
65-
if ppath not in paths:
66-
out.append(
67-
{
68-
"name": has_slash + ppath + "/",
69-
"size": 0,
70-
"type": "directory",
71-
}
72-
)
73-
paths.add(ppath)
74-
elif all(
75-
(a == b)
76-
for a, b in zip(path.split("/"), [""] + p.strip("/").split("/"))
77-
):
78-
# root directory entry
79-
ppath = p.rstrip("/").split("/", 1)[0]
80-
if ppath not in paths:
50+
if p2.startswith(starter):
51+
if "/" not in p2[len(starter) :]:
52+
# exact child
8153
out.append(
8254
{
83-
"name": has_slash + ppath + "/",
84-
"size": 0,
85-
"type": "directory",
55+
"name": p2,
56+
"size": self.store[p2].getbuffer().nbytes,
57+
"type": "file",
58+
"created": self.store[p2].created,
8659
}
8760
)
88-
paths.add(ppath)
61+
elif len(p2) > len(starter):
62+
# implied child directory
63+
ppath = starter + p2[len(starter) :].split("/", 1)[0]
64+
if ppath not in paths:
65+
out = out or []
66+
out.append(
67+
{
68+
"name": ppath,
69+
"size": 0,
70+
"type": "directory",
71+
}
72+
)
73+
paths.add(ppath)
8974
for p2 in self.pseudo_dirs:
90-
if self._parent(p2).strip("/") == path and p2.strip("/") not in paths:
91-
out.append({"name": p2 + "/", "size": 0, "type": "directory"})
75+
if p2.startswith(starter):
76+
if "/" not in p2[len(starter) :]:
77+
# exact child pdir
78+
if p2 not in paths:
79+
out.append({"name": p2, "size": 0, "type": "directory"})
80+
paths.add(p2)
81+
else:
82+
# directory implied by deeper pdir
83+
ppath = starter + p2[len(starter) :].split("/", 1)[0]
84+
if ppath not in paths:
85+
out.append({"name": ppath, "size": 0, "type": "directory"})
86+
paths.add(ppath)
87+
if not out:
88+
if path in self.pseudo_dirs:
89+
# empty dir
90+
return []
91+
raise FileNotFoundError(path)
9292
if detail:
9393
return out
9494
return sorted([f["name"] for f in out])
9595

9696
def mkdir(self, path, create_parents=True, **kwargs):
97-
path = path.rstrip("/")
98-
if create_parents and self._parent(path):
99-
self.mkdir(self._parent(path), create_parents, **kwargs)
100-
if self._parent(path) and not self.isdir(self._parent(path)):
97+
path = self._strip_protocol(path)
98+
if path in self.store or path in self.pseudo_dirs:
99+
raise FileExistsError
100+
if self._parent(path).strip("/") and self.isfile(self._parent(path)):
101101
raise NotADirectoryError(self._parent(path))
102+
if create_parents and self._parent(path).strip("/"):
103+
try:
104+
self.mkdir(self._parent(path), create_parents, **kwargs)
105+
except FileExistsError:
106+
pass
102107
if path and path not in self.pseudo_dirs:
103108
self.pseudo_dirs.append(path)
104109

110+
def makedirs(self, path, exist_ok=False):
111+
try:
112+
self.mkdir(path, create_parents=True)
113+
except FileExistsError:
114+
if not exist_ok:
115+
raise
116+
105117
def rmdir(self, path):
106-
path = path.rstrip("/")
118+
path = self._strip_protocol(path)
107119
if path in self.pseudo_dirs:
108120
if not self.ls(path):
109121
self.pseudo_dirs.remove(path)
@@ -116,6 +128,26 @@ def exists(self, path):
116128
path = self._strip_protocol(path)
117129
return path in self.store or path in self.pseudo_dirs
118130

131+
def info(self, path, **kwargs):
132+
path = self._strip_protocol(path)
133+
if path in self.pseudo_dirs or any(
134+
p.startswith(path + "/") for p in list(self.store) + self.pseudo_dirs
135+
):
136+
return {
137+
"name": path,
138+
"size": 0,
139+
"type": "directory",
140+
}
141+
elif path in self.store:
142+
return {
143+
"name": path,
144+
"size": self.store[path].getbuffer().nbytes,
145+
"type": "file",
146+
"created": self.store[path].created,
147+
}
148+
else:
149+
raise FileNotFoundError(path)
150+
119151
def _open(
120152
self,
121153
path,
@@ -125,6 +157,14 @@ def _open(
125157
cache_options=None,
126158
**kwargs,
127159
):
160+
path = self._strip_protocol(path)
161+
if path in self.pseudo_dirs:
162+
raise IsADirectoryError
163+
parent = path
164+
while len(parent) > 1:
165+
parent = self._parent(parent)
166+
if self.isfile(parent):
167+
raise FileExistsError(parent)
128168
if mode in ["rb", "ab", "rb+"]:
129169
if path in self.store:
130170
f = self.store[path]
@@ -144,6 +184,8 @@ def _open(
144184
return m
145185

146186
def cp_file(self, path1, path2, **kwargs):
187+
path1 = self._strip_protocol(path1)
188+
path2 = self._strip_protocol(path2)
147189
if self.isfile(path1):
148190
self.store[path2] = MemoryFile(self, path2, self.store[path1].getbuffer())
149191
elif self.isdir(path1):
@@ -153,18 +195,18 @@ def cp_file(self, path1, path2, **kwargs):
153195
raise FileNotFoundError
154196

155197
def cat_file(self, path, start=None, end=None, **kwargs):
198+
path = self._strip_protocol(path)
156199
try:
157200
return self.store[path].getvalue()[start:end]
158201
except KeyError:
159202
raise FileNotFoundError(path)
160203

161204
def _rm(self, path):
162-
if self.isfile(path):
205+
path = self._strip_protocol(path)
206+
try:
163207
del self.store[path]
164-
elif self.isdir(path):
165-
self.rmdir(path)
166-
else:
167-
raise FileNotFoundError
208+
except KeyError as e:
209+
raise FileNotFoundError from e
168210

169211
def rm(self, path, recursive=False, maxdepth=None):
170212
paths = self.expand_path(path, recursive=recursive, maxdepth=maxdepth)
@@ -175,13 +217,10 @@ def rm(self, path, recursive=False, maxdepth=None):
175217
# directories first.
176218
if not self.exists(p):
177219
continue
178-
self.rm_file(p)
179-
180-
def size(self, path):
181-
"""Size in bytes of the file at path"""
182-
if path not in self.store:
183-
raise FileNotFoundError(path)
184-
return self.store[path].getbuffer().nbytes
220+
if self.isfile(p):
221+
self.rm_file(p)
222+
else:
223+
self.rmdir(p)
185224

186225

187226
class MemoryFile(BytesIO):

fsspec/implementations/tests/test_cached.py

+4 -4
Original file line number | Diff line number | Diff line change
@@ -693,9 +693,9 @@ def test_multi_cache_chain(protocol):
693693
def test_strip(protocol):
694694
fs = fsspec.filesystem(protocol, target_protocol="memory")
695695
url1 = "memory://afile"
696-
assert fs._strip_protocol(url1) == "afile"
697-
assert fs._strip_protocol(protocol + "://afile") == "afile"
698-
assert fs._strip_protocol(protocol + "::memory://afile") == "afile"
696+
assert fs._strip_protocol(url1) == "/afile"
697+
assert fs._strip_protocol(protocol + "://afile") == "/afile"
698+
assert fs._strip_protocol(protocol + "::memory://afile") == "/afile"
699699

700700

701701
@pytest.mark.parametrize("protocol", ["simplecache", "filecache"])
@@ -713,7 +713,7 @@ def test_expiry():
713713

714714
d = tempfile.mkdtemp()
715715
fs = fsspec.filesystem("memory")
716-
fn = "afile"
716+
fn = "/afile"
717717
fn0 = "memory://afile"
718718
data = b"hello"
719719
with fs.open(fn0, "wb") as f:

fsspec/implementations/tests/test_dask.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -26,5 +26,5 @@ def setup():
2626
def test_basic(cli):
2727

2828
fs = fsspec.filesystem("dask", target_protocol="memory")
29-
assert fs.ls("") == ["afile"]
30-
assert fs.cat("afile") == b"data"
29+
assert fs.ls("") == ["/afile"]
30+
assert fs.cat("/afile") == b"data"

0 commit comments

Comments
 (0)