Skip to content

Commit 6aa8a9a

Browse files
authored
Small optimisations for referenceFS (#1393)
1 parent c20c31a commit 6aa8a9a

File tree

1 file changed

+5
-6
lines changed

1 file changed

+5
-6
lines changed

fsspec/implementations/reference.py

Lines changed: 5 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -117,8 +117,9 @@ def __init__(
117117
self._items = {}
118118
self.dirs = None
119119
self.fs = fsspec.filesystem("file") if fs is None else fs
120-
with self.fs.open("/".join([self.root, ".zmetadata"]), "rb") as f:
121-
self._items[".zmetadata"] = f.read()
120+
self._items[".zmetadata"] = self.fs.cat_file(
121+
"/".join([self.root, ".zmetadata"])
122+
)
122123
met = json.loads(self._items[".zmetadata"])
123124
self.record_size = met["record_size"]
124125
self.zmetadata = met["metadata"]
@@ -131,10 +132,8 @@ def __init__(
131132
def open_refs(field, record):
132133
"""cached parquet file loader"""
133134
path = self.url.format(field=field, record=record)
134-
with self.fs.open(path) as f:
135-
# TODO: since all we do is iterate, is arrow without pandas
136-
# better here?
137-
df = self.pd.read_parquet(f, engine="fastparquet")
135+
data = io.BytesIO(self.fs.cat_file(path))
136+
df = self.pd.read_parquet(data, engine="fastparquet")
138137
refs = {c: df[c].values for c in df.columns}
139138
return refs
140139

0 commit comments

Comments
 (0)