Skip to content

Commit ca86f97

Browse files
d4l3k and facebook-github-bot
authored and committed
docker_workspace: support dockerignore to filter files from the workspace (#401)
Summary: This adds a filter when building the docker workspace context that excludes the files specified in the `.dockerignore` file. This reduces the size of the patch when operating in a larger repo.

Reference: https://docs.docker.com/engine/reference/builder/#dockerignore-file

Next steps from this would be to extend this logic to be workspace agnostic so we could have a `.torchxignore` file, but it is not yet clear whether that makes sense or is strictly necessary.

Pull Request resolved: #401

Test Plan:
$ torchx run --scheduler aws_batch --wait --log dist.ddp --script foo.py -j 1x1
$ pytest torchx/workspace/

Reviewed By: kiukchung

Differential Revision: D34486363

Pulled By: d4l3k

fbshipit-source-id: b71ac0bb2a798fdfd95f13703a700ab9850d127a
1 parent 8a7a95a commit ca86f97

File tree

2 files changed

+91
-5
lines changed

2 files changed

+91
-5
lines changed

torchx/workspace/docker_workspace.py

Lines changed: 32 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,13 @@
44
# This source code is licensed under the BSD-style license found in the
55
# LICENSE file in the root directory of this source tree.
66

7+
import fnmatch
78
import io
89
import logging
910
import posixpath
1011
import tarfile
1112
import tempfile
12-
from typing import IO, TYPE_CHECKING, Optional, Dict, Tuple, Mapping
13+
from typing import IO, TYPE_CHECKING, Optional, Dict, Tuple, Mapping, Iterable
1314

1415
import fsspec
1516
import torchx
@@ -129,17 +130,44 @@ def _build_context(img: str, workspace: str) -> IO[bytes]:
129130
return f
130131

131132

132-
def _copy_to_tarfile(workspace: str, tf: tarfile.TarFile) -> None:
133-
# TODO(d4l3k) implement docker ignore files
133+
def _ignore(s: str, patterns: Iterable[str]) -> bool:
134+
match = False
135+
for pattern in patterns:
136+
if pattern.startswith("!") and fnmatch.fnmatch(s, pattern[1:]):
137+
match = False
138+
elif fnmatch.fnmatch(s, pattern):
139+
match = True
140+
return match
141+
134142

143+
def _copy_to_tarfile(workspace: str, tf: tarfile.TarFile) -> None:
    """
    Copies the files under ``workspace`` into ``tf``, leaving out anything
    excluded by the ``.dockerignore`` file at the workspace root (if any).

    Args:
        workspace: fsspec path or URL of the directory to copy.
        tf: open tar archive to add the files to. Member names are paths
            relative to the workspace root.
    """
    fs, path = fsspec.core.url_to_fs(workspace)
    assert isinstance(path, str), "path must be str"

    # load dockerignore
    # https://docs.docker.com/engine/reference/builder/#dockerignore-file
    ignore_patterns = []
    ignore_path = posixpath.join(path, ".dockerignore")
    if fs.exists(ignore_path):
        with fs.open(ignore_path, "rt") as ignore_file:
            raw_lines = ignore_file.readlines()
        for raw_line in raw_lines:
            # everything from the first "#" onwards is dropped as a comment
            pattern = raw_line.partition("#")[0].strip()
            # skip blank lines; a bare "." is also skipped since it would
            # match the relative path of the workspace root itself
            if pattern and pattern != ".":
                ignore_patterns.append(pattern)

    for dirpath, _dirs, files in fs.walk(path, detail=True):
        assert isinstance(dirpath, str), "path must be str"
        relpath = posixpath.relpath(dirpath, path)
        if _ignore(relpath, ignore_patterns):
            continue
        for file, info in files.items():
            # files in the root are stored as "name" rather than "./name"
            filepath = posixpath.join(relpath, file) if relpath != "." else file
            # decide before opening so ignored files are never opened
            if _ignore(filepath, ignore_patterns):
                continue
            tinfo = tarfile.TarInfo(filepath)
            tinfo.size = info["size"]
            with fs.open(info["name"], "rb") as f:
                tf.addfile(tinfo, f)

torchx/workspace/test/docker_workspace_test.py

Lines changed: 59 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,16 @@
44
# This source code is licensed under the BSD-style license found in the
55
# LICENSE file in the root directory of this source tree.
66

7+
import tarfile
78
import unittest
89
from unittest.mock import MagicMock
910

1011
import fsspec
1112
from torchx.specs import Role, AppDef
12-
from torchx.workspace.docker_workspace import DockerWorkspace
13+
from torchx.workspace.docker_workspace import (
14+
DockerWorkspace,
15+
_build_context,
16+
)
1317

1418

1519
def has_docker() -> bool:
@@ -114,3 +118,57 @@ def test_push_images(self) -> None:
114118
def test_push_images_empty(self) -> None:
115119
workspace = DockerWorkspace()
116120
workspace._push_images({})
121+
122+
def test_dockerignore(self) -> None:
    """
    Builds a context from an in-memory workspace that has a
    ``.dockerignore`` and checks which files end up in the tar archive.
    """
    memfs = fsspec.filesystem("memory")

    # workspace contents; each path exercises one of the rules below
    workspace_files = (
        "dockerignore/ignoredir/bar",
        "dockerignore/dir1/bar",
        "dockerignore/dir/ignorefileglob1",
        "dockerignore/dir/recursive/ignorefileglob2",
        "dockerignore/dir/ignorefile",
        "dockerignore/ignorefile",
        "dockerignore/ignorefilesuffix",
        "dockerignore/dir/file",
        "dockerignore/foo.sh",
        "dockerignore/unignore",
    )
    for workspace_file in workspace_files:
        memfs.touch(workspace_file)

    dockerignore = """
# comment

# dirs/files
ignoredir
ignorefile

# globs
*/ignorefileglo*1
**/ignorefileglob2
dir?

# inverse patterns
unignore
!unignore

# ignore .
.
"""
    with memfs.open("dockerignore/.dockerignore", "wt") as ignore_file:
        ignore_file.write(dockerignore)

    # everything not excluded above, plus the generated Dockerfile
    expected = {
        "Dockerfile",
        "foo.sh",
        ".dockerignore",
        "dir/ignorefile",
        "ignorefilesuffix",
        "dir/file",
        "unignore",
    }
    with _build_context("img", "memory://dockerignore") as context:
        with tarfile.open(fileobj=context, mode="r") as archive:
            self.assertCountEqual(archive.getnames(), expected)

0 commit comments

Comments
 (0)