From 6b8b2334e7530925a7d82eb386d80c1edee1d653 Mon Sep 17 00:00:00 2001 From: Samuel Gaist Date: Wed, 26 Oct 2022 15:11:04 +0200 Subject: [PATCH 1/6] feat: implement support for dockerignore and containerignore Currently repo2docker creates a context object that includes the whole content of the repository it builds an image for. Thus it includes folders like .git, which is usually of no interest in the final image, can take quite a lot of space and, most importantly, kills the caching of that layer. This patch adds support for reading dockerignore and containerignore files that are used to ensure only the relevant data are used to build the image. By default it also excludes the .git folder if neither of these files is provided. --- repo2docker/buildpacks/base.py | 34 +++++++++++++++++++++++++++------- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/repo2docker/buildpacks/base.py b/repo2docker/buildpacks/base.py index 9ed622cb..9d164ff1 100644 --- a/repo2docker/buildpacks/base.py +++ b/repo2docker/buildpacks/base.py @@ -12,6 +12,8 @@ from functools import lru_cache import escapism import jinja2 +from docker.utils.build import exclude_paths + # Only use syntax features supported by Docker 17.09 TEMPLATE = r""" FROM {{base_image}} @@ -590,16 +592,16 @@ class BuildPack: tar.addfile(dockerfile_tarinfo, io.BytesIO(dockerfile)) - def _filter_tar(tar): + def _filter_tar(tarinfo): # We need to unset these for build_script_files we copy into tar # Otherwise they seem to vary each time, preventing effective use # of the cache! 
# https://github.com/docker/docker-py/pull/1582 is related - tar.uname = "" - tar.gname = "" - tar.uid = int(build_args.get("NB_UID", DEFAULT_NB_UID)) - tar.gid = int(build_args.get("NB_UID", DEFAULT_NB_UID)) - return tar + tarinfo.uname = "" + tarinfo.gname = "" + tarinfo.uid = int(build_args.get("NB_UID", DEFAULT_NB_UID)) + tarinfo.gid = int(build_args.get("NB_UID", DEFAULT_NB_UID)) + return tarinfo for src in sorted(self.get_build_script_files()): dest_path, src_path = self.generate_build_context_filename(src) @@ -608,7 +610,25 @@ class BuildPack: for fname in ("repo2docker-entrypoint", "python3-login"): tar.add(os.path.join(HERE, fname), fname, filter=_filter_tar) - tar.add(".", "src/", filter=_filter_tar) + exclude = [] + + for ignore_file in [".dockerignore", ".containerignore"]: + if os.path.exists(ignore_file): + with open(ignore_file, "r") as f: + exclude.extend( + list( + filter( + lambda x: x != "" and x[0] != "#", + [l.strip() for l in f.read().splitlines()], + ) + ) + ) + + if not exclude: + exclude = ["**/.git"] + + for item in exclude_paths(".", exclude): + tar.add(item, f"src/{item}", filter=_filter_tar) tar.close() tarf.seek(0) From 3028d0718200f3ce55103cbb5b1bb4f9cc674393 Mon Sep 17 00:00:00 2001 From: Samuel Gaist Date: Wed, 26 Oct 2022 17:19:12 +0200 Subject: [PATCH 2/6] fix: create src directory in any case The original behavior was to create an src directory with the content of the repository. The creation would happen in any case (remote or local repository). With the filtering in place and the .git folder excluded by default, the build breaks because the src folder can be missing from the tar. This patch ensures that the directory is present in the tar so the build can continue as it did until now. 
--- repo2docker/buildpacks/base.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/repo2docker/buildpacks/base.py b/repo2docker/buildpacks/base.py index 9d164ff1..af4e7129 100644 --- a/repo2docker/buildpacks/base.py +++ b/repo2docker/buildpacks/base.py @@ -627,8 +627,17 @@ class BuildPack: if not exclude: exclude = ["**/.git"] - for item in exclude_paths(".", exclude): - tar.add(item, f"src/{item}", filter=_filter_tar) + files_to_add = exclude_paths(".", exclude) + + if files_to_add: + for item in files_to_add: + tar.add(item, f"src/{item}", filter=_filter_tar) + else: + # Either the source was empty or everything was filtered out. + # In any case, create an src dir so the build can proceed. + src = tarfile.TarInfo("src") + src.type = tarfile.DIRTYPE + tar.addfile(src) tar.close() tarf.seek(0) From d6670f4378127c15af8176a4a9b8b0c35ee2350f Mon Sep 17 00:00:00 2001 From: Samuel Gaist Date: Wed, 26 Oct 2022 17:59:56 +0200 Subject: [PATCH 3/6] refactor: use list comprehension rather than filter+lambda --- repo2docker/buildpacks/base.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/repo2docker/buildpacks/base.py b/repo2docker/buildpacks/base.py index af4e7129..aa469ee0 100644 --- a/repo2docker/buildpacks/base.py +++ b/repo2docker/buildpacks/base.py @@ -612,16 +612,18 @@ class BuildPack: exclude = [] - for ignore_file in [".dockerignore", ".containerignore"]: - if os.path.exists(ignore_file): - with open(ignore_file, "r") as f: + for ignore_file_name in [".dockerignore", ".containerignore"]: + if os.path.exists(ignore_file_name): + with open(ignore_file_name, "r") as ignore_file: + cleaned_lines = [ + line.strip() for line in ignore_file.read().splitlines() + ] exclude.extend( - list( - filter( - lambda x: x != "" and x[0] != "#", - [l.strip() for l in f.read().splitlines()], - ) - ) + [ + line + for line in cleaned_lines + if line != "" and line[0] != "#" + ] ) if not exclude: From 
a51235f9a5e87763b1305d739de8a57b26b9df94 Mon Sep 17 00:00:00 2001 From: Samuel Gaist Date: Wed, 13 Dec 2023 17:56:03 +0100 Subject: [PATCH 4/6] fix: remove the ignore .git folder default when no exclude files are present --- repo2docker/buildpacks/base.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/repo2docker/buildpacks/base.py b/repo2docker/buildpacks/base.py index aa469ee0..06f52224 100644 --- a/repo2docker/buildpacks/base.py +++ b/repo2docker/buildpacks/base.py @@ -11,7 +11,6 @@ from functools import lru_cache import escapism import jinja2 - from docker.utils.build import exclude_paths # Only use syntax features supported by Docker 17.09 @@ -614,7 +613,7 @@ class BuildPack: for ignore_file_name in [".dockerignore", ".containerignore"]: if os.path.exists(ignore_file_name): - with open(ignore_file_name, "r") as ignore_file: + with open(ignore_file_name) as ignore_file: cleaned_lines = [ line.strip() for line in ignore_file.read().splitlines() ] @@ -626,9 +625,6 @@ class BuildPack: ] ) - if not exclude: - exclude = ["**/.git"] - files_to_add = exclude_paths(".", exclude) if files_to_add: From 09e503c4e6a5d60d1bfaf0b62b7dffe9bd5472e6 Mon Sep 17 00:00:00 2001 From: Samuel Gaist Date: Fri, 19 Jan 2024 09:36:52 +0100 Subject: [PATCH 5/6] feat: respect the root folder configured when looking for ignore files See the design chapter for more details. 
--- repo2docker/buildpacks/base.py | 1 + 1 file changed, 1 insertion(+) diff --git a/repo2docker/buildpacks/base.py b/repo2docker/buildpacks/base.py index 06f52224..f8dcf758 100644 --- a/repo2docker/buildpacks/base.py +++ b/repo2docker/buildpacks/base.py @@ -612,6 +612,7 @@ class BuildPack: exclude = [] for ignore_file_name in [".dockerignore", ".containerignore"]: + ignore_file_name = self.binder_path(ignore_file_name) if os.path.exists(ignore_file_name): with open(ignore_file_name) as ignore_file: cleaned_lines = [ From 77df191536f12d73ebba134982959f6d1e2c0a3e Mon Sep 17 00:00:00 2001 From: Samuel Gaist Date: Fri, 19 Jan 2024 09:40:49 +0100 Subject: [PATCH 6/6] test(venv): add ignore files above binderdir This ensures that the ignore files are retrieved from the proper folder. If they weren't, the build would not succeed, as the binder folder is ignored. --- tests/venv/binder-dir/.containerignore | 1 + tests/venv/binder-dir/.dockerignore | 1 + 2 files changed, 2 insertions(+) create mode 100644 tests/venv/binder-dir/.containerignore create mode 100644 tests/venv/binder-dir/.dockerignore diff --git a/tests/venv/binder-dir/.containerignore b/tests/venv/binder-dir/.containerignore new file mode 100644 index 00000000..64bb5d21 --- /dev/null +++ b/tests/venv/binder-dir/.containerignore @@ -0,0 +1 @@ +binder/ diff --git a/tests/venv/binder-dir/.dockerignore b/tests/venv/binder-dir/.dockerignore new file mode 100644 index 00000000..64bb5d21 --- /dev/null +++ b/tests/venv/binder-dir/.dockerignore @@ -0,0 +1 @@ +binder/