From 0eae8dc0c4b1c6e18d6a34389a318fe72c077e4c Mon Sep 17 00:00:00 2001 From: Tim Head Date: Tue, 25 Jun 2019 13:42:46 +0200 Subject: [PATCH 1/3] Install R packages before copying repo contents --- repo2docker/buildpacks/r.py | 35 +++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/repo2docker/buildpacks/r.py b/repo2docker/buildpacks/r.py index 18bdef06..e7879dc8 100644 --- a/repo2docker/buildpacks/r.py +++ b/repo2docker/buildpacks/r.py @@ -291,15 +291,38 @@ class RBuildPack(PythonBuildPack): return super().get_build_scripts() + scripts - def get_assemble_scripts(self): - """ - Return series of build-steps specific to this repository. - """ - assemble_scripts = super().get_assemble_scripts() + def get_preassemble_scripts(self): + """Install contents of install.R""" + scripts = [] installR_path = self.binder_path("install.R") if os.path.exists(installR_path): - assemble_scripts += [("${NB_USER}", "Rscript %s" % installR_path)] + packages = [] + with open(installR_path) as f: + for line in f: + line = line.strip() + # skip commented out lines + if line.startswith("#"): + continue + else: + # XXX This needs a better solution for detecting which + # XXX kind of string quoting the user used/how to + # XXX escape quotes... + # using " as quote + if '"' in line: + packages.append("-e '{}'".format(line)) + # using ' as quote or no quotes + else: + packages.append('-e "{}"'.format(line)) + + package_expressions = " \\ \n".join(sorted(packages)) + scripts += [("${NB_USER}", "R --quiet %s" % package_expressions)] + + return super().get_preassemble_scripts() + scripts + + def get_assemble_scripts(self): + """Install the repository itself as a R package""" + assemble_scripts = super().get_assemble_scripts() description_R = "DESCRIPTION" if not self.binder_dir and os.path.exists(description_R): From 66227f85b497cd5ef412e762afbf5c6016fb9161 Mon Sep 17 00:00:00 2001 From: Tim Head Date: Sat, 29 Jun 2019 11:52:01 +0200 Subject: [PATCH 2/3] Detect failure of pre-assembly in R buildpack If the pre-assembly step succeeds we do not re-run it after copying the repository contents. --- repo2docker/buildpacks/base.py | 31 +++++++++++++++--- repo2docker/buildpacks/r.py | 58 +++++++++++++++++++++------------- 2 files changed, 62 insertions(+), 27 deletions(-) diff --git a/repo2docker/buildpacks/base.py b/repo2docker/buildpacks/base.py index 5f3e4305..d78be4f3 100644 --- a/repo2docker/buildpacks/base.py +++ b/repo2docker/buildpacks/base.py @@ -125,7 +125,14 @@ ENV {{item[0]}} {{item[1]}} # of the repository but don't access any files in the repository. By executing # them before copying the repository itself we can cache these steps. For # example installing APT packages. -{% for sd in pre_assemble_script_directives -%} +{% if preassemble_script_files -%} +# If scripts required during build are present, copy them +{% for src, dst in preassemble_script_files.items() %} +COPY src/{{ src }} ${REPO_DIR}/{{ dst }} +{% endfor -%} +{% endif -%} + +{% for sd in preassemble_script_directives -%} {{ sd }} {% endfor %} @@ -379,6 +386,19 @@ class BuildPack: return [] + def get_preassemble_script_files(self): + """ + Dict of files to be copied to the container image for use in preassembly. + + This is copied before the `build_scripts`, `preassemble_scripts` and + `assemble_scripts` are run, so can be executed from either of them. + + It's a dictionary where the key is the source file path in the + repository and the value is the destination file path inside the + repository in the container. + """ + return {} + def get_preassemble_scripts(self): """ Ordered list of shell snippets to build an image for this repository. @@ -499,13 +519,13 @@ class BuildPack: "RUN {}".format(textwrap.dedent(script.strip("\n"))) ) - pre_assemble_script_directives = [] + preassemble_script_directives = [] last_user = "root" for user, script in self.get_preassemble_scripts(): if last_user != user: - pre_assemble_script_directives.append("USER {}".format(user)) + preassemble_script_directives.append("USER {}".format(user)) last_user = user - pre_assemble_script_directives.append( + preassemble_script_directives.append( "RUN {}".format(textwrap.dedent(script.strip("\n"))) ) @@ -516,7 +536,8 @@ class BuildPack: env=self.get_env(), labels=self.get_labels(), build_script_directives=build_script_directives, - pre_assemble_script_directives=pre_assemble_script_directives, + preassemble_script_files=self.get_preassemble_script_files(), + preassemble_script_directives=preassemble_script_directives, assemble_script_directives=assemble_script_directives, build_script_files=self.get_build_script_files(), base_packages=sorted(self.get_base_packages()), diff --git a/repo2docker/buildpacks/r.py b/repo2docker/buildpacks/r.py index e7879dc8..12207112 100644 --- a/repo2docker/buildpacks/r.py +++ b/repo2docker/buildpacks/r.py @@ -291,39 +291,53 @@ class RBuildPack(PythonBuildPack): return super().get_build_scripts() + scripts + def get_preassemble_script_files(self): + files = {} + installR_path = self.binder_path("install.R") + if os.path.exists(installR_path): + files[installR_path] = installR_path + + return files + def get_preassemble_scripts(self): - """Install contents of install.R""" + """Install contents of install.R + + Attempt to execute `install.R` before copying the contents of the + repository. We speculate that most of the time we do not need access. + In case this fails we re-run it after copying the repository contents. + + The advantage of executing it before copying is that minor edits to the + repository content will not trigger a re-install making things faster. + """ scripts = [] installR_path = self.binder_path("install.R") if os.path.exists(installR_path): - packages = [] - with open(installR_path) as f: - for line in f: - line = line.strip() - # skip commented out lines - if line.startswith("#"): - continue - else: - # XXX This needs a better solution for detecting which - # XXX kind of string quoting the user used/how to - # XXX escape quotes... - # using " as quote - if '"' in line: - packages.append("-e '{}'".format(line)) - # using ' as quote or no quotes - else: - packages.append('-e "{}"'.format(line)) - - package_expressions = " \\ \n".join(sorted(packages)) - scripts += [("${NB_USER}", "R --quiet %s" % package_expressions)] + scripts += [ + ( + "${NB_USER}", + "Rscript %s && touch /tmp/.preassembled || true" % installR_path, + ) + ] return super().get_preassemble_scripts() + scripts def get_assemble_scripts(self): - """Install the repository itself as a R package""" + """Install the dependencies of or the repository itself""" assemble_scripts = super().get_assemble_scripts() + installR_path = self.binder_path("install.R") + if os.path.exists(installR_path): + assemble_scripts += [ + ( + "${NB_USER}", + # only run install.R if the pre-assembly failed + "if [ ! -f /tmp/.preassembled ]; then Rscript {}; fi".format( + installR_path + ), + ) + ] + description_R = "DESCRIPTION" if not self.binder_dir and os.path.exists(description_R): assemble_scripts += [ From 09b84811aeb6eadbe7f25dfe2a753b5682b6baf0 Mon Sep 17 00:00:00 2001 From: Tim Head Date: Tue, 9 Jul 2019 17:59:14 +0200 Subject: [PATCH 3/3] Make sure all loops over dict items are deterministic --- repo2docker/buildpacks/base.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/repo2docker/buildpacks/base.py b/repo2docker/buildpacks/base.py index d78be4f3..0bb9703b 100644 --- a/repo2docker/buildpacks/base.py +++ b/repo2docker/buildpacks/base.py @@ -90,7 +90,7 @@ ENV PATH {{ ':'.join(path) }}:${PATH} {% if build_script_files -%} # If scripts required during build are present, copy them -{% for src, dst in build_script_files.items() %} +{% for src, dst in build_script_files|dictsort %} COPY {{ src }} {{ dst }} {% endfor -%} {% endif -%} @@ -127,7 +127,7 @@ ENV {{item[0]}} {{item[1]}} # example installing APT packages. {% if preassemble_script_files -%} # If scripts required during build are present, copy them -{% for src, dst in preassemble_script_files.items() %} +{% for src, dst in preassemble_script_files|dictsort %} COPY src/{{ src }} ${REPO_DIR}/{{ dst }} {% endfor -%} {% endif -%} @@ -151,7 +151,7 @@ RUN chown -R ${NB_USER}:${NB_USER} ${REPO_DIR} # Container image Labels! # Put these at the end, since we don't want to rebuild everything # when these change! Did I mention I hate Dockerfile cache semantics? -{% for k, v in labels.items() %} +{% for k, v in labels|dictsort %} LABEL {{k}}="{{v}}" {%- endfor %}