From 10ca25d9862f1ab78f476ffa952f8a179979d6c0 Mon Sep 17 00:00:00 2001 From: Tim Head Date: Tue, 25 Jun 2019 12:22:25 +0200 Subject: [PATCH 1/2] Install APT packages before copying the repo contents --- repo2docker/buildpacks/base.py | 64 +++++++++++++++++++++++++++------- 1 file changed, 52 insertions(+), 12 deletions(-) diff --git a/repo2docker/buildpacks/base.py b/repo2docker/buildpacks/base.py index 5383bb90..5f3e4305 100644 --- a/repo2docker/buildpacks/base.py +++ b/repo2docker/buildpacks/base.py @@ -114,12 +114,6 @@ WORKDIR ${REPO_DIR} # installs. See https://specifications.freedesktop.org/basedir-spec/basedir-spec-latest.html ENV PATH ${HOME}/.local/bin:${REPO_DIR}/.local/bin:${PATH} -# Copy and chown stuff. This doubles the size of the repo, because -# you can't actually copy as USER, only as root! Thanks, Docker! -USER root -COPY src/ ${REPO_DIR} -RUN chown -R ${NB_USER}:${NB_USER} ${REPO_DIR} - {% if env -%} # The rest of the environment {% for item in env -%} @@ -127,9 +121,22 @@ ENV {{item[0]}} {{item[1]}} {% endfor -%} {% endif -%} +# Run pre-assemble scripts! These are instructions that depend on the content +# of the repository but don't access any files in the repository. By executing +# them before copying the repository itself we can cache these steps. For +# example installing APT packages. +{% for sd in pre_assemble_script_directives -%} +{{ sd }} +{% endfor %} -# Run assemble scripts! These will actually build the specification -# in the repository into the image. +# Copy and chown stuff. This doubles the size of the repo, because +# you can't actually copy as USER, only as root! Thanks, Docker! +USER root +COPY src/ ${REPO_DIR} +RUN chown -R ${NB_USER}:${NB_USER} ${REPO_DIR} + +# Run assemble scripts! These will actually turn the specification +# in the repository into an image. 
{% for sd in assemble_script_directives -%} {{ sd }} {% endfor %} @@ -372,6 +379,22 @@ class BuildPack: return [] + def get_preassemble_scripts(self): + """ + Ordered list of shell snippets to build an image for this repository. + + A list of tuples, where the first item is a username & the + second is a single logical line of a bash script that should + be RUN as that user. + + These are run before the source of the repository is copied into + the container image. These should be the scripts that depend on the + repository but do not need access to the contents. + + For example the list of APT packages to install. + """ + return [] + def get_assemble_scripts(self): """ Ordered list of shell script snippets to build the repo into the image. @@ -476,6 +499,16 @@ class BuildPack: "RUN {}".format(textwrap.dedent(script.strip("\n"))) ) + pre_assemble_script_directives = [] + last_user = "root" + for user, script in self.get_preassemble_scripts(): + if last_user != user: + pre_assemble_script_directives.append("USER {}".format(user)) + last_user = user + pre_assemble_script_directives.append( + "RUN {}".format(textwrap.dedent(script.strip("\n"))) + ) + return t.render( packages=sorted(self.get_packages()), path=self.get_path(), @@ -483,6 +516,7 @@ class BuildPack: env=self.get_env(), labels=self.get_labels(), build_script_directives=build_script_directives, + pre_assemble_script_directives=pre_assemble_script_directives, assemble_script_directives=assemble_script_directives, build_script_files=self.get_build_script_files(), base_packages=sorted(self.get_base_packages()), @@ -618,8 +652,8 @@ class BaseImage(BuildPack): def detect(self): return True - def get_assemble_scripts(self): - assemble_scripts = [] + def get_preassemble_scripts(self): + scripts = [] try: with open(self.binder_path("apt.txt")) as f: extra_apt_packages = [] @@ -637,7 +671,7 @@ class BaseImage(BuildPack): ) extra_apt_packages.append(package) - assemble_scripts.append( + scripts.append( ( "root", # 
This apt-get install is *not* quiet, since users explicitly asked for this @@ -648,12 +682,18 @@ class BaseImage(BuildPack): apt-get -qq clean && \ rm -rf /var/lib/apt/lists/* """.format( - " ".join(extra_apt_packages) + " ".join(sorted(extra_apt_packages)) ), ) ) + except FileNotFoundError: pass + + return scripts + + def get_assemble_scripts(self): + assemble_scripts = [] if "py" in self.stencila_contexts: assemble_scripts.extend( [ From 41da67490f295f810310c30b1bd4c1bd2412ffc8 Mon Sep 17 00:00:00 2001 From: Tim Head Date: Tue, 25 Jun 2019 13:15:45 +0200 Subject: [PATCH 2/2] Move more R config to the build phase --- repo2docker/buildpacks/r.py | 63 +++++++++++++++++++------------------ 1 file changed, 32 insertions(+), 31 deletions(-) diff --git a/repo2docker/buildpacks/r.py b/repo2docker/buildpacks/r.py index 20b03318..18bdef06 100644 --- a/repo2docker/buildpacks/r.py +++ b/repo2docker/buildpacks/r.py @@ -158,6 +158,9 @@ class RBuildPack(PythonBuildPack): - IRKernel - nbrsessionproxy (to access RStudio via Jupyter Notebook) - stencila R package (if Stencila document with R code chunks detected) + + The snapshot date from which R libraries are installed is set based on the + contents of runtime.txt. """ rstudio_url = "https://download2.rstudio.org/rstudio-server-1.1.419-amd64.deb" # This is MD5, because that is what RStudio download page provides! 
@@ -173,6 +176,10 @@ class RBuildPack(PythonBuildPack): # IRKernel version - specified as a tag in the IRKernel repository irkernel_version = "0.8.11" + mran_url = "https://mran.microsoft.com/snapshot/{}".format( + self.checkpoint_date.isoformat() + ) + scripts = [ ( "root", @@ -241,11 +248,32 @@ class RBuildPack(PythonBuildPack): "${NB_USER}", # Install shiny library r""" - R --quiet -e "install.packages('shiny', repos='https://mran.microsoft.com/snapshot/{}', method='libcurl')" + R --quiet -e "install.packages('shiny', repos='{}', method='libcurl')" """.format( - self.checkpoint_date.isoformat() + mran_url ), ), + ( + "root", + # We set the default CRAN repo to the MRAN one at given date + # We set download method to be curl so we get HTTPS support + r""" + echo "options(repos = c(CRAN='{mran_url}'), download.file.method = 'libcurl')" > /etc/R/Rprofile.site + """.format( + mran_url=mran_url + ), + ), + ( + # Not all of these locations are configurable; so we make sure + # they exist and have the correct permissions + "root", + r""" + install -o ${NB_USER} -g ${NB_USER} -d /var/log/shiny-server && \ + install -o ${NB_USER} -g ${NB_USER} -d /var/lib/shiny-server && \ + install -o ${NB_USER} -g ${NB_USER} /dev/null /var/log/shiny-server.log && \ + install -o ${NB_USER} -g ${NB_USER} /dev/null /var/run/shiny-server.pid + """, + ), ] if "r" in self.stencila_contexts: @@ -265,36 +293,9 @@ class RBuildPack(PythonBuildPack): def get_assemble_scripts(self): """ - Return series of build-steps specific to this repository - - We set the snapshot date used to install R libraries from based on the - contents of runtime.txt, and run the `install.R` script if it exists. + Return series of build-steps specific to this repository. 
""" - mran_url = "https://mran.microsoft.com/snapshot/{}".format( - self.checkpoint_date.isoformat() - ) - assemble_scripts = super().get_assemble_scripts() + [ - ( - "root", - # We set the default CRAN repo to the MRAN one at given date - # We set download method to be curl so we get HTTPS support - r""" - echo "options(repos = c(CRAN='{mran_url}'), download.file.method = 'libcurl')" > /etc/R/Rprofile.site - """.format( - mran_url=mran_url - ), - ), - ( - # Not all of these locations are configurable; log_dir is - "root", - r""" - install -o ${NB_USER} -g ${NB_USER} -d /var/log/shiny-server && \ - install -o ${NB_USER} -g ${NB_USER} -d /var/lib/shiny-server && \ - install -o ${NB_USER} -g ${NB_USER} /dev/null /var/log/shiny-server.log && \ - install -o ${NB_USER} -g ${NB_USER} /dev/null /var/run/shiny-server.pid - """, - ), - ] + assemble_scripts = super().get_assemble_scripts() installR_path = self.binder_path("install.R") if os.path.exists(installR_path):