From d77bcb4ecd6f20319f68f9522de60d77ca893375 Mon Sep 17 00:00:00 2001 From: yuvipanda Date: Tue, 4 Jul 2017 10:20:07 -0700 Subject: [PATCH] Fix python2.7 buildpack to work properly --- repo2docker/detectors.py | 799 ++++++++++++++++++++++++++++++++------- s2i-builders/generate.py | 707 ---------------------------------- 2 files changed, 664 insertions(+), 842 deletions(-) delete mode 100644 s2i-builders/generate.py diff --git a/repo2docker/detectors.py b/repo2docker/detectors.py index 1b4797e7..c2f19378 100644 --- a/repo2docker/detectors.py +++ b/repo2docker/detectors.py @@ -1,60 +1,645 @@ -import os -import sys -import subprocess -from textwrap import dedent - -import docker -from docker.utils import kwargs_from_env - -from traitlets import Unicode, Dict, Bool +""" +Generates a variety of Dockerfiles based on an input matrix +""" +import textwrap from traitlets.config import LoggingConfigurable +from traitlets import Unicode, Set, List, Dict, Tuple, Bool, default +import jinja2 +import tarfile +import io +import os +import re +import json +import docker -import logging -from pythonjsonlogger import jsonlogger +TEMPLATE = r""" +FROM ubuntu:17.04 -from .utils import execute_cmd +# Set up locales properly +RUN apt-get update && \ + apt-get install --yes --no-install-recommends locales && \ + apt-get purge && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && \ + locale-gen + +ENV LC_ALL en_US.UTF-8 +ENV LANG en_US.UTF-8 +ENV LANGUAGE en_US.UTF-8 + +# Use bash as default shell, rather than sh +ENV SHELL /bin/bash + +# Set up user +ENV NB_USER jovyan +ENV NB_UID 1000 +ENV HOME /home/${NB_USER} + +RUN adduser --disabled-password \ + --gecos "Default user" \ + --uid ${NB_UID} \ + ${NB_USER} +WORKDIR ${HOME} + +RUN apt-get update && \ + apt-get install --yes \ + {% for package in packages -%} + {{ package }} \ + {% endfor -%} + && apt-get purge && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +EXPOSE 8888 + +{% if env -%} +# Almost all environment variables +{% for item in env -%} +ENV {{item[0]}} {{item[1]}} +{% endfor -%} +{% endif -%} + +{% if path -%} +# Special case PATH +ENV PATH {{ ':'.join(path) }}:${PATH} +{% endif -%} + +{% if build_script_files -%} +# If scripts required during build are present, copy them +{% for src, dst in build_script_files.items() %} +COPY {{ src }} {{ dst }} +{% endfor -%} +{% endif -%} + +{% for sd in build_script_directives -%} +{{sd}} +{% endfor %} + +# Copy and chown stuff. This doubles the size of the repo, because +# you can't actually copy as USER, only as root! Thanks, Docker! +USER root +COPY src/ ${HOME} +RUN chown -R ${NB_USER}:${NB_USER} ${HOME} + +# Run assemble scripts! These will actually build the specification +# in the repository into the image. +{% for sd in assemble_script_directives -%} +{{ sd }} +{% endfor %} + +# Container image Labels! +# Put these at the end, since we don't want to rebuild everything +# when these change! Did I mention I hate Dockerfile cache semantics? +{% for k, v in labels.items() -%} +LABEL {{k}}={{v}} +{%- endfor %} + +# We always want containers to run as non-root +USER ${NB_USER} +""" -here = os.path.abspath(os.path.dirname(__file__)) class BuildPack(LoggingConfigurable): - name = Unicode() - capture = Bool(False, help="Capture output for logging") + """ + A composable BuildPack. - def detect(self, workdir): - """ - Return True if app in workdir can be built with this buildpack - """ - pass + Specifically used for creating Dockerfiles for use with repo2docker only. - def build(self, workdir, ref, output_image_spec): + Things that are kept constant: + - base image + - some environment variables (such as locale) + - user creation & ownership of home directory + - working directory + + Everything that is configurable is additive & deduplicative, + and there are *some* general guarantees of ordering. + + """ + packages = Set( + set(), + help=""" + List of packages that are installed in this BuildPack by default. + + Versions are not specified, and ordering is not guaranteed. These + are usually installed as apt packages. """ - Run a command that will take workdir and produce an image ready to be pushed + ) + env = List( + [], + help=""" + Ordered list of environment variables to be set for this image. + + Ordered so that environment variables can use other environment + variables in their values. + + Expects tuples, with the first item being the environment variable + name and the second item being the value. """ - pass + ) + + path = List( + [], + help=""" + Ordered list of file system paths to look for executables in. + + Just sets the PATH environment variable. Separated out since + it is very commonly set by various buildpacks. + """ + ) + + labels = Dict( + {}, + help=""" + Docker labels to set on the built image. + """ + ) + + build_script_files = Dict( + {}, + help=""" + List of files to be copied to the container image for use in building. + + This is copied before the `build_scripts` & `assemble_scripts` are + run, so can be executed from either of them. + + It's a dictionary where the key is the source file path in the host + system, and the value is the destination file path inside the + container image. + """ + ) + + build_scripts = List( + [], + help=""" + Ordered list of shell script snippets to build the base image. + + A list of tuples, where the first item is a username & the + second is a single logical line of a bash script that should + be RUN as that user. + + These are run before the source of the repository is copied + into the container image, and hence can not reference stuff + from the repository. When the build scripts are done, the + container image should be in a state where it is generically + re-useable for building various other repositories with + similar environments. + + You can use environment variable substitutions in both the + username and the execution script. + """ + ) + + assemble_scripts = List( + [], + help=""" + Ordered list of shell script snippets to build the repo into the image. + + A list of tuples, where the first item is a username & the + second is a single logical line of a bash script that should + be RUN as that user. + + These are run after the source of the repository is copied into + the container image (into the current directory). These should be + the scripts that actually build the repository into the container + image. + + If this needs to be dynamically determined (based on the presence + or absence of certain files, for example), you can create any + method and decorate it with `traitlets.default('assemble_scripts)` + and the return value of this method is used as the value of + assemble_scripts. You can expect that the script is running in + the current directory of the repository being built when doing + dynamic detection. + + You can use environment variable substitutions in both the + username and the execution script. + """ + ) + + name = Unicode( + help=""" + Name of the BuildPack! + """ + ) + + components = Tuple(()) + + def compose_with(self, other): + """ + Compose this BuildPack with another, returning a new one + + Ordering does matter - the properties of the current BuildPack take + precedence (wherever that matters) over the properties of other + BuildPack. If there are any conflicts, this method is responsible + for resolving them. + """ + result = BuildPack(parent=self) + labels = {} + labels.update(self.labels) + labels.update(other.labels) + result.labels = labels + result.packages = self.packages.union(other.packages) + result.path = self.path + other.path + # FIXME: Deduplicate Env + result.env = self.env + other.env + result.build_scripts = self.build_scripts + other.build_scripts + result.assemble_scripts = self.assemble_scripts + other.assemble_scripts + + build_script_files = {} + build_script_files.update(self.build_script_files) + build_script_files.update(other.build_script_files) + result.build_script_files = build_script_files + + result.name = "{}-{}".format(self.name, other.name) + + result.components = (self, ) + self.components + (other, ) + other.components + return result + + def detect(self): + return all([p.detect() for p in self.components]) + + def render(self): + """ + Render BuildPack into Dockerfile + """ + t = jinja2.Template(TEMPLATE) + + build_script_directives = [] + last_user = 'root' + for user, script in self.build_scripts: + if last_user != user: + build_script_directives.append("USER {}".format(user)) + last_user = user + build_script_directives.append("RUN {}".format( + textwrap.dedent(script.strip('\n')) + )) + + assemble_script_directives = [] + last_user = 'root' + for user, script in self.assemble_scripts: + if last_user != user: + build_script_directives.append("USER {}".format(user)) + last_user = user + assemble_script_directives.append("RUN {}".format( + textwrap.dedent(script.strip('\n')) + )) + + return t.render( + packages=sorted(self.packages), + path=self.path, + env=self.env, + labels=self.labels, + build_script_directives=build_script_directives, + assemble_script_directives=assemble_script_directives, + build_script_files=self.build_script_files + ) + + def build(self, image_spec): + tarf = io.BytesIO() + tar = tarfile.open(fileobj=tarf, mode='x') + dockerfile_tarinfo = tarfile.TarInfo("Dockerfile") + dockerfile = self.render().encode('utf-8') + dockerfile_tarinfo.size = len(dockerfile) + + tar.addfile( + dockerfile_tarinfo, + io.BytesIO(dockerfile) + ) + + for src in self.build_script_files: + tar.add(src) + + tar.add('.', 'src/') + + tar.close() + tarf.seek(0) + + client = docker.APIClient(version='auto', **docker.utils.kwargs_from_env()) + for line in client.build( + fileobj=tarf, + tag=image_spec, + custom_context=True, + decode=True + ): + if 'stream' in line: + print(line['stream'], end='') + elif 'error' in line: + print(line['error'], end='') + break + else: + raise ValueError("Unexpected return from docker builder: {}".format(json.dumps(line))) + else: + print("Built image", image_spec) + + +class BaseImage(BuildPack): + name = "repo2docker" + version = "0.1" + packages = { + # Utils! + "git", + "tar", + "curl", + "wget", + "less", + # Build tools + "build-essential", + "pkg-config", + } + + labels = { + "io.openshift.s2i.scripts-url": "image:///usr/libexec/s2i" + } + + env = [ + ("APP_BASE", "/srv") + ] + + def detect(self): + return True + + @default('assemble_scripts') + def setup_assembly(self): + assemble_scripts = [] + try: + with open('apt.txt') as f: + extra_apt_packages = [l.strip() for l in f] + # Validate that this is, indeed, just a list of packages + # We're doing shell injection around here, gotta be careful. + # FIXME: Add support for specifying version numbers + for p in extra_apt_packages: + if not re.match(r"^[a-z0-9.+-]+", p): + raise ValueError("Found invalid package name {} in apt.txt".format(p)) + + assemble_scripts.append(( + 'root', + r""" + apt-get update && \ + apt-get install --yes --no-install-recommends {} && \ + apt-get purge && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + """.format(' '.join(extra_apt_packages)) + )) + except FileNotFoundError: + pass + return assemble_scripts + + +class PythonBuildPack(BuildPack): + name = "python3.5" + version = "0.1" + + packages = { + 'python3', + 'python3-venv', + 'python3-dev', + } + + env = [ + ("VENV_PATH", "${APP_BASE}/venv"), + # Prefix to use for installing kernels and finding jupyter binary + ("NB_PYTHON_PREFIX", "${VENV_PATH}"), + ] + + path = [ + "${VENV_PATH}/bin" + ] + + build_scripts = [ + ( + "root", + r""" + mkdir -p ${VENV_PATH} && \ + chown -R ${NB_USER}:${NB_USER} ${VENV_PATH} + """ + ), + ( + "${NB_USER}", + r""" + python3 -m venv ${VENV_PATH} + """ + ), + ( + "${NB_USER}", + r""" + pip install --no-cache-dir \ + notebook==5.0.0 \ + jupyterhub==0.7.2 \ + ipywidgets==6.0.0 \ + jupyterlab==0.24.1 \ + nbgoogleanalytics==0.1.0 && \ + jupyter nbextension enable --py widgetsnbextension --sys-prefix && \ + jupyter serverextension enable --py jupyterlab --sys-prefix && \ + jupyter nbextension install --py nbgoogleanalytics --sys-prefix && \ + jupyter nbextension enable --py nbgoogleanalytics --sys-prefix && \ + jupyter serverextension enable --py nbgoogleanalytics --sys-prefix + """ + ) + ] + + @default('assemble_scripts') + def setup_assembly(self): + # If we have a runtime.txt & that's set to python-2.7, + # we will *not* install requirements.txt but will find & + # install a requirements3.txt file if it exists. + # This way, when using python2 venv, requirements.txt will + # be installed in the python2 venv, and requirements3.txt + # will be installed in python3 venv. This is less of a + # surprise than requiring python2 to be requirements2.txt tho. + try: + with open('runtime.txt') as f: + runtime = f.read().strip() + except FileNotFoundError: + runtime = 'python-3.5' + if runtime == 'python-2.7': + requirements_file = 'requirements3.txt' + else: + requirements_file = 'requirements.txt' + if os.path.exists(requirements_file): + return [( + '${NB_USER}', + 'pip3 install --no-cache-dir -r {}'.format(requirements_file) + )] + return [] + + def detect(self): + return os.path.exists('requirements.txt') and super() + +class CondaBuildPack(BuildPack): + name = "conda" + version = "0.1" + env = [ + ('CONDA_DIR', '${APP_BASE}/conda'), + ('NB_PYTHON_PREFIX', '${CONDA_DIR}') + ] + + path = ['${CONDA_DIR}/bin'] + + build_script_files = { + 'conda/install-miniconda.bash': '/tmp/install-miniconda.bash', + 'conda/environment.yml': '/tmp/environment.yml' + } + + build_scripts = [ + ( + "root", + r""" + bash /tmp/install-miniconda.bash && \ + rm /tmp/install-miniconda.bash /tmp/environment.yml + """ + ) + ] + + @default('assemble_scripts') + def setup_assembly(self): + assembly_scripts = [] + if os.path.exists('environment.yml'): + assembly_scripts.append(( + '${NB_USER}', + r""" + conda env update -n root -f environment.yml && \ + conda clean -tipsy + """ + )) + return assembly_scripts + + def detect(self): + return os.path.exists('environment.yml') and super().detect() + + +class Python2BuildPack(BuildPack): + name = "python2.7" + version = "0.1" + + packages = { + 'python', + 'python-dev', + 'virtualenv' + } + + env = [ + ('VENV2_PATH', '${APP_BASE}/venv2') + ] + + path = [ + "${VENV2_PATH}/bin" + ] + + build_scripts = [ + ( + "root", + r""" + mkdir -p ${VENV2_PATH} && \ + chown -R ${NB_USER}:${NB_USER} ${VENV2_PATH} + """ + ), + ( + "${NB_USER}", + r""" + virtualenv -p python2 ${VENV2_PATH} + """ + ), + ( + "${NB_USER}", + r""" + pip2 install --no-cache-dir \ + ipykernel==4.6.1 && \ + python2 -m ipykernel install --prefix=${NB_PYTHON_PREFIX} + """ + ) + ] + + @default('assemble_scripts') + def setup_assembly(self): + return [ + ( + '${NB_USER}', + 'pip2 install --no-cache-dir -r requirements.txt' + ) + ] + + def detect(self): + if os.path.exists('requirements.txt'): + try: + with open('runtime.txt') as f: + runtime = f.read().strip() + if runtime == 'python-2.7': + return True + except FileNotFoundError: + return False + return False + +class JuliaBuildPack(BuildPack): + name = "julia" + version = "0.1" + env = [ + ('JULIA_PATH', '${APP_BASE}/julia'), + ('JULIA_HOME', '${JULIA_PATH}/bin'), + ('JULIA_PKGDIR', '${JULIA_PATH}/pkg'), + ('JULIA_VERSION', '0.6.0'), + ('JUPYTER', '${NB_PYTHON_PREFIX}/bin/jupyter') + ] + + path = [ + '${JULIA_PATH}/bin' + ] + + build_scripts = [ + ( + "root", + r""" + mkdir -p ${JULIA_PATH} && \ + curl -sSL "https://julialang-s3.julialang.org/bin/linux/x64/${JULIA_VERSION%[.-]*}/julia-${JULIA_VERSION}-linux-x86_64.tar.gz" | tar -xz -C ${JULIA_PATH} --strip-components 1 + """ + ), + ( + "root", + r""" + mkdir -p ${JULIA_PKGDIR} && \ + chown ${NB_USER}:${NB_USER} ${JULIA_PKGDIR} + """ + ), + ( + "${NB_USER}", + # HACK: Can't seem to tell IJulia to install in sys-prefix + # FIXME: Find way to get it to install under /srv and not $HOME? + r""" + julia -e 'Pkg.init(); Pkg.add("IJulia")' && \ + mv ${HOME}/.local/share/jupyter/kernels/julia-0.6 ${NB_PYTHON_PREFIX}/share/jupyter/kernels/julia-0.6 + """ + ) + ] + + @default('assemble_scripts') + def setup_assembly(self): + return [( + "${NB_USER}", + r""" + cat REQUIRE >> ${JULIA_PKGDIR}/v0.6/REQUIRE && \ + julia -e "Pkg.resolve()" + """ + )] + + def detect(self): + return os.path.exists('REQUIRE') and super() class DockerBuildPack(BuildPack): - name = Unicode('Dockerfile') - def detect(self, workdir): - return os.path.exists(os.path.join(workdir, 'Dockerfile')) + name = "Dockerfile" - def build(self, workdir, ref, output_image_spec): - client = docker.APIClient(version='auto', **kwargs_from_env()) - for progress in client.build( - path=workdir, - tag=output_image_spec, - decode=True - ): - if 'stream' in progress: - if self.capture: - self.log.info(progress['stream'], extra=dict(phase='building')) - else: - sys.stdout.write(progress['stream']) + def detect(self): + return os.path.exists('Dockerfile') + def render(self): + with open('Dockerfile') as f: + return f.read() class LegacyBinderDockerBuildPack(DockerBuildPack): - name = Unicode('Legacy Binder Dockerfile') + name = 'Legacy Binder Dockerfile' dockerfile_appendix = Unicode(dedent(r""" USER root @@ -74,104 +659,48 @@ class LegacyBinderDockerBuildPack(DockerBuildPack): CMD jupyter notebook --ip 0.0.0.0 """), config=True) + def render(self): + with open('Dockerfile') as f: + return f.read() + self.dockerfile_appendix + def detect(self, workdir): - dockerfile = os.path.join(workdir, 'Dockerfile') - if not os.path.exists(dockerfile): - return False - with open(dockerfile, 'r') as f: - for line in f: - if line.startswith('FROM'): - if 'andrewosh/binder-base' in line.split('#')[0].lower(): - self.amend_dockerfile(dockerfile) - return True - else: - return False - # No FROM?! + try: + with open('Dockerfile', 'r') as f: + for line in f: + if line.startswith('FROM'): + if 'andrewosh/binder-base' in line.split('#')[0].lower(): + return True + else: + return False + except FileNotFoundError: + pass + return False - def amend_dockerfile(self, dockerfile): - with open(dockerfile, 'a') as f: - f.write(self.dockerfile_appendix) +def c(*args): + image = args[0]() + for arg in args[1:]: + image = image.compose_with(arg()) + return image + +def main(): + images = [ + LegacyBinderDockerBuildPack(), + DockerBuildPack(), + + c(BaseImage, CondaBuildPack, JuliaBuildPack), + c(BaseImage, CondaBuildPack), + + c(BaseImage, PythonBuildPack, Python2BuildPack, JuliaBuildPack), + c(BaseImage, PythonBuildPack, JuliaBuildPack), + c(BaseImage, PythonBuildPack, Python2BuildPack), + c(BaseImage, PythonBuildPack), + ] + + for i in images: + if i.detect(): + i.build('wat') + break -class S2IBuildPack(BuildPack): - # Simple subclasses of S2IBuildPack must set build_image, - # either via config or during `detect()` - build_image = Unicode('') - - def s2i_build(self, workdir, ref, output_image_spec, build_image): - # Note: Ideally we'd just copy from workdir here, rather than clone and check out again - # However, setting just --copy and not specifying a ref seems to check out master for - # some reason. Investigate deeper FIXME - cmd = [ - 's2i', - 'build', - '--exclude', '""', - '--ref', ref, - '.', - build_image, - output_image_spec, - ] - env = os.environ.copy() - # add bundled s2i to *end* of PATH, - # in case user doesn't have s2i - env['PATH'] = os.pathsep.join([env.get('PATH') or os.defpath, here]) - try: - for line in execute_cmd(cmd, cwd=workdir, env=env, capture=self.capture): - self.log.info(line, extra=dict(phase='building', builder=self.name)) - except subprocess.CalledProcessError: - self.log.error('Failed to build image!', extra=dict(phase='failed')) - sys.exit(1) - - def build(self, workdir, ref, output_image_spec): - return self.s2i_build(workdir, ref, output_image_spec, self.build_image) - - -class CondaBuildPack(S2IBuildPack): - """Build Pack for installing from a conda environment.yml using S2I""" - - name = Unicode('conda') - build_image = Unicode('jupyterhub/singleuser-builder-conda:v0.2.1', config=True) - - def detect(self, workdir): - return os.path.exists(os.path.join(workdir, 'environment.yml')) - - -class JuliaBuildPack(S2IBuildPack): - name = Unicode('julia') - build_image = Unicode('jupyterhub/singleuser-builder-julia:v0.2.3', config=True) - - def detect(self, workdir): - return os.path.exists(os.path.join(workdir, 'REQUIRE')) - - -class PythonBuildPack(S2IBuildPack): - """Build Pack for installing from a pip requirements.txt using S2I""" - name = Unicode('python-pip') - runtime_builder_map = Dict({ - 'python-2.7': 'jupyterhub/singleuser-builder-venv-2.7:v0.2.1', - 'python-3.5': 'jupyterhub/singleuser-builder-venv-3.5:v0.2.1', - 'python-3.6': 'jupyterhub/singleuser-builder-venv-3.6:v0.2.1', - }) - - runtime = Unicode( - 'python-3.5', - config=True - ) - - def detect(self, workdir): - if os.path.exists(os.path.join(workdir, 'requirements.txt')): - try: - with open(os.path.join(workdir, 'runtime.txt')) as f: - self.runtime = f.read().strip() - except FileNotFoundError: - pass - self.build_image = self.runtime_builder_map[self.runtime] - return True - -class DefaultBuildPack(S2IBuildPack): - build_image = Unicode('jupyterhub/singleuser-builder-venv-3.5:v0.2.1') - name = Unicode('default') - def detect(self, workdir): - return True - +main() diff --git a/s2i-builders/generate.py b/s2i-builders/generate.py deleted file mode 100644 index 5b596269..00000000 --- a/s2i-builders/generate.py +++ /dev/null @@ -1,707 +0,0 @@ -""" -Generates a variety of Dockerfiles based on an input matrix -""" -import textwrap -from traitlets.config import LoggingConfigurable -from traitlets import Unicode, Set, List, Dict, Tuple, Bool, default -import jinja2 -import tarfile -import io -import os -import re -import json -import docker - -TEMPLATE = r""" -FROM ubuntu:17.04 - -# Set up locales properly -RUN apt-get update && \ - apt-get install --yes --no-install-recommends locales && \ - apt-get purge && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* - -RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && \ - locale-gen - -ENV LC_ALL en_US.UTF-8 -ENV LANG en_US.UTF-8 -ENV LANGUAGE en_US.UTF-8 - -# Use bash as default shell, rather than sh -ENV SHELL /bin/bash - -# Set up user -ENV NB_USER jovyan -ENV NB_UID 1000 -ENV HOME /home/${NB_USER} - -RUN adduser --disabled-password \ - --gecos "Default user" \ - --uid ${NB_UID} \ - ${NB_USER} -WORKDIR ${HOME} - -RUN apt-get update && \ - apt-get install --yes \ - {% for package in packages -%} - {{ package }} \ - {% endfor -%} - && apt-get purge && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* - -EXPOSE 8888 - -{% if env -%} -# Almost all environment variables -{% for item in env -%} -ENV {{item[0]}} {{item[1]}} -{% endfor -%} -{% endif -%} - -{% if path -%} -# Special case PATH -ENV PATH {{ ':'.join(path) }}:${PATH} -{% endif -%} - -{% if build_script_files -%} -# If scripts required during build are present, copy them -{% for src, dst in build_script_files.items() %} -COPY {{ src }} {{ dst }} -{% endfor -%} -{% endif -%} - -{% for sd in build_script_directives -%} -{{sd}} -{% endfor %} - -# Copy and chown stuff. This doubles the size of the repo, because -# you can't actually copy as USER, only as root! Thanks, Docker! -USER root -COPY src/ ${HOME} -RUN chown -R ${NB_USER}:${NB_USER} ${HOME} - -# Run assemble scripts! These will actually build the specification -# in the repository into the image. -{% for sd in assemble_script_directives -%} -{{ sd }} -{% endfor %} - -# Container image Labels! -# Put these at the end, since we don't want to rebuild everything -# when these change! Did I mention I hate Dockerfile cache semantics? -{% for k, v in labels.items() -%} -LABEL {{k}}={{v}} -{%- endfor %} - -# We always want containers to run as non-root -USER ${NB_USER} -""" - - -class BuildPack(LoggingConfigurable): - """ - A composable BuildPack. - - Specifically used for creating Dockerfiles for use with repo2docker only. - - Things that are kept constant: - - base image - - some environment variables (such as locale) - - user creation & ownership of home directory - - working directory - - Everything that is configurable is additive & deduplicative, - and there are *some* general guarantees of ordering. - - """ - packages = Set( - set(), - help=""" - List of packages that are installed in this BuildPack by default. - - Versions are not specified, and ordering is not guaranteed. These - are usually installed as apt packages. - """ - ) - env = List( - [], - help=""" - Ordered list of environment variables to be set for this image. - - Ordered so that environment variables can use other environment - variables in their values. - - Expects tuples, with the first item being the environment variable - name and the second item being the value. - """ - ) - - path = List( - [], - help=""" - Ordered list of file system paths to look for executables in. - - Just sets the PATH environment variable. Separated out since - it is very commonly set by various buildpacks. - """ - ) - - labels = Dict( - {}, - help=""" - Docker labels to set on the built image. - """ - ) - - build_script_files = Dict( - {}, - help=""" - List of files to be copied to the container image for use in building. - - This is copied before the `build_scripts` & `assemble_scripts` are - run, so can be executed from either of them. - - It's a dictionary where the key is the source file path in the host - system, and the value is the destination file path inside the - container image. - """ - ) - - build_scripts = List( - [], - help=""" - Ordered list of shell script snippets to build the base image. - - A list of tuples, where the first item is a username & the - second is a single logical line of a bash script that should - be RUN as that user. - - These are run before the source of the repository is copied - into the container image, and hence can not reference stuff - from the repository. When the build scripts are done, the - container image should be in a state where it is generically - re-useable for building various other repositories with - similar environments. - - You can use environment variable substitutions in both the - username and the execution script. - """ - ) - - assemble_scripts = List( - [], - help=""" - Ordered list of shell script snippets to build the repo into the image. - - A list of tuples, where the first item is a username & the - second is a single logical line of a bash script that should - be RUN as that user. - - These are run after the source of the repository is copied into - the container image (into the current directory). These should be - the scripts that actually build the repository into the container - image. - - If this needs to be dynamically determined (based on the presence - or absence of certain files, for example), you can create any - method and decorate it with `traitlets.default('assemble_scripts)` - and the return value of this method is used as the value of - assemble_scripts. You can expect that the script is running in - the current directory of the repository being built when doing - dynamic detection. - - You can use environment variable substitutions in both the - username and the execution script. - """ - ) - - name = Unicode( - help=""" - Name of the BuildPack! - """ - ) - - components = Tuple(()) - - def compose_with(self, other): - """ - Compose this BuildPack with another, returning a new one - - Ordering does matter - the properties of the current BuildPack take - precedence (wherever that matters) over the properties of other - BuildPack. If there are any conflicts, this method is responsible - for resolving them. - """ - result = BuildPack(parent=self) - labels = {} - labels.update(self.labels) - labels.update(other.labels) - result.labels = labels - result.packages = self.packages.union(other.packages) - result.path = self.path + other.path - # FIXME: Deduplicate Env - result.env = self.env + other.env - result.build_scripts = self.build_scripts + other.build_scripts - result.assemble_scripts = self.assemble_scripts + other.assemble_scripts - - build_script_files = {} - build_script_files.update(self.build_script_files) - build_script_files.update(other.build_script_files) - result.build_script_files = build_script_files - - result.name = "{}-{}".format(self.name, other.name) - - result.components = (self, ) + self.components + (other, ) + other.components - return result - - def detect(self): - return all([p.detect() for p in self.components]) - - def render(self): - """ - Render BuildPack into Dockerfile - """ - t = jinja2.Template(TEMPLATE) - - build_script_directives = [] - last_user = 'root' - for user, script in self.build_scripts: - if last_user != user: - build_script_directives.append("USER {}".format(user)) - last_user = user - build_script_directives.append("RUN {}".format( - textwrap.dedent(script.strip('\n')) - )) - - assemble_script_directives = [] - last_user = 'root' - for user, script in self.assemble_scripts: - if last_user != user: - build_script_directives.append("USER {}".format(user)) - last_user = user - assemble_script_directives.append("RUN {}".format( - textwrap.dedent(script.strip('\n')) - )) - - return t.render( - packages=sorted(self.packages), - path=self.path, - env=self.env, - labels=self.labels, - build_script_directives=build_script_directives, - assemble_script_directives=assemble_script_directives, - build_script_files=self.build_script_files - ) - - def build(self, image_spec): - tarf = io.BytesIO() - tar = tarfile.open(fileobj=tarf, mode='x') - dockerfile_tarinfo = tarfile.TarInfo("Dockerfile") - dockerfile = self.render().encode('utf-8') - dockerfile_tarinfo.size = len(dockerfile) - - tar.addfile( - dockerfile_tarinfo, - io.BytesIO(dockerfile) - ) - - for src in self.build_script_files: - tar.add(src) - - tar.add('.', 'src/') - - tar.close() - tarf.seek(0) - - client = docker.APIClient(version='auto', **docker.utils.kwargs_from_env()) - for line in client.build( - fileobj=tarf, - tag=image_spec, - custom_context=True, - decode=True - ): - if 'stream' in line: - print(line['stream'], end='') - elif 'error' in line: - print(line['error'], end='') - break - else: - raise ValueError("Unexpected return from docker builder: {}".format(json.dumps(line))) - else: - print("Built image", image_spec) - - -class BaseImage(BuildPack): - name = "repo2docker" - version = "0.1" - packages = { - # Utils! - "git", - "tar", - "curl", - "wget", - "less", - # Build tools - "build-essential", - "pkg-config", - } - - labels = { - "io.openshift.s2i.scripts-url": "image:///usr/libexec/s2i" - } - - env = [ - ("APP_BASE", "/srv") - ] - - def detect(self): - return True - - @default('assemble_scripts') - def setup_assembly(self): - assemble_scripts = [] - try: - with open('apt.txt') as f: - extra_apt_packages = [l.strip() for l in f] - # Validate that this is, indeed, just a list of packages - # We're doing shell injection around here, gotta be careful. - # FIXME: Add support for specifying version numbers - for p in extra_apt_packages: - if not re.match(r"^[a-z0-9.+-]+", p): - raise ValueError("Found invalid package name {} in apt.txt".format(p)) - - assemble_scripts.append(( - 'root', - r""" - apt-get update && \ - apt-get install --yes --no-install-recommends {} && \ - apt-get purge && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* - """.format(' '.join(extra_apt_packages)) - )) - except FileNotFoundError: - pass - return assemble_scripts - - -class PythonBuildPack(BuildPack): - name = "python3.5" - version = "0.1" - - packages = { - 'python3', - 'python3-venv', - 'python3-dev', - } - - env = [ - ("VENV_PATH", "${APP_BASE}/venv"), - # Prefix to use for installing kernels and finding jupyter binary - ("NB_PYTHON_PREFIX", "${VENV_PATH}"), - ] - - path = [ - "${VENV_PATH}/bin" - ] - - build_scripts = [ - ( - "root", - r""" - mkdir -p ${VENV_PATH} && \ - chown -R ${NB_USER}:${NB_USER} ${VENV_PATH} - """ - ), - ( - "${NB_USER}", - r""" - python3 -m venv ${VENV_PATH} - """ - ), - ( - "${NB_USER}", - r""" - pip install --no-cache-dir \ - notebook==5.0.0 \ - jupyterhub==0.7.2 \ - ipywidgets==6.0.0 \ - jupyterlab==0.24.1 \ - nbgoogleanalytics==0.1.0 && \ - jupyter nbextension enable --py widgetsnbextension --sys-prefix && \ - jupyter serverextension enable --py jupyterlab --sys-prefix && \ - jupyter nbextension install --py nbgoogleanalytics --sys-prefix && \ - jupyter nbextension enable --py nbgoogleanalytics --sys-prefix && \ - jupyter serverextension enable --py nbgoogleanalytics --sys-prefix - """ - ) - ] - - @default('assemble_scripts') - def setup_assembly(self): - # If we have a runtime.txt & that's set to python-2.7, - # we will *not* install requirements.txt but will find & - # install a requirements3.txt file if it exists. - # This way, when using python2 venv, requirements.txt will - # be installed in the python2 venv, and requirements3.txt - # will be installed in python3 venv. This is less of a - # surprise than requiring python2 to be requirements2.txt tho. - try: - with open('runtime.txt') as f: - runtime = f.read().strip() - except FileNotFoundError: - runtime = 'python-3.5' - if runtime == 'python-2.7': - requirements_file = 'requirements3.txt' - else: - requirements_file = 'requirements.txt' - if os.path.exists(requirements_file): - return [( - '${NB_USER}', - 'pip3 install --no-cache-dir -r {}'.format(requirements_file) - )] - return [] - - def detect(self): - return os.path.exists('requirements.txt') and super() - -class CondaBuildPack(BuildPack): - name = "conda" - version = "0.1" - env = [ - ('CONDA_DIR', '${APP_BASE}/conda'), - ('NB_PYTHON_PREFIX', '${CONDA_DIR}') - ] - - path = ['${CONDA_DIR}/bin'] - - build_script_files = { - 'conda/install-miniconda.bash': '/tmp/install-miniconda.bash', - 'conda/environment.yml': '/tmp/environment.yml' - } - - build_scripts = [ - ( - "root", - r""" - bash /tmp/install-miniconda.bash && \ - rm /tmp/install-miniconda.bash /tmp/environment.yml - """ - ) - ] - - @default('assemble_scripts') - def setup_assembly(self): - assembly_scripts = [] - if os.path.exists('environment.yml'): - assembly_scripts.append(( - '${NB_USER}', - r""" - conda env update -n root -f environment.yml && \ - conda clean -tipsy - """ - )) - return assembly_scripts - - def detect(self): - return os.path.exists('environment.yml') and super().detect() - - -class Python2BuildPack(BuildPack): - name = "python2.7" - version = "0.1" - - packages = { - 'python', - 'python-dev', - 'virtualenv' - } - - env = [ - ('VENV2_PATH', '${APP_BASE}/venv2') - ] - - path = [ - "${VENV2_PATH}/bin" - ] - - build_scripts = [ - ( - "root", - r""" - mkdir -p ${VENV2_PATH} && \ - chown -R ${NB_USER}:${NB_USER} ${VENV2_PATH} - """ - ), - ( - "${NB_USER}", - r""" - virtualenv -p python2 ${VENV2_PATH} - """ - ), - ( - "${NB_USER}", - r""" - pip2 install --no-cache-dir \ - ipykernel==4.6.1 && \ - python2 -m ipykernel install --prefix=${NB_PYTHON_PREFIX} - """ - ) - ] - - @default('assemble_scripts') - def setup_assembly(self): - return [ - ( - '${NB_USER}', - 'pip2 install --no-cache-dir -r requirements.txt' - ) - ] - - def detect(self): - if os.path.exists('requirements.txt'): - try: - with open('runtime.txt') as f: - runtime = f.read().strip() - if runtime != 'python2.7': - return False - except FileNotFoundError: - return False - return super() - return False - -class JuliaBuildPack(BuildPack): - name = "julia" - version = "0.1" - env = [ - ('JULIA_PATH', '${APP_BASE}/julia'), - ('JULIA_HOME', '${JULIA_PATH}/bin'), - ('JULIA_PKGDIR', '${JULIA_PATH}/pkg'), - ('JULIA_VERSION', '0.6.0'), - ('JUPYTER', '${NB_PYTHON_PREFIX}/bin/jupyter') - ] - - path = [ - '${JULIA_PATH}/bin' - ] - - build_scripts = [ - ( - "root", - r""" - mkdir -p ${JULIA_PATH} && \ - curl -sSL "https://julialang-s3.julialang.org/bin/linux/x64/${JULIA_VERSION%[.-]*}/julia-${JULIA_VERSION}-linux-x86_64.tar.gz" | tar -xz -C ${JULIA_PATH} --strip-components 1 - """ - ), - ( - "root", - r""" - mkdir -p ${JULIA_PKGDIR} && \ - chown ${NB_USER}:${NB_USER} ${JULIA_PKGDIR} - """ - ), - ( - "${NB_USER}", - # HACK: Can't seem to tell IJulia to install in sys-prefix - # FIXME: Find way to get it to install under /srv and not $HOME? - r""" - julia -e 'Pkg.init(); Pkg.add("IJulia")' && \ - mv ${HOME}/.local/share/jupyter/kernels/julia-0.6 ${NB_PYTHON_PREFIX}/share/jupyter/kernels/julia-0.6 - """ - ) - ] - - @default('assemble_scripts') - def setup_assembly(self): - return [( - "${NB_USER}", - r""" - cat REQUIRE >> ${JULIA_PKGDIR}/v0.6/REQUIRE && \ - julia -e "Pkg.resolve()" - """ - )] - - def detect(self): - return os.path.exists('REQUIRE') and super() - - -class DockerBuildPack(BuildPack): - name = "Dockerfile" - - def detect(self): - return os.path.exists('Dockerfile') - - def render(self): - with open('Dockerfile') as f: - return f.read() - -class LegacyBinderDockerBuildPack(DockerBuildPack): - - name = 'Legacy Binder Dockerfile' - - dockerfile_appendix = Unicode(dedent(r""" - USER root - COPY . /home/main/notebooks - RUN chown -R main:main /home/main/notebooks - USER main - WORKDIR /home/main/notebooks - ENV PATH /home/main/anaconda2/envs/python3/bin:$PATH - RUN conda install -n python3 notebook==5.0.0 ipykernel==4.6.0 && \ - pip install jupyterhub==0.7.2 && \ - conda remove -n python3 nb_conda_kernels && \ - conda install -n root ipykernel==4.6.0 && \ - /home/main/anaconda2/envs/python3/bin/ipython kernel install --sys-prefix && \ - /home/main/anaconda2/bin/ipython kernel install --prefix=/home/main/anaconda2/envs/python3 && \ - /home/main/anaconda2/bin/ipython kernel install --sys-prefix - ENV JUPYTER_PATH /home/main/anaconda2/share/jupyter:$JUPYTER_PATH - CMD jupyter notebook --ip 0.0.0.0 - """), config=True) - - def render(self): - with open('Dockerfile') as f: - return f.read() + self.dockerfile_appendix - - def detect(self, workdir): - try: - with open('Dockerfile', 'r') as f: - for line in f: - if line.startswith('FROM'): - if 'andrewosh/binder-base' in line.split('#')[0].lower(): - return True - else: - return False - except FileNotFoundError: - pass - - return False - -def c(*args): - image = args[0]() - for arg in args[1:]: - image = image.compose_with(arg()) - return image - -def main(): - images = [ - LegacyBinderDockerBuildPack(), - DockerBuildPack(), - - c(BaseImage, CondaBuildPack, JuliaBuildPack), - c(BaseImage, CondaBuildPack), - - c(BaseImage, PythonBuildPack, Python2BuildPack, JuliaBuildPack), - c(BaseImage, PythonBuildPack, JuliaBuildPack), - c(BaseImage, PythonBuildPack, Python2BuildPack), - c(BaseImage, PythonBuildPack), - ] - - for i in images: - if i.detect(): - i.build('wat') - break - - -main()