Merge pull request #743 from minrk/conda-env-first-again-again

[MRG] preassembly for conda/python
pull/752/head
Tim Head 2019-07-19 10:36:15 +02:00 zatwierdzone przez GitHub
commit 6c5c09b62b
Nie znaleziono w bazie danych klucza dla tego podpisu
ID klucza GPG: 4AEE18F83AFDEB23
6 zmienionych plików z 218 dodań i 49 usunięć

Wyświetl plik

@ -132,6 +132,11 @@ COPY src/{{ src }} ${REPO_DIR}/{{ dst }}
{% endfor -%}
{% endif -%}
{% if preassemble_script_directives -%}
USER root
RUN chown -R ${NB_USER}:${NB_USER} ${REPO_DIR}
{% endif -%}
{% for sd in preassemble_script_directives -%}
{{ sd }}
{% endfor %}
@ -711,12 +716,8 @@ class BaseImage(BuildPack):
except FileNotFoundError:
pass
return scripts
def get_assemble_scripts(self):
assemble_scripts = []
if "py" in self.stencila_contexts:
assemble_scripts.extend(
scripts.extend(
[
(
"${NB_USER}",
@ -728,7 +729,7 @@ class BaseImage(BuildPack):
]
)
if self.stencila_manifest_dir:
assemble_scripts.extend(
scripts.extend(
[
(
"${NB_USER}",
@ -741,7 +742,11 @@ class BaseImage(BuildPack):
)
]
)
return assemble_scripts
return scripts
def get_assemble_scripts(self):
    """Return directives to run after the entire repository is in the image.

    The base image contributes nothing here; buildpack subclasses extend
    this with their own post-copy install steps.
    """
    assemble_directives = []
    return assemble_directives
def get_post_build_scripts(self):
post_build = self.binder_path("postBuild")

Wyświetl plik

@ -6,6 +6,7 @@ from collections import Mapping
from ruamel.yaml import YAML
from ..base import BaseImage
from ...utils import is_local_pip_requirement
# pattern for parsing conda dependency line
PYTHON_REGEX = re.compile(r"python\s*=+\s*([\d\.]*)")
@ -127,6 +128,50 @@ class CondaBuildPack(BaseImage):
files.update(super().get_build_script_files())
return files
# cache for the parsed contents of environment.yml
_environment_yaml = None

@property
def environment_yaml(self):
    """Parsed environment.yml as a mapping (cached).

    Returns {} when the file is absent or empty; raises TypeError when
    the file parses to something other than a mapping.
    """
    if self._environment_yaml is not None:
        # already parsed once; reuse the cached mapping
        return self._environment_yaml

    env = {}
    env_path = self.binder_path("environment.yml")
    if os.path.exists(env_path):
        with open(env_path) as f:
            env = YAML().load(f)
        # an empty file parses to None; normalize to an empty mapping
        if env is None:
            env = {}
        # reject lists or other non-mapping top-level structures
        if not isinstance(env, Mapping):
            raise TypeError(
                "environment.yml should contain a dictionary. Got %r" % type(env)
            )
    self._environment_yaml = env
    return self._environment_yaml
@property
def _should_preassemble_env(self):
    """Check for local pip requirements in environment.yml.

    If the pip section contains any local references (e.g. ``-e .``),
    the whole repo must be staged before installation, so preassembly
    is disabled (returns False).
    """
    pip_reqs = None
    # the last dict entry with a truthy "pip" key wins, matching conda's
    # own handling of the pip subsection
    for entry in self.environment_yaml.get("dependencies", []):
        if isinstance(entry, dict) and entry.get("pip"):
            pip_reqs = entry["pip"]
    if isinstance(pip_reqs, list):
        return not any(is_local_pip_requirement(req) for req in pip_reqs)
    return True
@property
def python_version(self):
"""Detect the Python version for a given `environment.yml`
@ -135,31 +180,17 @@ class CondaBuildPack(BaseImage):
or a Falsy empty string '' if not found.
"""
environment_yml = self.binder_path("environment.yml")
if not os.path.exists(environment_yml):
return ""
if not hasattr(self, "_python_version"):
py_version = None
with open(environment_yml) as f:
env = YAML().load(f)
# check if the env file is empty, if so instantiate an empty dictionary.
if env is None:
env = {}
# check if the env file provided a dict-like thing, not a list or other data structure.
if not isinstance(env, Mapping):
raise TypeError(
"environment.yml should contain a dictionary. Got %r"
% type(env)
)
for dep in env.get("dependencies", []):
if not isinstance(dep, str):
continue
match = PYTHON_REGEX.match(dep)
if not match:
continue
py_version = match.group(1)
break
env = self.environment_yaml
for dep in env.get("dependencies", []):
if not isinstance(dep, str):
continue
match = PYTHON_REGEX.match(dep)
if not match:
continue
py_version = match.group(1)
break
# extract major.minor
if py_version:
@ -178,14 +209,27 @@ class CondaBuildPack(BaseImage):
"""Am I building a Python 2 kernel environment?"""
return self.python_version and self.python_version.split(".")[0] == "2"
def get_assemble_scripts(self):
def get_preassemble_script_files(self):
    """Preassembly only requires environment.yml.

    Copying just this file first lets Docker cache the assembled conda
    environment even when other repo contents change.
    """
    files = super().get_preassemble_script_files()
    if self._should_preassemble_env:
        env_path = self.binder_path("environment.yml")
        if os.path.exists(env_path):
            files[env_path] = env_path
    return files
def get_env_scripts(self):
"""Return series of build-steps specific to this source repository.
"""
assembly_scripts = []
scripts = []
environment_yml = self.binder_path("environment.yml")
env_prefix = "${KERNEL_PYTHON_PREFIX}" if self.py2 else "${NB_PYTHON_PREFIX}"
if os.path.exists(environment_yml):
assembly_scripts.append(
scripts.append(
(
"${NB_USER}",
r"""
@ -197,7 +241,19 @@ class CondaBuildPack(BaseImage):
),
)
)
return super().get_assemble_scripts() + assembly_scripts
return scripts
def get_preassemble_scripts(self):
    """Scripts safe to run before the full repository is staged."""
    scripts = super().get_preassemble_scripts()
    if self._should_preassemble_env:
        # environment.yml has no local references, so the env can be
        # built from that file alone
        scripts += self.get_env_scripts()
    return scripts
def get_assemble_scripts(self):
    """Scripts that require the full repository contents."""
    scripts = super().get_assemble_scripts()
    if not self._should_preassemble_env:
        # local references in environment.yml mean the env build must
        # wait until the whole repo is present
        scripts += self.get_env_scripts()
    return scripts
def detect(self):
"""Check if current repo should be built with the Conda BuildPack.

Wyświetl plik

@ -74,6 +74,24 @@ class PipfileBuildPack(CondaBuildPack):
self._python_version = self.major_pythons["3"]
return self._python_version
def get_preassemble_script_files(self):
    """Return files needed for preassembly."""
    files = super().get_preassemble_script_files()
    for fname in ("requirements3.txt", "Pipfile", "Pipfile.lock"):
        candidate = self.binder_path(fname)
        if os.path.exists(candidate):
            files[candidate] = candidate
    return files
def get_preassemble_scripts(self):
    """Scripts to run prior to staging the repo contents."""
    scripts = super().get_preassemble_scripts()
    # pipenv (pinned for reproducibility) installs dependencies from
    # Pipfile.lock or Pipfile; it needs no repo files itself
    scripts.append(
        ("${NB_USER}", "${KERNEL_PYTHON_PREFIX}/bin/pip install pipenv==2018.11.26")
    )
    return scripts
def get_assemble_scripts(self):
"""Return series of build-steps specific to this repository.
"""
@ -113,11 +131,6 @@ class PipfileBuildPack(CondaBuildPack):
# my_package_example = {path=".", editable=true}
working_directory = self.binder_dir or "."
# install pipenv to install dependencies within Pipfile.lock or Pipfile
assemble_scripts.append(
("${NB_USER}", "${KERNEL_PYTHON_PREFIX}/bin/pip install pipenv==2018.11.26")
)
# NOTES:
# - Without prioritizing the PATH to KERNEL_PYTHON_PREFIX over
# NB_SERVER_PYTHON_PREFIX, 'pipenv' draws the wrong conclusion about

Wyświetl plik

@ -2,6 +2,7 @@
import os
from ..conda import CondaBuildPack
from ...utils import is_local_pip_requirement
class PythonBuildPack(CondaBuildPack):
@ -34,24 +35,22 @@ class PythonBuildPack(CondaBuildPack):
self._python_version = py_version
return self._python_version
def get_assemble_scripts(self):
"""Return series of build-steps specific to this repository.
def _get_pip_scripts(self):
"""Get pip install scripts
added to preassemble unless local references are found,
in which case this happens in assemble.
"""
# If we have a runtime.txt & that's set to python-2.7,
# requirements.txt will be installed in the *kernel* env
# and requirements3.txt (if it exists)
# will be installed in the python 3 notebook server env.
assemble_scripts = super().get_assemble_scripts()
setup_py = "setup.py"
# KERNEL_PYTHON_PREFIX is the env with the kernel,
# whether it's distinct from the notebook or the same.
pip = "${KERNEL_PYTHON_PREFIX}/bin/pip"
scripts = []
if self.py2:
# using python 2 kernel,
# requirements3.txt allows installation in the notebook server env
nb_requirements_file = self.binder_path("requirements3.txt")
if os.path.exists(nb_requirements_file):
assemble_scripts.append(
scripts.append(
(
"${NB_USER}",
# want the $NB_PYTHON_PREFIX environment variable, not for
@ -65,12 +64,65 @@ class PythonBuildPack(CondaBuildPack):
# install requirements.txt in the kernel env
requirements_file = self.binder_path("requirements.txt")
if os.path.exists(requirements_file):
assemble_scripts.append(
scripts.append(
(
"${NB_USER}",
'{} install --no-cache-dir -r "{}"'.format(pip, requirements_file),
)
)
return scripts
@property
def _should_preassemble_pip(self):
    """Whether pip installs can run before the repo is staged.

    Returns False when a root-level setup.py is in use, or when any
    requirements file contains a local reference (e.g. ``-e .``) —
    both cases need the full repository contents.
    """
    # setup.py at the repo root (no binder/ dir) needs the whole repo
    if os.path.exists("setup.py") and not os.path.exists("binder"):
        return False
    for fname in ("requirements.txt", "requirements3.txt"):
        req_path = self.binder_path(fname)
        if not os.path.exists(req_path):
            continue
        with open(req_path) as f:
            if any(is_local_pip_requirement(line) for line in f):
                return False
    # no local references found; safe to install from the subset
    return True
def get_preassemble_script_files(self):
    """Map requirements files into the image for preassembly."""
    files = super().get_preassemble_script_files()
    for fname in ("requirements.txt", "requirements3.txt"):
        req_path = self.binder_path(fname)
        if os.path.exists(req_path):
            files[req_path] = req_path
    return files
def get_preassemble_scripts(self):
    """Return scripts to run before adding the full repository."""
    scripts = super().get_preassemble_scripts()
    if self._should_preassemble_pip:
        # no local references: pip installs can happen early and be cached
        scripts += self._get_pip_scripts()
    return scripts
def get_assemble_scripts(self):
"""Return series of build steps that require the full repository"""
# If we have a runtime.txt & that's set to python-2.7,
# requirements.txt will be installed in the *kernel* env
# and requirements3.txt (if it exists)
# will be installed in the python 3 notebook server env.
assemble_scripts = super().get_assemble_scripts()
setup_py = "setup.py"
# KERNEL_PYTHON_PREFIX is the env with the kernel,
# whether it's distinct from the notebook or the same.
pip = "${KERNEL_PYTHON_PREFIX}/bin/pip"
if not self._should_preassemble_pip:
assemble_scripts.extend(self._get_pip_scripts())
# setup.py exists *and* binder dir is not used
if not self.binder_dir and os.path.exists(setup_py):

Wyświetl plik

@ -431,3 +431,29 @@ def normalize_doi(val):
(e.g. https://doi.org/10.1234/jshd123)"""
m = doi_regexp.match(val)
return m.group(2)
def is_local_pip_requirement(line):
    """Return whether a pip requirement (e.g. a requirements.txt line) references a local file.

    Local references mean the requirement cannot be installed before the
    repository contents are staged.
    """
    # trim a trailing comment and surrounding whitespace
    spec = line.split("#", 1)[0].strip()
    if not spec:
        # blank or comment-only line
        return False
    # -r / -c include another file, which may not be staged yet
    if spec.startswith(("-r", "-c")):
        return True
    # strip a leading flag such as `-e` from the front
    if spec.startswith("-"):
        spec = spec.split(None, 1)[1]
    # file:// URLs always point at the local filesystem
    if "file://" in spec:
        return True
    # for URL-style specs, inspect only the path portion
    # (handles forms like git://../local/file)
    path = spec.split("://", 1)[1] if "://" in spec else spec
    return path.startswith(".")

Wyświetl plik

@ -110,3 +110,20 @@ def test_normalize_doi():
assert utils.normalize_doi("http://doi.org/10.1234/jshd123") == "10.1234/jshd123"
assert utils.normalize_doi("https://doi.org/10.1234/jshd123") == "10.1234/jshd123"
assert utils.normalize_doi("http://dx.doi.org/10.1234/jshd123") == "10.1234/jshd123"
# parametrized over representative requirement lines: local file
# references must be flagged True, remote/package specs False
@pytest.mark.parametrize(
    "req, is_local",
    [
        ("-r requirements.txt", True),
        ("-e .", True),
        ("file://subdir", True),
        ("file://./subdir", True),
        ("git://github.com/jupyter/repo2docker", False),
        ("git+https://github.com/jupyter/repo2docker", False),
        ("numpy", False),
        ("# -e .", False),
    ],
)
def test_local_pip_requirement(req, is_local):
    """Each requirement line is classified local/non-local as expected."""
    assert utils.is_local_pip_requirement(req) == is_local