# Source: repo2docker/repo2docker/buildpacks/conda/__init__.py
# (344 lines, 12 KiB, Python)

"""BuildPack for conda environments"""
import os
import re
from collections.abc import Mapping
from ruamel.yaml import YAML
from ..base import BaseImage
from .._r_base import rstudio_base_scripts, IRKERNEL_VERSION
from ...utils import is_local_pip_requirement
# Patterns for parsing a conda dependency line such as "python=3.7" or
# "r-base = 3.6". Group 1 captures the run of digits and dots that follows
# the "=" sign(s); it is an empty string when no version is given.
PYTHON_REGEX = re.compile(r"python\s*=+\s*([\d\.]*)")
R_REGEX = re.compile(r"r-base\s*=+\s*([\d\.]*)")
# Absolute path of the directory that contains this file
HERE = os.path.dirname(os.path.abspath(__file__))
class CondaBuildPack(BaseImage):
    """A conda BuildPack.

    The conda distribution is installed by `install-miniforge.bash`, and the
    repository's `environment.yml` is applied on top of a frozen base
    environment.
    """

    # Full "major.minor" version to use when environment.yml pins only a
    # major Python version (e.g. `python=3`).
    major_pythons = {"2": "2.7", "3": "3.7"}

    # Cache for the parsed environment.yml; None means "not loaded yet".
    _environment_yaml = None

    def get_build_env(self):
        """Return environment variables to be set.

        We set `CONDA_DIR` to the conda install directory and
        the `NB_PYTHON_PREFIX` to the location of the jupyter binary.
        `KERNEL_PYTHON_PREFIX` is a separate `kernel` env for Python 2
        and the same as `NB_PYTHON_PREFIX` otherwise.
        """
        env = super().get_build_env() + [
            ("CONDA_DIR", "${APP_BASE}/conda"),
            ("NB_PYTHON_PREFIX", "${CONDA_DIR}/envs/notebook"),
        ]
        if self.py2:
            env.append(("KERNEL_PYTHON_PREFIX", "${CONDA_DIR}/envs/kernel"))
        else:
            env.append(("KERNEL_PYTHON_PREFIX", "${NB_PYTHON_PREFIX}"))
        return env

    def get_env(self):
        """Make kernel env the default for `conda install`"""
        return super().get_env() + [("CONDA_DEFAULT_ENV", "${KERNEL_PYTHON_PREFIX}")]

    def get_path(self):
        """Return paths (including conda environment path) to be added to
        the PATH environment variable.

        Entries are inserted at the front, so the final order is
        notebook env, kernel env (Python 2 only), conda itself, then
        whatever the parent class returned.
        """
        path = super().get_path()
        path.insert(0, "${CONDA_DIR}/bin")
        if self.py2:
            path.insert(0, "${KERNEL_PYTHON_PREFIX}/bin")
        path.insert(0, "${NB_PYTHON_PREFIX}/bin")
        return path

    def get_build_scripts(self):
        """
        Return series of build-steps common to all Python 3 repositories.

        All scripts here should be independent of contents of the repository.

        This sets up through `install-miniforge.bash` (found in this directory):

        - a directory for the conda environment and its ownership by the
          notebook user
        - a Python 3 interpreter for the conda environment
        - a Python 3 jupyter kernel
        - a frozen base set of requirements, including:
            - support for Jupyter widgets
            - support for JupyterLab
            - support for nteract
        """
        # The script lines stay flush-left: any leading whitespace would
        # become part of the raw string.
        return super().get_build_scripts() + [
            (
                "root",
                r"""
bash /tmp/install-miniforge.bash && \
rm /tmp/install-miniforge.bash /tmp/environment.yml
""",
            )
        ]

    def get_build_script_files(self):
        """
        Dict of files to be copied to the container image for use in building.

        This is copied before the `build_scripts` & `assemble_scripts` are
        run, so can be executed from either of them.

        It's a dictionary where the key is the source file path in the host
        system, and the value is the destination file path inside the
        container image.

        This currently adds a frozen set of Python requirements to the dict
        of files.
        """
        files = {
            "conda/install-miniforge.bash": "/tmp/install-miniforge.bash",
            "conda/activate-conda.sh": "/etc/profile.d/activate-conda.sh",
        }
        py_version = self.python_version
        # pass the argument to the logger instead of pre-formatting the message
        self.log.info("Building conda environment for python=%s", py_version)
        # Select the frozen base environment based on Python version.
        # avoids expensive and possibly conflicting upgrades when changing
        # major Python versions during upgrade.
        # If no version is specified or no matching X.Y version is found,
        # the default base environment is used.
        frozen_name = "environment.frozen.yml"
        if py_version:
            if self.py2:
                # python 2 goes in a different env
                files[
                    "conda/environment.py-2.7.frozen.yml"
                ] = "/tmp/kernel-environment.yml"
            else:
                py_frozen_name = "environment.py-{py}.frozen.yml".format(py=py_version)
                if os.path.exists(os.path.join(HERE, py_frozen_name)):
                    frozen_name = py_frozen_name
                else:
                    self.log.warning("No frozen env: %s", py_frozen_name)
        files["conda/" + frozen_name] = "/tmp/environment.yml"
        # entries from the parent class take precedence on key collisions
        files.update(super().get_build_script_files())
        return files

    @property
    def environment_yaml(self):
        """Return the parsed contents of `environment.yml` as a mapping.

        The file is located through `binder_path` and parsed at most once;
        the result is cached on the instance. A missing or empty file
        yields an empty dict.

        Raises TypeError if the top-level value of the file is not a mapping.
        """
        if self._environment_yaml is not None:
            return self._environment_yaml

        environment_yml = self.binder_path("environment.yml")
        if not os.path.exists(environment_yml):
            self._environment_yaml = {}
            return self._environment_yaml

        # decode explicitly as UTF-8 rather than relying on the locale default
        with open(environment_yml, encoding="utf-8") as f:
            env = YAML().load(f)
            # check if the env file is empty, if so instantiate an empty dictionary.
            if env is None:
                env = {}
            # check if the env file provided a dict-like thing not a list or other data structure.
            if not isinstance(env, Mapping):
                raise TypeError(
                    "environment.yml should contain a dictionary. Got %r" % type(env)
                )
            self._environment_yaml = env

        return self._environment_yaml

    @property
    def _conda_dependencies(self):
        """Return the `dependencies` entries of environment.yml.

        Falls back to an empty list when the key is absent or its value is
        empty (a bare `dependencies:` line loads as None), so callers can
        always iterate over the result.
        """
        return self.environment_yaml.get("dependencies") or []

    @property
    def _should_preassemble_env(self):
        """Check for local pip requirements in environment.yaml

        If there are any local references, e.g. `-e .`,
        stage the whole repo prior to installation.

        Returns False when a local pip requirement is found and True
        otherwise.
        """
        pip_requirements = None
        for dep in self._conda_dependencies:
            if isinstance(dep, dict) and dep.get("pip"):
                # when several `pip` sections exist, the last one wins
                pip_requirements = dep["pip"]
        if isinstance(pip_requirements, list):
            return not any(
                is_local_pip_requirement(line) for line in pip_requirements
            )
        return True

    @property
    def python_version(self):
        """Detect the Python version for a given `environment.yml`

        Will return 'x.y' if version is found (e.g '3.6'),
        or a Falsy empty string '' if not found.

        A bare major version (e.g. `python=3`) is expanded through
        `major_pythons`; an unknown major also yields ''.

        Version information below the minor level is dropped.
        The result is cached on the instance as `_python_version`.
        """
        if not hasattr(self, "_python_version"):
            py_version = None
            for dep in self._conda_dependencies:
                if not isinstance(dep, str):
                    continue
                match = PYTHON_REGEX.match(dep)
                if match:
                    # only the first matching dependency is considered
                    py_version = match.group(1)
                    break

            if not py_version:
                self._python_version = ""
            elif len(py_version) == 1:
                # major only: look up the full version, '' when unknown
                self._python_version = self.major_pythons.get(py_version, "")
            else:
                # keep major.minor
                self._python_version = ".".join(py_version.split(".")[:2])

        return self._python_version

    @property
    def r_version(self):
        """Detect the R version for a given `environment.yml`

        Will return the version pinned for `r-base` if found (e.g '3.6'),
        or a Falsy empty string '' if not found.

        The result is cached on the instance as `_r_version`.
        """
        if not hasattr(self, "_r_version"):
            self._r_version = ""
            for dep in self._conda_dependencies:
                if not isinstance(dep, str):
                    continue
                match = R_REGEX.match(dep)
                if match:
                    # only the first matching dependency is considered
                    self._r_version = match.group(1)
                    break

        return self._r_version

    @property
    def uses_r(self):
        """Detect whether the user also installs R packages.

        Will return True when a package prefixed with 'r-' is being installed.
        The result is cached on the instance as `_uses_r`.
        """
        if not hasattr(self, "_uses_r"):
            self._uses_r = any(
                isinstance(dep, str) and dep.startswith("r-")
                for dep in self._conda_dependencies
            )
        return self._uses_r

    @property
    def py2(self):
        """Am I building a Python 2 kernel environment?

        Always returns a bool; False when no Python version was detected.
        """
        version = self.python_version
        return bool(version) and version.split(".")[0] == "2"

    def get_preassemble_script_files(self):
        """preassembly only requires environment.yml

        enables caching assembly result even when
        repo contents change
        """
        assemble_files = super().get_preassemble_script_files()
        if self._should_preassemble_env:
            environment_yml = self.binder_path("environment.yml")
            if os.path.exists(environment_yml):
                # source and destination path are the same
                assemble_files[environment_yml] = environment_yml
        return assemble_files

    def get_env_scripts(self):
        """Return series of build-steps specific to this source repository.

        Each step is a `(user, script)` tuple. The steps update the conda
        env from `environment.yml` when that file exists and, when R
        packages are requested, install R and IRkernel, add the RStudio
        base scripts and configure RStudio to use the env's R.
        """
        scripts = []
        environment_yml = self.binder_path("environment.yml")
        env_prefix = "${KERNEL_PYTHON_PREFIX}" if self.py2 else "${NB_PYTHON_PREFIX}"
        if os.path.exists(environment_yml):
            scripts.append(
                (
                    "${NB_USER}",
                    r"""
conda env update -p {0} -f "{1}" && \
conda clean --all -f -y && \
conda list -p {0}
""".format(
                        env_prefix, environment_yml
                    ),
                )
            )

        if self.uses_r:
            # pin r-base only when environment.yml requested a version
            if self.r_version:
                r_pin = "=" + self.r_version
            else:
                r_pin = ""
            scripts.append(
                (
                    "${NB_USER}",
                    r"""
conda install -p {0} r-base{1} r-irkernel={2} r-devtools && \
conda clean --all -f -y && \
conda list -p {0}
""".format(
                        env_prefix, r_pin, IRKERNEL_VERSION
                    ),
                )
            )
            scripts += rstudio_base_scripts()
            scripts += [
                (
                    "root",
                    r"""
echo auth-none=1 >> /etc/rstudio/rserver.conf && \
echo auth-minimum-user-id=0 >> /etc/rstudio/rserver.conf && \
echo "rsession-which-r={0}/bin/R" >> /etc/rstudio/rserver.conf && \
echo www-frame-origin=same >> /etc/rstudio/rserver.conf
""".format(
                        env_prefix
                    ),
                ),
                (
                    "${NB_USER}",
                    # Register the IRkernel kernelspec under the env prefix
                    # (the pinned r-irkernel package is installed above).
                    r"""
R --quiet -e "IRkernel::installspec(prefix='{0}')"
""".format(
                        env_prefix
                    ),
                ),
            ]
        return scripts

    def get_preassemble_scripts(self):
        """Return the preassemble scripts, including the env scripts when
        the environment can be built before the repo contents are staged.
        """
        scripts = super().get_preassemble_scripts()
        if self._should_preassemble_env:
            scripts.extend(self.get_env_scripts())
        return scripts

    def get_assemble_scripts(self):
        """Return the assemble scripts, including the env scripts when they
        were not already run during preassembly.
        """
        scripts = super().get_assemble_scripts()
        if not self._should_preassemble_env:
            scripts.extend(self.get_env_scripts())
        return scripts

    def detect(self):
        """Check if current repo should be built with the Conda BuildPack."""
        return os.path.exists(self.binder_path("environment.yml")) and super().detect()