"""BuildPack for conda environments"""
import os
import re
from collections.abc import Mapping

from ruamel.yaml import YAML

from ...utils import is_local_pip_requirement
from .._r_base import rstudio_base_scripts
from ..base import BaseImage

# pattern for parsing conda dependency line
PYTHON_REGEX = re.compile(r"python\s*=+\s*([\d\.]*)")
R_REGEX = re.compile(r"r-base\s*=+\s*([\d\.]*)")

# current directory
HERE = os.path.dirname(os.path.abspath(__file__))


class CondaBuildPack(BaseImage):
    """A conda BuildPack.

    Uses miniconda since it is more lightweight than Anaconda.
    """

    # The kernel conda environment file, if any.
    # As an absolute path within the container.
    _kernel_environment_file = ""
    # extra pip requirements.txt for the kernel
    _kernel_requirements_file = ""

    # The notebook server environment file.
    # As an absolute path within the container.
    _nb_environment_file = ""
    # extra pip requirements.txt for the notebook env
    _nb_requirements_file = ""

    def get_build_env(self):
        """Return environment variables to be set.

        We set `CONDA_DIR` to the conda install directory and
        the `NB_PYTHON_PREFIX` to the location of the jupyter binary.

        Returns the parent's list of `(name, value)` tuples extended with
        the conda-specific entries. The kernel-related entries depend on
        whether a separate kernel environment file was selected by
        `get_build_script_files`.
        """
        if not self._nb_environment_file:
            # get_build_script_files locates requirements/environment files,
            # populating the _nb_environment_file attribute and others.
            # FIXME: move file detection and initialization of those attributes to its own step?
            self.get_build_script_files()

        env = super().get_build_env() + [
            ("CONDA_DIR", "${APP_BASE}/conda"),
            ("NB_PYTHON_PREFIX", "${CONDA_DIR}/envs/notebook"),
            # We install npm / node from conda-forge
            ("NPM_DIR", "${APP_BASE}/npm"),
            ("NPM_CONFIG_GLOBALCONFIG", "${NPM_DIR}/npmrc"),
            ("NB_ENVIRONMENT_FILE", self._nb_environment_file),
            ("MAMBA_ROOT_PREFIX", "${CONDA_DIR}"),
            # this exe should be used for installs after bootstrap with micromamba
            # switch this to /usr/local/bin/micromamba to use it for all installs
            ("MAMBA_EXE", "${CONDA_DIR}/bin/mamba"),
        ]
        if self._nb_requirements_file:
            env.append(("NB_REQUIREMENTS_FILE", self._nb_requirements_file))

        if self._kernel_environment_file:
            # if kernel environment file is separate
            env.extend(
                [
                    ("KERNEL_PYTHON_PREFIX", "${CONDA_DIR}/envs/kernel"),
                    ("KERNEL_ENVIRONMENT_FILE", self._kernel_environment_file),
                ]
            )
            if self._kernel_requirements_file:
                env.append(
                    ("KERNEL_REQUIREMENTS_FILE", self._kernel_requirements_file)
                )
        else:
            # no separate kernel env: the kernel lives in the notebook env
            env.append(("KERNEL_PYTHON_PREFIX", "${NB_PYTHON_PREFIX}"))
        return env

    def get_env(self):
        """Make kernel env the default for `conda install`"""
        env = super().get_env() + [("CONDA_DEFAULT_ENV", "${KERNEL_PYTHON_PREFIX}")]
        return env

    def get_path(self):
        """Return paths (including conda environment path)
        to be added to the PATH environment variable.

        Entries are inserted at the front of the parent's list, so the
        resulting order is: notebook env, kernel env (Python 2 builds only),
        conda itself, the parent's entries, and finally the npm directory.
        """
        path = super().get_path()
        path.insert(0, "${CONDA_DIR}/bin")
        if self.py2:
            path.insert(0, "${KERNEL_PYTHON_PREFIX}/bin")
        path.insert(0, "${NB_PYTHON_PREFIX}/bin")
        # This is at the end of $PATH, for backwards compat reasons
        path.append("${NPM_DIR}/bin")
        return path

    def get_build_scripts(self):
        """
        Return series of build-steps common to all Python 3 repositories.

        All scripts here should be independent of contents of the repository.

        This sets up through `install-base-env.bash` (found in this directory):

        - a directory for the conda environment and its ownership by the
          notebook user
        - a Python 3 interpreter for the conda environment
        - a Python 3 jupyter kernel
        - a frozen base set of requirements, including:
            - support for Jupyter widgets
            - support for JupyterLab
            - support for nteract

        Each entry is a `(user, script)` tuple appended to the parent's list.
        """
        return super().get_build_scripts() + [
            (
                "root",
                r"""
                TIMEFORMAT='time: %3R' \
                bash -c 'time /tmp/install-base-env.bash' && \
                rm -rf /tmp/install-base-env.bash /tmp/env
                """,
            ),
            (
                "root",
                r"""
                mkdir -p ${NPM_DIR} && \
                chown -R ${NB_USER}:${NB_USER} ${NPM_DIR}
                """,
            ),
        ]

    # Default minor version used when only a major version is pinned.
    major_pythons = {"2": "2.7", "3": "3.7"}

    def get_build_script_files(self):
        """
        Dict of files to be copied to the container image for use in building.

        This is copied before the `build_scripts` & `assemble_scripts` are
        run, so can be executed from either of them.

        It's a dictionary where the key is the source file path in the host
        system, and the value is the destination file path inside the
        container image.

        This currently adds a frozen set of Python requirements to the dict
        of files.

        As a side effect this records the in-container paths of the selected
        environment / requirements files on the instance
        (`_nb_environment_file`, `_nb_requirements_file`,
        `_kernel_environment_file`, `_kernel_requirements_file`).

        Raises:
            ValueError: if a Python version other than 2.7 is requested and no
                matching `environment.py-X.Y.lock` exists next to this module.
        """
        files = {
            "conda/install-base-env.bash": "/tmp/install-base-env.bash",
            "conda/activate-conda.sh": "/etc/profile.d/activate-conda.sh",
        }
        py_version = self.python_version
        self.log.info(f"Building conda environment for python={py_version}\n")
        # Select the frozen base environment based on Python version.
        # avoids expensive and possibly conflicting upgrades when changing
        # major Python versions during upgrade.
        # If no version is specified or no matching X.Y version is found,
        # the default base environment is used.
        frozen_name = "environment.lock"
        pip_frozen_name = "requirements.txt"
        if py_version:
            if py_version == "2.7":
                # python 2 goes in a different env
                self._kernel_environment_file = "/tmp/env/kernel-environment.lock"
                files["conda/environment.py-2.7.lock"] = self._kernel_environment_file
                # additional pip requirements for kernel env
                if os.path.exists(os.path.join(HERE, "requirements.py-2.7.txt")):
                    self._kernel_requirements_file = (
                        "/tmp/env/kernel-requirements.txt"
                    )
                    files[
                        "conda/requirements.py-2.7.txt"
                    ] = self._kernel_requirements_file
            else:
                py_frozen_name = f"environment.py-{py_version}.lock"
                if os.path.exists(os.path.join(HERE, py_frozen_name)):
                    frozen_name = py_frozen_name
                    pip_frozen_name = f"requirements.py-{py_version}.pip"
                else:
                    raise ValueError(f"Python version {py_version} is not supported!")
        self._nb_environment_file = "/tmp/env/environment.lock"
        files["conda/" + frozen_name] = self._nb_environment_file

        # add requirements.txt, if present
        if os.path.exists(os.path.join(HERE, pip_frozen_name)):
            self._nb_requirements_file = "/tmp/env/requirements.txt"
            files["conda/" + pip_frozen_name] = self._nb_requirements_file

        files.update(super().get_build_script_files())
        return files

    # Cache for the parsed environment.yml; None means "not loaded yet".
    _environment_yaml = None

    @property
    def environment_yaml(self):
        """Parsed contents of the repository's `environment.yml`.

        Returns an empty dict when the file does not exist or is empty.
        The result is cached on the instance after the first access.

        Raises:
            TypeError: if the file's top-level value is not a mapping.
        """
        if self._environment_yaml is not None:
            return self._environment_yaml

        environment_yml = self.binder_path("environment.yml")
        if not os.path.exists(environment_yml):
            self._environment_yaml = {}
            return self._environment_yaml

        # Read as UTF-8 explicitly rather than relying on the locale default.
        with open(environment_yml, encoding="utf-8") as f:
            env = YAML().load(f)
            # check if the env file is empty, if so instantiate an empty dictionary.
            if env is None:
                env = {}
            # check if the env file provided a dict-like thing not a list or other data structure.
            if not isinstance(env, Mapping):
                raise TypeError(
                    "environment.yml should contain a dictionary. Got %r" % type(env)
                )
            self._environment_yaml = env

        return self._environment_yaml

    def _env_dependencies(self):
        """Return the `dependencies` entries of environment.yml as an iterable.

        A missing key, or a key present with no value (`dependencies:`),
        both yield an empty list so callers can iterate unconditionally.
        """
        return self.environment_yaml.get("dependencies") or []

    @property
    def _should_preassemble_env(self):
        """Check for local pip requirements in environment.yaml

        If there are any local references, e.g. `-e .`,
        stage the whole repo prior to installation.

        Returns False as soon as any `pip:` section lists a local
        requirement, True otherwise.
        """
        for dep in self._env_dependencies():
            if not isinstance(dep, dict):
                continue
            pip_requirements = dep.get("pip")
            if not isinstance(pip_requirements, list):
                continue
            # inspect every pip section, not only the last one encountered
            if any(is_local_pip_requirement(line) for line in pip_requirements):
                return False
        return True

    @property
    def python_version(self):
        """Detect the Python version for a given `environment.yml`

        Will return 'x.y' if version is found (e.g '3.6'),
        or a Falsy empty string '' if not found.

        Version information below the minor level is dropped.
        A bare major version is mapped through `major_pythons`; a major
        version not listed there yields ''.
        """
        if not hasattr(self, "_python_version"):
            py_version = None
            for dep in self._env_dependencies():
                if not isinstance(dep, str):
                    continue
                match = PYTHON_REGEX.match(dep)
                if not match:
                    continue
                # The pattern captures digits and dots only, so a wildcard pin
                # such as "python=3.*" is captured as "3."; drop the trailing dot.
                py_version = match.group(1).rstrip(".")
                break

            # extract major.minor
            if not py_version:
                self._python_version = ""
            elif len(py_version) == 1:
                # only a major version was given; fall back to '' if unknown
                self._python_version = self.major_pythons.get(py_version, "")
            else:
                # return major.minor
                self._python_version = ".".join(py_version.split(".")[:2])

        return self._python_version

    @property
    def r_version(self):
        """Detect the R version for a given `environment.yml`

        Will return 'x.y.z' if version is found (e.g '4.1.1'),
        or a Falsy empty string '' if not found.
        """
        if not hasattr(self, "_r_version"):
            self._r_version = ""
            for dep in self._env_dependencies():
                if not isinstance(dep, str):
                    continue
                match = R_REGEX.match(dep)
                if not match:
                    continue
                self._r_version = match.group(1)
                break

        return self._r_version

    @property
    def uses_r(self):
        """Detect whether the user also installs R packages.

        Will return True when a package prefixed with 'r-' is being installed.
        """
        if not hasattr(self, "_uses_r"):
            self._uses_r = any(
                isinstance(dep, str) and dep.startswith("r-")
                for dep in self._env_dependencies()
            )

        return self._uses_r

    @property
    def py2(self):
        """Am I building a Python 2 kernel environment?"""
        version = self.python_version
        return bool(version) and version.split(".")[0] == "2"

    def get_preassemble_script_files(self):
        """preassembly only requires environment.yml

        enables caching assembly result even when
        repo contents change
        """
        assemble_files = super().get_preassemble_script_files()
        if self._should_preassemble_env:
            environment_yml = self.binder_path("environment.yml")
            if os.path.exists(environment_yml):
                assemble_files[environment_yml] = environment_yml
        return assemble_files

    def get_env_scripts(self):
        """Return series of build-steps specific to this source repository.

        When `environment.yml` exists, the target environment is updated from
        it; if R packages are requested, R, the R kernel and RStudio
        configuration steps are added as well. Each entry is a
        `(user, script)` tuple.
        """
        scripts = []
        environment_yml = self.binder_path("environment.yml")
        # Python 2 builds install into the separate kernel environment
        env_prefix = "${KERNEL_PYTHON_PREFIX}" if self.py2 else "${NB_PYTHON_PREFIX}"
        if os.path.exists(environment_yml):
            # TODO: when using micromamba, we call $MAMBA_EXE install -p ...
            # whereas mamba/conda need `env update -p ...` when it's an env.yaml file
            scripts.append(
                (
                    "${NB_USER}",
                    rf"""
                TIMEFORMAT='time: %3R' \
                bash -c 'time ${{MAMBA_EXE}} env update -p {env_prefix} --file "{environment_yml}" && \
                time ${{MAMBA_EXE}} clean --all -f -y && \
                ${{MAMBA_EXE}} list -p {env_prefix} \
                '
                """,
                )
            )

            if self.uses_r:
                # pin r-base only when environment.yml requested a version
                if self.r_version:
                    r_pin = "=" + self.r_version
                else:
                    r_pin = ""
                scripts.append(
                    (
                        "${NB_USER}",
                        rf"""
                    ${{MAMBA_EXE}} install -p {env_prefix} r-base{r_pin} r-irkernel r-devtools -y && \
                    ${{MAMBA_EXE}} clean --all -f -y && \
                    ${{MAMBA_EXE}} list -p {env_prefix}
                    """,
                    )
                )
                scripts += rstudio_base_scripts(self.r_version)
                scripts += [
                    (
                        "root",
                        rf"""
                        echo auth-none=1 >> /etc/rstudio/rserver.conf && \
                        echo auth-minimum-user-id=0 >> /etc/rstudio/rserver.conf && \
                        echo "rsession-which-r={env_prefix}/bin/R" >> /etc/rstudio/rserver.conf && \
                        echo www-frame-origin=same >> /etc/rstudio/rserver.conf
                        """,
                    ),
                    (
                        "${NB_USER}",
                        # Register the jupyter kernel
                        rf"""
                     R --quiet -e "IRkernel::installspec(prefix='{env_prefix}')"
                     """,
                    ),
                ]
        return scripts

    def get_preassemble_scripts(self):
        """Return pre-assemble steps, including the environment scripts
        when the environment can be built before the repo is copied in."""
        scripts = super().get_preassemble_scripts()
        if self._should_preassemble_env:
            scripts.extend(self.get_env_scripts())
        return scripts

    def get_assemble_scripts(self):
        """Return assemble steps, including the environment scripts when
        they could not be run during pre-assembly."""
        scripts = super().get_assemble_scripts()
        if not self._should_preassemble_env:
            scripts.extend(self.get_env_scripts())
        return scripts

    def detect(self):
        """Check if current repo should be built with the Conda BuildPack."""
        return os.path.exists(self.binder_path("environment.yml")) and super().detect()