diff --git a/repo2docker/buildpacks/_r_base.py b/repo2docker/buildpacks/_r_base.py index 09ec90d6..c7327379 100644 --- a/repo2docker/buildpacks/_r_base.py +++ b/repo2docker/buildpacks/_r_base.py @@ -33,7 +33,7 @@ def rstudio_base_scripts(r_version): echo '{shiny_sha256sum} /tmp/shiny.deb' | sha256sum -c - && \ apt-get update > /dev/null && \ apt install -y --no-install-recommends /tmp/rstudio.deb /tmp/shiny.deb && \ - rm /tmp/rstudio.deb && \ + rm /tmp/*.deb && \ apt-get -qq purge && \ apt-get -qq clean && \ rm -rf /var/lib/apt/lists/* diff --git a/repo2docker/buildpacks/pipfile/__init__.py b/repo2docker/buildpacks/pipfile/__init__.py index 959c8b72..e928d01e 100644 --- a/repo2docker/buildpacks/pipfile/__init__.py +++ b/repo2docker/buildpacks/pipfile/__init__.py @@ -11,6 +11,7 @@ import re import toml +from ...semver import parse_version as V from ..conda import CondaBuildPack VERSION_PAT = re.compile(r"\d+(\.\d+)*") @@ -87,8 +88,16 @@ class PipfileBuildPack(CondaBuildPack): """scripts to run prior to staging the repo contents""" scripts = super().get_preassemble_scripts() # install pipenv to install dependencies within Pipfile.lock or Pipfile + if V(self.python_version) < V("3.6"): + # last pipenv version to support 2.7, 3.5 + pipenv_version = "2021.5.29" + else: + pipenv_version = "2022.1.8" scripts.append( - ("${NB_USER}", "${KERNEL_PYTHON_PREFIX}/bin/pip install pipenv==2018.11.26") + ( + "${NB_USER}", + f"${{KERNEL_PYTHON_PREFIX}}/bin/pip install --no-cache-dir pipenv=={pipenv_version}", + ) ) return scripts diff --git a/tests/check-tmp b/tests/check-tmp new file mode 100755 index 00000000..6437c437 --- /dev/null +++ b/tests/check-tmp @@ -0,0 +1,88 @@ +#!/usr/bin/env python3 +""" +Script to check for leftover files + +Checks a collection of temporary or cache directories, +to ensure we aren't wasting image size by forgetting cleanup steps. + +This script is run in every local repo image we test +""" + +import os +import sys +from subprocess import check_output +from textwrap import indent + +# directories larger than this are considered a failure +# a few little files here aren't a problem +THRESHOLD = 1 # in MB + +MB = 1024 * 1024 + +# the paths to check +# all of these locations +# should be cleaned up +# missing is okay +PATHS = [ + "/tmp/", + # check whole home? + # this shouldn't be empty, but for our tests (so far) it should be very small + # This is the easiest way to ensure we aren't leaving any unexpected files + # without knowing ahead of time where all possible caches might be (.npm, .cache, etc.) + "~/", + "/root/", +] + + +def du(path): + """Return disk usage in megabytes of a path""" + # -ks: get total size, reported in kilobytes + out = check_output(["du", "-Hks", path]) + return int(out.split(None, 1)[0]) / 1024 + + +def check_dir_size(path): + """Check the size of a directory + + Returns: + + True: directory size is below THRESHOLD or is missing + False: directory is larger than THRESHOLD + """ + path = os.path.expanduser(path) + + if not os.path.exists(path): + print("{path}: missing OK".format(**locals())) + return True + + size_mb = du(path) + print("{path}: {size_mb:.1f} MB".format(**locals()), end=" ") + if size_mb <= THRESHOLD: + print("OK") + return True + else: + print("FAIL") + # check size of files one-level deep (du only reports dirs) + for name in os.listdir(path): + subpath = os.path.join(path, name) + if os.path.isfile(subpath): + file_sz = os.stat(subpath).st_size / MB + if file_sz > 0.1: + print(" {file_sz:.1f}M {subpath}".format(**locals())) + # get report on all subdirs that are at least 100k + print( + indent( + check_output(["du", "-Hh", "-t", "100000", path]).decode("utf8"), " " + ) + ) + return False + + +def main(): + results = [check_dir_size(path) for path in PATHS] + if not all(results): + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/tests/conftest.py b/tests/conftest.py index 550eb359..19c0ca6e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -22,14 +22,17 @@ import shlex import requests import subprocess import time - from tempfile import TemporaryDirectory + +import escapism import pytest import yaml from repo2docker.__main__ import make_r2d +TESTS_DIR = os.path.abspath(os.path.dirname(__file__)) + def pytest_collect_file(parent, path): if path.basename == "verify": @@ -38,12 +41,20 @@ def pytest_collect_file(parent, path): return RemoteRepoList.from_parent(parent, fspath=path) -def make_test_func(args): +def make_test_func(args, skip_build=False, extra_run_kwargs=None): """Generate a test function that runs repo2docker""" def test(): app = make_r2d(args) app.initialize() + if extra_run_kwargs: + app.extra_run_kwargs.update(extra_run_kwargs) + if skip_build: + + def build_noop(): + print("Skipping build") + + app.skip_build = build_noop if app.run_cmd: # verify test, run it app.start() @@ -184,14 +195,18 @@ def repo_with_submodule(): class Repo2DockerTest(pytest.Function): """A pytest.Item for running repo2docker""" - def __init__(self, name, parent, args=None): + def __init__( + self, name, parent, args=None, skip_build=False, extra_run_kwargs=None + ): self.args = args self.save_cwd = os.getcwd() - f = parent.obj = make_test_func(args) + f = parent.obj = make_test_func( + args, skip_build=skip_build, extra_run_kwargs=extra_run_kwargs + ) super().__init__(name, parent, callobj=f) def reportinfo(self): - return self.parent.fspath, None, "" + return (self.parent.fspath, None, "") def repr_failure(self, excinfo): err = excinfo.value @@ -217,11 +232,35 @@ class LocalRepo(pytest.File): extra_args = yaml.safe_load(f) args += extra_args + print(self.fspath.basename, self.fspath.dirname, str(self.fspath)) + # re-use image name for multiple tests of the same image + # so we don't run through the build twice + rel_repo_dir = os.path.relpath(self.fspath.dirname, TESTS_DIR) + image_name = f"r2d-tests-{escapism.escape(rel_repo_dir, escape_char='-').lower()}-{int(time.time())}" + args.append(f"--image-name={image_name}") args.append(self.fspath.dirname) - yield Repo2DockerTest.from_parent(self, name="build", args=args) + yield Repo2DockerTest.from_parent( - self, name=self.fspath.basename, args=args + ["./verify"] + self, + name=self.fspath.basename, + args=args + ["./verify"], + skip_build=True, + ) + + # mount the tests dir as a volume + check_tmp_args = ( + args[:-1] + + ["--volume", f"{TESTS_DIR}:/io/tests"] + + [args[-1], "/io/tests/check-tmp"] + ) + + yield Repo2DockerTest.from_parent( + self, + name="check-tmp", + args=check_tmp_args, + skip_build=True, + extra_run_kwargs={"user": "root"}, ) diff --git a/tests/venv/postBuild/postBuild b/tests/venv/postBuild/postBuild index c1e89e8a..4f301be5 100755 --- a/tests/venv/postBuild/postBuild +++ b/tests/venv/postBuild/postBuild @@ -1,3 +1,4 @@ #!/bin/bash jupyter nbextension enable --py --sys-prefix ipyleaflet -npm install --global configurable-http-proxy \ No newline at end of file +npm install --global configurable-http-proxy +npm cache clean --force