From 38f5aceb45a8f3df1f6d68f619c7904cb047fd6f Mon Sep 17 00:00:00 2001 From: Min RK Date: Wed, 26 Jan 2022 15:49:49 +0100 Subject: [PATCH 1/6] Add check-tmp step to local repo tests - check-tmp checks disk usage in a collection of directories, and fails if they exceed a certain size, reporting on contents - re-use images and skip build stage in verify/check-tmp tests. Should save some time, even though the build cache would have been used. This way, we don't even check. --- tests/check-tmp | 86 +++++++++++++++++++++++++++++++++++++++++++++++ tests/conftest.py | 46 +++++++++++++++++++++---- 2 files changed, 125 insertions(+), 7 deletions(-) create mode 100755 tests/check-tmp diff --git a/tests/check-tmp b/tests/check-tmp new file mode 100755 index 00000000..3c63489f --- /dev/null +++ b/tests/check-tmp @@ -0,0 +1,86 @@ +#!/usr/bin/env python3 +""" +Script to check for leftover files + +Checks a collection of temporary or cache directories, +to ensure we aren't wasting image size by forgetting cleanup steps. + +This script is run in every local repo image we test +""" + +import os +import sys +from subprocess import check_output +from textwrap import indent + +# directories larger than this are considered a failure +# a few little files here aren't a problem +THRESHOLD = 1 # in MB + +MB = 1024 * 1024 + +# the paths to check +# all of these locations +# should be cleaned up +# missing is okay +PATHS = [ + "/tmp/", + "~/", + "~/.cache/", + # not running with read permissions on root + # "/root/", +] + + +def du(path): + """Return disk usage in megabytes of a path""" + # -ks: get total size, reported in kilobytes + out = check_output(["du", "-Hks", path]) + return int(out.split(None, 1)[0]) / 1024 + + +def check_dir_size(path): + """Check the size of a directory + + Returns: + + True: directory size is below THRESHOLD or is missing + False: directory is larger than THRESHOLD + """ + path = os.path.expanduser(path) + + if not os.path.exists(path): + print(f"{path}: missing OK") + return True + + size_mb = du(path) + print(f"{path}: {size_mb:.1f} MB", end=" ") + if size_mb <= THRESHOLD: + print("OK") + return True + else: + print("FAIL") + # check size of files one-level deep (du only reports dirs) + for name in os.listdir(path): + subpath = os.path.join(path, name) + if os.path.isfile(subpath): + file_sz = os.stat(subpath).st_size / MB + if file_sz > 0.1: + print(f" {file_sz:.1f}M {subpath}") + # get report on all subdirs that are at least 100k + print( + indent( + check_output(["du", "-Hh", "-t", "100000", path]).decode("utf8"), " " + ) + ) + return False + + +def main(): + results = [check_dir_size(path) for path in PATHS] + if not all(results): + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/tests/conftest.py b/tests/conftest.py index 550eb359..c9bd11d9 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -22,14 +22,17 @@ import shlex import requests import subprocess import time - from tempfile import TemporaryDirectory + +import escapism import pytest import yaml from repo2docker.__main__ import make_r2d +TESTS_DIR = os.path.abspath(os.path.dirname(__file__)) + def pytest_collect_file(parent, path): if path.basename == "verify": @@ -38,12 +41,18 @@ def pytest_collect_file(parent, path): return RemoteRepoList.from_parent(parent, fspath=path) -def make_test_func(args): +def make_test_func(args, skip_build=False): """Generate a test function that runs repo2docker""" def test(): app = make_r2d(args) app.initialize() + if skip_build: + + def build_noop(): + print("Skipping build") + + app.skip_build = build_noop if app.run_cmd: # verify test, run it app.start() @@ -184,14 +193,14 @@ def repo_with_submodule(): class Repo2DockerTest(pytest.Function): """A pytest.Item for running repo2docker""" - def __init__(self, name, parent, args=None): + def __init__(self, name, parent, args=None, skip_build=False): self.args = args self.save_cwd = os.getcwd() - f = parent.obj = make_test_func(args) + f = parent.obj = make_test_func(args, skip_build=skip_build) super().__init__(name, parent, callobj=f) def reportinfo(self): - return self.parent.fspath, None, "" + return (self.parent.fspath, None, "") def repr_failure(self, excinfo): err = excinfo.value @@ -217,11 +226,34 @@ class LocalRepo(pytest.File): extra_args = yaml.safe_load(f) args += extra_args + print(self.fspath.basename, self.fspath.dirname, str(self.fspath)) + # re-use image name for multiple tests of the same image + # so we don't run through the build twice + rel_repo_dir = os.path.relpath(self.fspath.dirname, TESTS_DIR) + image_name = f"r2d-tests-{escapism.escape(rel_repo_dir, escape_char='-').lower()}-{int(time.time())}" + args.append(f"--image-name={image_name}") args.append(self.fspath.dirname) - yield Repo2DockerTest.from_parent(self, name="build", args=args) + yield Repo2DockerTest.from_parent( - self, name=self.fspath.basename, args=args + ["./verify"] + self, + name=self.fspath.basename, + args=args + ["./verify"], + skip_build=True, + ) + + # mount the tests dir as a volume + check_tmp_args = ( + args[:-1] + + ["--volume", f"{TESTS_DIR}:/io/tests"] + + [args[-1], "/io/tests/check-tmp"] + ) + + yield Repo2DockerTest.from_parent( + self, + name="check-tmp", + args=check_tmp_args, + skip_build=True, ) From 2da784ca215cd158a22f6d4a84c98e888ce7337c Mon Sep 17 00:00:00 2001 From: Min RK Date: Wed, 26 Jan 2022 16:35:37 +0100 Subject: [PATCH 2/6] check-tmp needs to run on Python 3.5 --- tests/check-tmp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/check-tmp b/tests/check-tmp index 3c63489f..d7657283 100755 --- a/tests/check-tmp +++ b/tests/check-tmp @@ -50,11 +50,11 @@ def check_dir_size(path): path = os.path.expanduser(path) if not os.path.exists(path): - print(f"{path}: missing OK") + print("{path}: missing OK".format(**locals())) return True size_mb = du(path) - print(f"{path}: {size_mb:.1f} MB", end=" ") + print("{path}: {size_mb:.1f} MB".format(**locals()), end=" ") if size_mb <= THRESHOLD: print("OK") return True @@ -66,7 +66,7 @@ def check_dir_size(path): if os.path.isfile(subpath): file_sz = os.stat(subpath).st_size / MB if file_sz > 0.1: - print(f" {file_sz:.1f}M {subpath}") + print(" {file_sz:.1f}M {subpath}".format(**locals())) # get report on all subdirs that are at least 100k print( indent( From 6b0291551dba56ca5b3522ab170ebb2aa124cc04 Mon Sep 17 00:00:00 2001 From: Min RK Date: Wed, 26 Jan 2022 16:47:40 +0100 Subject: [PATCH 3/6] Make sure we delete /tmp/shiny.deb revealed by check-tmp --- repo2docker/buildpacks/_r_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/repo2docker/buildpacks/_r_base.py b/repo2docker/buildpacks/_r_base.py index 09ec90d6..c7327379 100644 --- a/repo2docker/buildpacks/_r_base.py +++ b/repo2docker/buildpacks/_r_base.py @@ -33,7 +33,7 @@ def rstudio_base_scripts(r_version): echo '{shiny_sha256sum} /tmp/shiny.deb' | sha256sum -c - && \ apt-get update > /dev/null && \ apt install -y --no-install-recommends /tmp/rstudio.deb /tmp/shiny.deb && \ - rm /tmp/rstudio.deb && \ + rm /tmp/*.deb && \ apt-get -qq purge && \ apt-get -qq clean && \ rm -rf /var/lib/apt/lists/* From bd02e2e997cf1bcf6a20d798891cf1d4de371c18 Mon Sep 17 00:00:00 2001 From: Min RK Date: Thu, 27 Jan 2022 10:09:45 +0100 Subject: [PATCH 4/6] run check-tmp as root - avoids permission errors - lets us check /root/ where we've left files in the past --- tests/check-tmp | 8 +++++--- tests/conftest.py | 13 ++++++++++--- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/tests/check-tmp b/tests/check-tmp index d7657283..6437c437 100755 --- a/tests/check-tmp +++ b/tests/check-tmp @@ -25,10 +25,12 @@ MB = 1024 * 1024 # missing is okay PATHS = [ "/tmp/", + # check whole home? + # this shouldn't be empty, but for our tests (so far) it should be very small + # This is the easiest way to ensure we aren't leaving any unexpected files + # without knowing ahead of time where all possible caches might be (.npm, .cache, etc.) "~/", - "~/.cache/", - # not running with read permissions on root - # "/root/", + "/root/", ] diff --git a/tests/conftest.py b/tests/conftest.py index c9bd11d9..19c0ca6e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -41,12 +41,14 @@ def pytest_collect_file(parent, path): return RemoteRepoList.from_parent(parent, fspath=path) -def make_test_func(args, skip_build=False): +def make_test_func(args, skip_build=False, extra_run_kwargs=None): """Generate a test function that runs repo2docker""" def test(): app = make_r2d(args) app.initialize() + if extra_run_kwargs: + app.extra_run_kwargs.update(extra_run_kwargs) if skip_build: def build_noop(): @@ -193,10 +195,14 @@ def repo_with_submodule(): class Repo2DockerTest(pytest.Function): """A pytest.Item for running repo2docker""" - def __init__(self, name, parent, args=None, skip_build=False): + def __init__( + self, name, parent, args=None, skip_build=False, extra_run_kwargs=None + ): self.args = args self.save_cwd = os.getcwd() - f = parent.obj = make_test_func(args, skip_build=skip_build) + f = parent.obj = make_test_func( + args, skip_build=skip_build, extra_run_kwargs=extra_run_kwargs + ) super().__init__(name, parent, callobj=f) def reportinfo(self): @@ -254,6 +260,7 @@ class LocalRepo(pytest.File): name="check-tmp", args=check_tmp_args, skip_build=True, + extra_run_kwargs={"user": "root"}, ) From 757799516da67482e343ee64bdf536955eb91a71 Mon Sep 17 00:00:00 2001 From: Min RK Date: Thu, 27 Jan 2022 10:54:11 +0100 Subject: [PATCH 5/6] ensure some cache files are cleaned up --- repo2docker/buildpacks/pipfile/__init__.py | 5 ++++- tests/venv/postBuild/postBuild | 3 ++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/repo2docker/buildpacks/pipfile/__init__.py b/repo2docker/buildpacks/pipfile/__init__.py index 959c8b72..20214099 100644 --- a/repo2docker/buildpacks/pipfile/__init__.py +++ b/repo2docker/buildpacks/pipfile/__init__.py @@ -88,7 +88,10 @@ class PipfileBuildPack(CondaBuildPack): scripts = super().get_preassemble_scripts() # install pipenv to install dependencies within Pipfile.lock or Pipfile scripts.append( - ("${NB_USER}", "${KERNEL_PYTHON_PREFIX}/bin/pip install pipenv==2018.11.26") + ( + "${NB_USER}", + "${KERNEL_PYTHON_PREFIX}/bin/pip install --no-cache-dir pipenv==2018.11.26", + ) ) return scripts diff --git a/tests/venv/postBuild/postBuild b/tests/venv/postBuild/postBuild index c1e89e8a..4f301be5 100755 --- a/tests/venv/postBuild/postBuild +++ b/tests/venv/postBuild/postBuild @@ -1,3 +1,4 @@ #!/bin/bash jupyter nbextension enable --py --sys-prefix ipyleaflet -npm install --global configurable-http-proxy \ No newline at end of file +npm install --global configurable-http-proxy +npm cache clean --force From e7c93b0fc8bcb89efae203b3432e49a00be522d4 Mon Sep 17 00:00:00 2001 From: Min RK Date: Tue, 1 Feb 2022 14:51:13 +0100 Subject: [PATCH 6/6] update pipenv to 2022.1.8 pipenv 2018 doesn't actually clear its cache when asked to 2021.5.29 is the last to support 2.7, 3.5 --- repo2docker/buildpacks/pipfile/__init__.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/repo2docker/buildpacks/pipfile/__init__.py b/repo2docker/buildpacks/pipfile/__init__.py index 20214099..e928d01e 100644 --- a/repo2docker/buildpacks/pipfile/__init__.py +++ b/repo2docker/buildpacks/pipfile/__init__.py @@ -11,6 +11,7 @@ import re import toml +from ...semver import parse_version as V from ..conda import CondaBuildPack VERSION_PAT = re.compile(r"\d+(\.\d+)*") @@ -87,10 +88,15 @@ class PipfileBuildPack(CondaBuildPack): """scripts to run prior to staging the repo contents""" scripts = super().get_preassemble_scripts() # install pipenv to install dependencies within Pipfile.lock or Pipfile + if V(self.python_version) < V("3.6"): + # last pipenv version to support 2.7, 3.5 + pipenv_version = "2021.5.29" + else: + pipenv_version = "2022.1.8" scripts.append( ( "${NB_USER}", - "${KERNEL_PYTHON_PREFIX}/bin/pip install --no-cache-dir pipenv==2018.11.26", + f"${{KERNEL_PYTHON_PREFIX}}/bin/pip install --no-cache-dir pipenv=={pipenv_version}", ) ) return scripts