Add caching of already built repositories

Add tests for image caching

Adjust tests and main app for cached builds

Remove obsolete command-line handling

Remove print statement from test

Fix subdirectory handling

Put back exception instead of sys.exit()
pull/511/head
Tim Head 2018-12-17 09:18:30 +01:00
rodzic 8d2617479a
commit e7018d7ca5
10 zmienionych plików z 226 dodań i 60 usunięć

Wyświetl plik

@ -196,7 +196,6 @@ def get_argparser():
'--cache-from',
action='append',
default=[],
#help=self.traits()['cache_from'].help
)
return argparser
@ -208,7 +207,6 @@ def make_r2d(argv=None):
if argv is None:
argv = sys.argv[1:]
# version must be checked before parse, as repo/cmd are required and
# will spit out an error if allowed to be parsed first.
if '--version' in argv:
@ -244,9 +242,11 @@ def make_r2d(argv=None):
extra=dict(phase='failed'))
sys.exit(1)
if args.image_name:
r2d.output_image_spec = args.image_name
else:
# we will pick a name after fetching the repository
r2d.output_image_spec = ""
r2d.json_logs = args.json_logs
@ -343,5 +343,6 @@ def main():
r2d.log.exception(e)
sys.exit(1)
if __name__ == '__main__':
main()

Wyświetl plik

@ -13,7 +13,6 @@ import sys
import logging
import os
import pwd
import subprocess
import shutil
import tempfile
import time
@ -31,8 +30,7 @@ from traitlets.config import Application
from . import __version__
from .buildpacks import (
PythonBuildPack, DockerBuildPack, LegacyBinderDockerBuildPack,
CondaBuildPack, JuliaBuildPack, BaseImage,
RBuildPack, NixBuildPack
CondaBuildPack, JuliaBuildPack, RBuildPack, NixBuildPack
)
from . import contentproviders
from .utils import (
@ -364,7 +362,21 @@ class Repo2Docker(Application):
spec, checkout_path, yield_output=self.json_logs):
self.log.info(log_line, extra=dict(phase='fetching'))
if not self.output_image_spec:
self.output_image_spec = (
'r2d' + escapism.escape(self.repo, escape_char='-').lower()
)
# if we are building from a subdirectory include that in the
# image name so we can tell builds from different sub-directories
# apart.
if self.subdir:
self.output_image_spec += (
escapism.escape(self.subdir, escape_char='-').lower()
)
if picked_content_provider.content_id is not None:
self.output_image_spec += picked_content_provider.content_id
else:
self.output_image_spec += str(int(time.time()))
def json_excepthook(self, etype, evalue, traceback):
"""Called on an uncaught exception when using json logging
@ -399,15 +411,6 @@ class Repo2Docker(Application):
fmt='%(message)s'
)
if self.output_image_spec == "":
# Attempt to set a sane default!
# HACK: Provide something more descriptive?
self.output_image_spec = (
'r2d' +
escapism.escape(self.repo, escape_char='-').lower() +
str(int(time.time()))
)
if self.dry_run and (self.run or self.push):
raise ValueError("Cannot push or run image if we are not building it")
@ -546,6 +549,20 @@ class Repo2Docker(Application):
s.close()
return port
def find_image(self):
# if this is a dry run it is Ok for dockerd to be unreachable so we
# always return False for dry runs.
if self.dry_run:
return False
# check if we already have an image for this content
client = docker.APIClient(version='auto', **kwargs_from_env())
for image in client.images():
if image['RepoTags'] is not None:
for tag in image['RepoTags']:
if tag == self.output_image_spec + ":latest":
return True
return False
def build(self):
"""
Build docker image
@ -553,7 +570,7 @@ class Repo2Docker(Application):
# Check if r2d can connect to docker daemon
if not self.dry_run:
try:
api_client = docker.APIClient(version='auto',
docker_client = docker.APIClient(version='auto',
**kwargs_from_env())
except DockerException as e:
self.log.exception(e)
@ -574,6 +591,14 @@ class Repo2Docker(Application):
try:
self.fetch(self.repo, self.ref, checkout_path)
if self.find_image():
self.log.info("Reusing existing image ({}), not "
"building.".format(self.output_image_spec))
# no need to build, so skip to the end by `return`ing here
# this will still execute the finally clause and let's us
# avoid having to indent the build code by an extra level
return
if self.subdir:
checkout_path = os.path.join(checkout_path, self.subdir)
if not os.path.isdir(checkout_path):
@ -610,8 +635,11 @@ class Repo2Docker(Application):
self.log.info('Using %s builder\n', bp.__class__.__name__,
extra=dict(phase='building'))
for l in picked_buildpack.build(api_client, self.output_image_spec,
self.build_memory_limit, build_args, self.cache_from):
for l in picked_buildpack.build(docker_client,
self.output_image_spec,
self.build_memory_limit,
build_args,
self.cache_from):
if 'stream' in l:
self.log.info(l['stream'],
extra=dict(phase='building'))
@ -624,8 +652,9 @@ class Repo2Docker(Application):
else:
self.log.info(json.dumps(l),
extra=dict(phase='building'))
finally:
# Cheanup checkout if necessary
# Cleanup checkout if necessary
if self.cleanup_checkout:
shutil.rmtree(checkout_path, ignore_errors=True)

Wyświetl plik

@ -17,6 +17,20 @@ class ContentProvider:
def __init__(self):
self.log = logging.getLogger("repo2docker")
@property
def content_id(self):
"""A unique ID to represent the version of the content.
This ID is used to name the built images. If the ID is the same between
two runs of repo2docker we will reuse an existing image (if it exists).
By providing an ID that summarizes the content we can reuse existing
images and speed up build times. A good ID is the revision of a Git
repository or a hash computed from all the content.
The type content ID can be any string.
To disable this behaviour set this property to `None` in which case
a fresh image will always be built.
"""
return None
def detect(self, repo, ref=None, extra_args=None):
"""Determine compatibility between source and this provider.

Wyświetl plik

@ -44,3 +44,14 @@ class Git(ContentProvider):
cwd=output_dir,
capture=yield_output):
yield line
cmd = ['git', 'rev-parse', 'HEAD']
sha1 = subprocess.Popen(cmd, stdout=subprocess.PIPE, cwd=output_dir)
self._sha1 = sha1.stdout.read().decode().strip()
@property
def content_id(self):
"""A unique ID to represent the version of the content.
Uses the first seven characters of the git commit ID of the repository.
"""
return self._sha1[:7]

Wyświetl plik

@ -12,8 +12,11 @@ import os
import pipes
import shlex
import requests
import subprocess
import time
from tempfile import TemporaryDirectory
import pytest
import yaml
@ -77,6 +80,35 @@ def run_repo2docker():
return run_test
@pytest.fixture()
def git_repo():
"""
Make a dummy git repo in which user can perform git operations
Should be used as a contextmanager, it will delete directory when done
"""
with TemporaryDirectory() as gitdir:
subprocess.check_call(['git', 'init'], cwd=gitdir)
yield gitdir
@pytest.fixture()
def repo_with_content(git_repo):
"""Create a git repository with content"""
with open(os.path.join(git_repo, 'test'), 'w') as f:
f.write("Hello")
subprocess.check_call(['git', 'add', 'test'], cwd=git_repo)
subprocess.check_call(['git', 'commit', '-m', 'Test commit'],
cwd=git_repo)
# get the commit's SHA1
sha1 = subprocess.Popen(['git', 'rev-parse', 'HEAD'],
stdout=subprocess.PIPE, cwd=git_repo)
sha1 = sha1.stdout.read().decode().strip()
yield git_repo, sha1
class Repo2DockerTest(pytest.Function):
"""A pytest.Item for running repo2docker"""
def __init__(self, name, parent, args):

Wyświetl plik

@ -1,51 +1,35 @@
from contextlib import contextmanager
import os
import subprocess
import pytest
from tempfile import TemporaryDirectory
from repo2docker.contentproviders import Git
@contextmanager
def git_repo():
"""
Makes a dummy git repo in which user can perform git operations
Should be used as a contextmanager, it will delete directory when done
"""
with TemporaryDirectory() as gitdir:
subprocess.check_call(['git', 'init'], cwd=gitdir)
yield gitdir
def test_clone():
def test_clone(repo_with_content):
"""Test simple git clone to a target dir"""
with git_repo() as upstream:
with open(os.path.join(upstream, 'test'), 'w') as f:
f.write("Hello")
subprocess.check_call(['git', 'add', 'test'], cwd=upstream)
subprocess.check_call(['git', 'commit', '-m', 'Test commit'],
cwd=upstream)
upstream, sha1 = repo_with_content
with TemporaryDirectory() as clone_dir:
spec = {'repo': upstream}
for _ in Git().fetch(spec, clone_dir):
git_content = Git()
for _ in git_content.fetch(spec, clone_dir):
pass
assert os.path.exists(os.path.join(clone_dir, 'test'))
def test_bad_ref():
assert git_content.content_id == sha1[:7]
def test_bad_ref(repo_with_content):
"""
Test trying to checkout a ref that doesn't exist
"""
with git_repo() as upstream:
upstream, sha1 = repo_with_content
with TemporaryDirectory() as clone_dir:
spec = {'repo': upstream, 'ref': 'does-not-exist'}
with pytest.raises(ValueError):
for _ in Git().fetch(spec, clone_dir):
pass
def test_always_accept():
# The git content provider should always accept a spec
assert Git().detect('/tmp/doesnt-exist', ref='1234')

Wyświetl plik

@ -24,6 +24,13 @@ def test_not_detect_local_file():
assert spec is None, spec
def test_content_id_is_None():
# content_id property should always be None for local content provider
# as we rely on the caching done by docker
local = Local()
assert local.content_id is None
def test_content_available():
# create a directory with files, check they are available in the output
# directory
@ -31,7 +38,11 @@ def test_content_available():
with open(os.path.join(d, 'test'), 'w') as f:
f.write("Hello")
local = Local()
spec = {'path': d}
for _ in Local().fetch(spec, d):
for _ in local.fetch(spec, d):
pass
assert os.path.exists(os.path.join(d, 'test'))
# content_id property should always be None for local content provider
# as we rely on the caching done by docker
assert local.content_id is None

Wyświetl plik

@ -0,0 +1,74 @@
from tempfile import TemporaryDirectory
from unittest.mock import patch
import escapism
from repo2docker.app import Repo2Docker
from repo2docker.__main__ import make_r2d
def test_find_image():
images = [{'RepoTags': ['some-org/some-repo:latest']}]
with patch('repo2docker.app.docker.APIClient') as FakeDockerClient:
instance = FakeDockerClient.return_value
instance.images.return_value = images
r2d = Repo2Docker()
r2d.output_image_spec = 'some-org/some-repo'
assert r2d.find_image()
instance.images.assert_called_with()
def test_dont_find_image():
images = [{'RepoTags': ['some-org/some-image-name:latest']}]
with patch('repo2docker.app.docker.APIClient') as FakeDockerClient:
instance = FakeDockerClient.return_value
instance.images.return_value = images
r2d = Repo2Docker()
r2d.output_image_spec = 'some-org/some-other-image-name'
assert not r2d.find_image()
instance.images.assert_called_with()
def test_image_name_remains_unchanged():
# if we specify an image name, it should remain unmodified
with TemporaryDirectory() as src:
app = Repo2Docker()
argv = ['--image-name', 'a-special-name', '--no-build', src]
app = make_r2d(argv)
app.start()
assert app.output_image_spec == 'a-special-name'
def test_image_name_contains_sha1(repo_with_content):
upstream, sha1 = repo_with_content
app = Repo2Docker()
# force selection of the git content provider by prefixing path with
# file://. This is important as the Local content provider does not
# store the SHA1 in the repo spec
argv = ['--no-build', 'file://' + upstream]
app = make_r2d(argv)
app.start()
assert app.output_image_spec.endswith(sha1[:7])
def test_local_dir_image_name(repo_with_content):
upstream, sha1 = repo_with_content
app = Repo2Docker()
argv = ['--no-build', upstream]
app = make_r2d(argv)
app.start()
assert app.output_image_spec.startswith(
'r2d' + escapism.escape(upstream, escape_char='-').lower()
)

Wyświetl plik

@ -66,7 +66,6 @@ def test_memlimit_nondockerfile(tmpdir, test, mem_limit, mem_allocate_mb, expect
assert success == expected
def test_memlimit_same_postbuild():
"""
Validate that the postBuild files for dockerfile & nondockerfile are same

Wyświetl plik

@ -2,7 +2,8 @@
Test if the subdirectory is correctly navigated to
"""
import os
import logging
import escapism
import pytest
from repo2docker.app import Repo2Docker
@ -23,10 +24,20 @@ def test_subdir(run_repo2docker):
assert cwd == os.getcwd(), "We should be back in %s" % cwd
def test_subdir_in_image_name():
app = Repo2Docker(
repo=TEST_REPO,
subdir='a directory',
)
app.initialize()
app.build()
escaped_dirname = escapism.escape('a directory', escape_char='-').lower()
assert escaped_dirname in app.output_image_spec
def test_subdir_invalid(caplog):
# test an error is raised when requesting a non existent subdir
#caplog.set_level(logging.INFO, logger='Repo2Docker')
app = Repo2Docker(
repo=TEST_REPO,
subdir='invalid-sub-dir',