Merge pull request #242 from betatim/content-provider

[MRG] Add Content Providers Infrastructure
pull/407/head
Tim Head 2018-10-16 11:35:46 +02:00 committed by GitHub
commit 4310a39e28
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 257 additions and 40 deletions

View file

@@ -51,7 +51,7 @@ env:
- REPO_TYPE=r
- REPO_TYPE=dockerfile
- REPO_TYPE=external/*
- REPO_TYPE=*.py
- REPO_TYPE=**/*.py
global:
- secure: gX7IOkbjlvcDwIH24sOLhutINx6TZRwujEusMWh1dqgYG2D69qQai/mTrRXO9PGRrsvQwIBk4RcILKAiZnk5O2Z1hLoIHk/oU2mNUmE44dDm4Xf/VTTdeYhjeOTR9B+KJ9NVwPxuSEDSND3lD7yFfvCqNXykipEhBtTliLupjWVxxXnaz0aZTYHUPJwanxdUc06AphSPwZjtm1m3qMUU8v7UdTGGAdW3NlgkKw0Xx2x5W31fW676vskC/GNQAbcRociYipuhSFWV4lu+6d8XF2xVO97xtzf54tBQzt6RgVfAKtiqkEIYSzJQBBpkQ6SM6yg+fQoQpOo8jPU9ZBjvaoopUG9vn8HRS/OtQrDcG3kEFnFAnaes8Iqtidp1deTn27LIlfCTl7kTFOp8yaaNlIMHJTJKTEMRhfdDlBYx7qiH8e9d/z37lupzY2loLHeNHdMRS1uYsfacZsmrnu9vAdpQmP1LuHivBPZEvgerinADaJiekelWOIEn956pDrno/YgnzP0i9LEBYnbbunqT8oEzLintNt5CXGdhkiG60j38McKCIn4sD6jbMMwgsqVFdClCBersyorKhOs7P8at5vX4xf8fMiKPC8LZPzYVIQYzCjmwSOFQ+Rzmz5gSj+DRTANKfHpzZCKZEF6amBYMGE1O5osF8m6M10vtW9ToK+s=
- secure: Cfhb0BUT54JjEZD8n44Jj+o1lt5p32Lfg7W/euTyZ61YylDx0+XEYTzfWcwxOzH9fLpWr6dDrBMGHA/FPqsWA5BkoGdiBJ1OOVy2tmDRButctobWM3SVwa+Rhh8bZWlK8yKT2S3n6CtK4mesmjzdbUShL7YnKOSl8LBaTT5Y5oT8Oxsq51pfg8fJUImim8H20t8H7emaEzZorF4OSGRtajcAgukt5YoAqTEVDq+bFRBHZalxkcRqLhsGe3CCWa28kjGTL4MPZpCI6/AXIXHzihfG3rGq40ZT8jZ9GPP3MBgkiJWtFiTC9h16G34b/JI/TD40zCmoW9/9oVjRK4UlLGCAv6bgzFhCRof2abhB9NTZDniNzkO0T15uHs3VLbLCPYB0xYyClAFxm2P6e8WPChyENKfTNh+803IKFFo4JaTjOnKzi89N72v5+bT6ghP932nmjJr1AO65xjw63CeDmaLoHDY73n11DibybWQgEeiNzJuSzbIHyqMPhW5XqeroEjKKstdPHtVfOViI9ywjEMy0HCPsspaVI7Aow0Iv8E4Ajvd32W7z0h0fSCx/i25hEOAo2vhBsmQKJA7IquB3N88M11L874h/8J+oc/osW1EB5z7Ukke5YCq94Qh3qImSIhJULXMMc1QjEqYsqhLXtiMG2HUge0Y5hwwnnbEIRMQ=

View file

@@ -30,12 +30,12 @@ from traitlets.config import Application
from . import __version__
from .buildpacks import (
PythonBuildPack, DockerBuildPack, LegacyBinderDockerBuildPack,
CondaBuildPack, JuliaBuildPack, BaseImage,
RBuildPack
CondaBuildPack, JuliaBuildPack, RBuildPack
)
from . import contentproviders
from .utils import (
execute_cmd, ByteSpecification, maybe_cleanup, is_valid_docker_image_name,
validate_and_generate_port_mapping, check_ref
ByteSpecification, maybe_cleanup, is_valid_docker_image_name,
validate_and_generate_port_mapping
)
@@ -95,6 +95,23 @@ class Repo2Docker(Application):
"""
)
# Git is our content provider of last resort. This is to maintain the
# old behaviour when git and local directories were the only supported
# content providers. We can detect local directories from the path, but
# detecting if something will successfully `git clone` is very hard if all
# you can do is look at the path/URL to it.
content_providers = List(
[
contentproviders.Local,
contentproviders.Git,
],
config=True,
help="""
Ordered list by priority of ContentProviders to try in turn to fetch
the contents specified by the user.
"""
)
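Because content_providers is a configurable List trait, the order can be changed through traitlets configuration or from Python. A minimal sketch using the Python API; MyProvider stands in for a hypothetical custom provider and is not part of this PR:

    from repo2docker.app import Repo2Docker
    from repo2docker import contentproviders
    from repo2docker.contentproviders.base import ContentProvider


    class MyProvider(ContentProvider):
        """Placeholder for an assumed custom provider; never matches anything."""
        def detect(self, source, ref=None, extra_args=None):
            return None


    r2d = Repo2Docker()
    # Providers are tried in order; Git stays the fallback of last resort.
    r2d.content_providers = [MyProvider, contentproviders.Local, contentproviders.Git]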
build_memory_limit = ByteSpecification(
0,
help="""
@@ -175,30 +192,28 @@ class Repo2Docker(Application):
)
def fetch(self, url, ref, checkout_path):
"""Check out a repo using url and ref to the checkout_path location"""
try:
cmd = ['git', 'clone', '--recursive']
if not ref:
cmd.extend(['--depth', '1'])
cmd.extend([url, checkout_path])
for line in execute_cmd(cmd, capture=self.json_logs):
self.log.info(line, extra=dict(phase='fetching'))
except subprocess.CalledProcessError:
self.log.error('Failed to clone repository!',
extra=dict(phase='failed'))
sys.exit(1)
"""Check out a repo using url and ref to the checkout_path locationself.
if ref:
hash = check_ref(ref, checkout_path)
if hash is None:
self.log.error('Failed to check out ref %s', ref,
extra=dict(phase='failed'))
sys.exit(1)
# If the hash is resolved above, we should be able to reset to it
for line in execute_cmd(['git', 'reset', '--hard', hash],
cwd=checkout_path,
capture=self.json_logs):
self.log.info(line, extra=dict(phase='fetching'))
Iterate through possible content providers until a valid provider,
based on URL, is found.
"""
picked_content_provider = None
for ContentProvider in self.content_providers:
cp = ContentProvider()
spec = cp.detect(url, ref=ref)
if spec is not None:
picked_content_provider = cp
self.log.info("Picked {cp} content "
"provider.\n".format(cp=cp.__class__.__name__))
break
if picked_content_provider is None:
self.log.error("No matching content provider found for "
"{url}.".format(url=url))
for log_line in picked_content_provider.fetch(
spec, checkout_path, yield_output=self.json_logs):
self.log.info(log_line, extra=dict(phase='fetching'))
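The loop above amounts to "the first provider whose detect() returns a spec wins". A minimal standalone sketch of that selection pattern (the helper name is illustrative only):

    from repo2docker import contentproviders


    def pick_provider(url, ref=None):
        # Try providers in priority order; the first one that recognises the
        # source wins, otherwise signal "no match" with (None, None).
        for ContentProvider in (contentproviders.Local, contentproviders.Git):
            cp = ContentProvider()
            spec = cp.detect(url, ref=ref)
            if spec is not None:
                return cp, spec
        return None, None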
def validate_image_name(self, image_name):
"""
@@ -409,20 +424,29 @@ class Repo2Docker(Application):
if args.appendix:
self.appendix = args.appendix
self.repo = args.repo
self.ref = args.ref
# if the source exists locally we don't want to delete it at the end
if os.path.exists(args.repo):
# Let's treat this as a local directory we are building
self.repo_type = 'local'
self.repo = args.repo
self.ref = None
self.cleanup_checkout = False
if args.editable:
self.volumes[os.path.abspath(args.repo)] = '.'
else:
self.repo_type = 'remote'
self.repo = args.repo
self.ref = args.ref
self.cleanup_checkout = args.clean
# user wants to mount a local directory into the container for
# editing
if args.editable:
# the user has to point at a directory, not just a path for us
# to be able to mount it. We might have content providers that can
# provide content from a local `something.zip` file, which we
# couldn't mount in editable mode
if os.path.isdir(args.repo):
self.volumes[os.path.abspath(args.repo)] = '.'
else:
self.log.error('Can not mount "{}" in editable mode '
'as it is not a directory'.format(args.repo),
extra=dict(phase='failed'))
sys.exit(1)
if args.json_logs:
# register JSON excepthook to avoid non-JSON output on errors
sys.excepthook = self.json_excepthook
@@ -661,7 +685,12 @@ class Repo2Docker(Application):
raise e
sys.exit(1)
if self.repo_type == 'local':
# If the source to be executed is a directory, continue using the
# directory. In the case of a local directory, it is used as both the
# source and target. Reusing a local directory seems better than
# making a copy of it as it might contain large files that would be
# expensive to copy.
if os.path.isdir(self.repo):
checkout_path = self.repo
else:
if self.git_workdir is None:
@@ -672,8 +701,7 @@ class Repo2Docker(Application):
# keep as much as possible in the context manager to make sure we
# cleanup if things go wrong
with maybe_cleanup(checkout_path, self.cleanup_checkout):
if self.repo_type == 'remote':
self.fetch(self.repo, self.ref, checkout_path)
self.fetch(self.repo, self.ref, checkout_path)
if self.subdir:
checkout_path = os.path.join(checkout_path, self.subdir).rstrip('/')

View file

@@ -0,0 +1,2 @@
from .git import Git
from .base import Local

View file

@@ -0,0 +1,59 @@
"""
Base classes for repo2docker ContentProviders
ContentProviders accept a `spec` of various kinds, and
provide the contents from the spec to a given output directory.
"""
import logging
import os
class ContentProviderException(Exception):
"""Exception raised when a ContentProvider can not provide content."""
pass
class ContentProvider:
def __init__(self):
self.log = logging.getLogger("repo2docker")
def detect(self, repo, ref=None, extra_args=None):
"""Determine compatibility between source and this provider.
If the provider knows how to fetch this source it will return a
`spec` that can be passed to `fetch`. The arguments are the `repo`
string passed on the command-line, the value of the --ref parameter,
if provided, and any provider-specific arguments given on the
command-line.
If the provider does not know how to fetch this source it will return
`None`.
"""
raise NotImplementedError()
def fetch(self, spec, output_dir, yield_output=False):
"""Provide the contents of given spec to output_dir
This generator yields logging information if `yield_output=True`,
otherwise log output is printed to stdout.
Arguments:
spec -- Dict specification understood by this ContentProvider
output_dir {string} -- Path to output directory (must already exist)
yield_output {bool} -- If True, return output line by line. If not,
output just goes to stdout.
"""
raise NotImplementedError()
class Local(ContentProvider):
def detect(self, source, ref=None, extra_args=None):
if os.path.isdir(source):
return {'path': source}
def fetch(self, spec, output_dir, yield_output=False):
# nothing to be done if your content is already in the output directory
msg = "Local content provider assumes {} == {}".format(spec['path'],
output_dir)
assert output_dir == spec['path'], msg
yield
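These two methods are the whole provider contract: detect() turns a source string into a spec or returns None, and fetch() is a generator that places the content in output_dir. A minimal sketch of a third-party provider following that contract; the class and the zip-file idea are assumptions, not part of this PR:

    import os
    import zipfile

    from repo2docker.contentproviders.base import ContentProvider


    class LocalZip(ContentProvider):
        """Assumed example: provide the contents of a local .zip archive."""

        def detect(self, source, ref=None, extra_args=None):
            if os.path.isfile(source) and source.endswith('.zip'):
                return {'path': source}
            # returning None tells Repo2Docker.fetch() to try the next provider

        def fetch(self, spec, output_dir, yield_output=False):
            with zipfile.ZipFile(spec['path']) as archive:
                archive.extractall(output_dir)
            yield "Unpacked {} to {}\n".format(spec['path'], output_dir)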

View file

@@ -0,0 +1,46 @@
import subprocess
import sys
from .base import ContentProvider, ContentProviderException
from ..utils import execute_cmd, check_ref
class Git(ContentProvider):
"""Provide contents of a remote git repository."""
def detect(self, source, ref=None, extra_args=None):
# Git is our content provider of last resort. This is to maintain the
# old behaviour when git and local directories were the only supported
# content providers. This means that this content provider will always
# match. The downside is that the call to `fetch()` later on might fail
return {'repo': source, 'ref': ref}
def fetch(self, spec, output_dir, yield_output=False):
repo = spec['repo']
ref = spec.get('ref', None)
# make a, possibly shallow, clone of the remote repository
try:
cmd = ['git', 'clone', '--recursive']
if ref is None:
cmd.extend(['--depth', '1'])
cmd.extend([repo, output_dir])
for line in execute_cmd(cmd, capture=yield_output):
yield line
except subprocess.CalledProcessError as e:
msg = "Failed to clone repository from {repo}.".format(repo=repo)
raise ContentProviderException(msg) from e
# check out the specific ref given by the user
if ref is not None:
hash = check_ref(ref, output_dir)
if hash is None:
self.log.error('Failed to check out ref %s', ref,
extra=dict(phase='failed'))
sys.exit(1)
# If the hash is resolved above, we should be able to reset to it
for line in execute_cmd(['git', 'reset', '--hard', hash],
cwd=output_dir,
capture=yield_output):
yield line
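Used directly, the provider looks like this; the repository URL and target directory are placeholders:

    from repo2docker.contentproviders import Git

    git = Git()
    # detect() always matches, since Git is the provider of last resort
    spec = git.detect('https://github.com/org/repo', ref='master')
    for line in git.fetch(spec, '/tmp/checkout', yield_output=True):
        print(line, end='')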

View file

@@ -0,0 +1,45 @@
from contextlib import contextmanager
import os
import subprocess
from tempfile import TemporaryDirectory
from repo2docker.contentproviders import Git
@contextmanager
def git_repo():
"""
Makes a dummy git repo in which user can perform git operations
Should be used as a contextmanager, it will delete directory when done
"""
with TemporaryDirectory() as gitdir:
subprocess.check_call(['git', 'init'], cwd=gitdir)
yield gitdir
def test_clone():
"""Test simple git clone to a target dir"""
with git_repo() as upstream:
with open(os.path.join(upstream, 'test'), 'w') as f:
f.write("Hello")
subprocess.check_call(['git', 'add', 'test'], cwd=upstream)
subprocess.check_call(['git', 'commit', '-m', 'Test commit'],
cwd=upstream)
with TemporaryDirectory() as clone_dir:
spec = {'repo': upstream}
for _ in Git().fetch(spec, clone_dir):
pass
assert os.path.exists(os.path.join(clone_dir, 'test'))
def test_always_accept():
# The git content provider should always accept a spec
assert Git().detect('/tmp/doesnt-exist', ref='1234')
assert Git().detect('/tmp/doesnt-exist')
# a path that exists
assert Git().detect('/etc', ref='1234')
# a remote URL
assert Git().detect('https://example.com/path/here')

View file

@@ -0,0 +1,37 @@
import os
from tempfile import TemporaryDirectory, NamedTemporaryFile
from repo2docker.contentproviders import Local
def test_detect_local_dir():
with TemporaryDirectory() as d:
local = Local()
spec = local.detect(d)
# should accept a local directory
assert spec is not None, spec
assert 'path' in spec, spec
assert spec['path'] == d
def test_not_detect_local_file():
with NamedTemporaryFile() as f:
local = Local()
spec = local.detect(f.name)
# should NOT accept a local file
assert spec is None, spec
def test_content_available():
# create a directory with files, check they are available in the output
# directory
with TemporaryDirectory() as d:
with open(os.path.join(d, 'test'), 'w') as f:
f.write("Hello")
spec = {'path': d}
for _ in Local().fetch(spec, d):
pass
assert os.path.exists(os.path.join(d, 'test'))