kopia lustrzana https://github.com/jupyterhub/repo2docker
Merge pull request #242 from betatim/content-provider
[MRG] Add Content Providers Infrastructure
pull/407/head
commit
4310a39e28
|
@ -51,7 +51,7 @@ env:
|
|||
- REPO_TYPE=r
|
||||
- REPO_TYPE=dockerfile
|
||||
- REPO_TYPE=external/*
|
||||
- REPO_TYPE=*.py
|
||||
- REPO_TYPE=**/*.py
|
||||
global:
|
||||
- secure: gX7IOkbjlvcDwIH24sOLhutINx6TZRwujEusMWh1dqgYG2D69qQai/mTrRXO9PGRrsvQwIBk4RcILKAiZnk5O2Z1hLoIHk/oU2mNUmE44dDm4Xf/VTTdeYhjeOTR9B+KJ9NVwPxuSEDSND3lD7yFfvCqNXykipEhBtTliLupjWVxxXnaz0aZTYHUPJwanxdUc06AphSPwZjtm1m3qMUU8v7UdTGGAdW3NlgkKw0Xx2x5W31fW676vskC/GNQAbcRociYipuhSFWV4lu+6d8XF2xVO97xtzf54tBQzt6RgVfAKtiqkEIYSzJQBBpkQ6SM6yg+fQoQpOo8jPU9ZBjvaoopUG9vn8HRS/OtQrDcG3kEFnFAnaes8Iqtidp1deTn27LIlfCTl7kTFOp8yaaNlIMHJTJKTEMRhfdDlBYx7qiH8e9d/z37lupzY2loLHeNHdMRS1uYsfacZsmrnu9vAdpQmP1LuHivBPZEvgerinADaJiekelWOIEn956pDrno/YgnzP0i9LEBYnbbunqT8oEzLintNt5CXGdhkiG60j38McKCIn4sD6jbMMwgsqVFdClCBersyorKhOs7P8at5vX4xf8fMiKPC8LZPzYVIQYzCjmwSOFQ+Rzmz5gSj+DRTANKfHpzZCKZEF6amBYMGE1O5osF8m6M10vtW9ToK+s=
|
||||
- secure: Cfhb0BUT54JjEZD8n44Jj+o1lt5p32Lfg7W/euTyZ61YylDx0+XEYTzfWcwxOzH9fLpWr6dDrBMGHA/FPqsWA5BkoGdiBJ1OOVy2tmDRButctobWM3SVwa+Rhh8bZWlK8yKT2S3n6CtK4mesmjzdbUShL7YnKOSl8LBaTT5Y5oT8Oxsq51pfg8fJUImim8H20t8H7emaEzZorF4OSGRtajcAgukt5YoAqTEVDq+bFRBHZalxkcRqLhsGe3CCWa28kjGTL4MPZpCI6/AXIXHzihfG3rGq40ZT8jZ9GPP3MBgkiJWtFiTC9h16G34b/JI/TD40zCmoW9/9oVjRK4UlLGCAv6bgzFhCRof2abhB9NTZDniNzkO0T15uHs3VLbLCPYB0xYyClAFxm2P6e8WPChyENKfTNh+803IKFFo4JaTjOnKzi89N72v5+bT6ghP932nmjJr1AO65xjw63CeDmaLoHDY73n11DibybWQgEeiNzJuSzbIHyqMPhW5XqeroEjKKstdPHtVfOViI9ywjEMy0HCPsspaVI7Aow0Iv8E4Ajvd32W7z0h0fSCx/i25hEOAo2vhBsmQKJA7IquB3N88M11L874h/8J+oc/osW1EB5z7Ukke5YCq94Qh3qImSIhJULXMMc1QjEqYsqhLXtiMG2HUge0Y5hwwnnbEIRMQ=
|
||||
|
|
|
@ -30,12 +30,12 @@ from traitlets.config import Application
|
|||
from . import __version__
|
||||
from .buildpacks import (
|
||||
PythonBuildPack, DockerBuildPack, LegacyBinderDockerBuildPack,
|
||||
CondaBuildPack, JuliaBuildPack, BaseImage,
|
||||
RBuildPack
|
||||
CondaBuildPack, JuliaBuildPack, RBuildPack
|
||||
)
|
||||
from . import contentproviders
|
||||
from .utils import (
|
||||
execute_cmd, ByteSpecification, maybe_cleanup, is_valid_docker_image_name,
|
||||
validate_and_generate_port_mapping, check_ref
|
||||
ByteSpecification, maybe_cleanup, is_valid_docker_image_name,
|
||||
validate_and_generate_port_mapping
|
||||
)
|
||||
|
||||
|
||||
|
@ -95,6 +95,23 @@ class Repo2Docker(Application):
|
|||
"""
|
||||
)
|
||||
|
||||
# Git is our content provider of last resort. This is to maintain the
|
||||
# old behaviour when git and local directories were the only supported
|
||||
# content providers. We can detect local directories from the path, but
|
||||
# detecting if something will successfully `git clone` is very hard if all
|
||||
# you can do is look at the path/URL to it.
# Order matters: `fetch()` uses the first provider whose `detect()` returns
# a spec, and `Git.detect()` always returns one, so any provider listed
# after Git would never be picked.
|
||||
content_providers = List(
|
||||
[
|
||||
contentproviders.Local,
|
||||
||||
contentproviders.Git,
|
||||
],
|
||||
config=True,
|
||||
help="""
|
||||
Ordered list by priority of ContentProviders to try in turn to fetch
|
||||
the contents specified by the user.
|
||||
"""
|
||||
)
|
||||
|
||||
build_memory_limit = ByteSpecification(
|
||||
0,
|
||||
help="""
|
||||
|
@ -175,30 +192,28 @@ class Repo2Docker(Application):
|
|||
)
|
||||
|
||||
def fetch(self, url, ref, checkout_path):
    """Fetch the contents of `url` at `ref` into `checkout_path`.

    Each class in `self.content_providers` is instantiated in order and
    asked to `detect()` the source. The first provider that returns a
    spec (anything other than `None`) is used, and every line yielded by
    its `fetch()` is logged with `phase='fetching'`.

    Arguments:
        url -- source given by the user (passed to `detect()` unchanged)
        ref -- reference to check out, forwarded to `detect()` as `ref`
        checkout_path -- directory handed to the provider's `fetch()`

    Exits the process with status 1 if no content provider accepts `url`.
    """
    for provider_class in self.content_providers:
        provider = provider_class()
        spec = provider.detect(url, ref=ref)
        if spec is not None:
            self.log.info("Picked {cp} content "
                          "provider.\n".format(cp=provider.__class__.__name__))
            break
    else:
        # The loop finished without `break`: nothing matched (or the list
        # of providers is empty). Stop here instead of falling through and
        # calling `.fetch` on a provider that does not exist.
        self.log.error("No matching content provider found for "
                       "{url}.".format(url=url),
                       extra=dict(phase='failed'))
        sys.exit(1)

    for log_line in provider.fetch(
            spec, checkout_path, yield_output=self.json_logs):
        self.log.info(log_line, extra=dict(phase='fetching'))
|
||||
|
||||
def validate_image_name(self, image_name):
|
||||
"""
|
||||
|
@ -409,20 +424,29 @@ class Repo2Docker(Application):
|
|||
if args.appendix:
|
||||
self.appendix = args.appendix
|
||||
|
||||
self.repo = args.repo
|
||||
self.ref = args.ref
|
||||
# if the source exists locally we don't want to delete it at the end
|
||||
if os.path.exists(args.repo):
|
||||
# Let's treat this as a local directory we are building
|
||||
self.repo_type = 'local'
|
||||
self.repo = args.repo
|
||||
self.ref = None
|
||||
self.cleanup_checkout = False
|
||||
if args.editable:
|
||||
self.volumes[os.path.abspath(args.repo)] = '.'
|
||||
else:
|
||||
self.repo_type = 'remote'
|
||||
self.repo = args.repo
|
||||
self.ref = args.ref
|
||||
self.cleanup_checkout = args.clean
|
||||
|
||||
# user wants to mount a local directory into the container for
|
||||
# editing
|
||||
if args.editable:
|
||||
# the user has to point at a directory, not just a path for us
|
||||
# to be able to mount it. We might have content providers that can
|
||||
# provide content from a local `something.zip` file, which we
|
||||
# couldn't mount in editable mode
|
||||
if os.path.isdir(args.repo):
|
||||
self.volumes[os.path.abspath(args.repo)] = '.'
|
||||
else:
|
||||
self.log.error('Can not mount "{}" in editable mode '
|
||||
'as it is not a directory'.format(args.repo),
|
||||
extra=dict(phase='failed'))
|
||||
sys.exit(1)
|
||||
|
||||
if args.json_logs:
|
||||
# register JSON excepthook to avoid non-JSON output on errors
|
||||
sys.excepthook = self.json_excepthook
|
||||
|
@ -661,7 +685,12 @@ class Repo2Docker(Application):
|
|||
raise e
|
||||
sys.exit(1)
|
||||
|
||||
if self.repo_type == 'local':
|
||||
# If the source to be executed is a directory, continue using the
|
||||
# directory. In the case of a local directory, it is used as both the
|
||||
# source and target. Reusing a local directory seems better than
|
||||
# making a copy of it as it might contain large files that would be
|
||||
# expensive to copy.
|
||||
if os.path.isdir(self.repo):
|
||||
checkout_path = self.repo
|
||||
else:
|
||||
if self.git_workdir is None:
|
||||
|
@ -672,8 +701,7 @@ class Repo2Docker(Application):
|
|||
# keep as much as possible in the context manager to make sure we
|
||||
# cleanup if things go wrong
|
||||
with maybe_cleanup(checkout_path, self.cleanup_checkout):
|
||||
if self.repo_type == 'remote':
|
||||
self.fetch(self.repo, self.ref, checkout_path)
|
||||
self.fetch(self.repo, self.ref, checkout_path)
|
||||
|
||||
if self.subdir:
|
||||
checkout_path = os.path.join(checkout_path, self.subdir).rstrip('/')
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
from .git import Git
|
||||
from .base import Local
|
|
@ -0,0 +1,59 @@
|
|||
"""
|
||||
Base classes for repo2docker ContentProviders
|
||||
|
||||
ContentProviders accept a `spec` of various kinds, and
|
||||
provide the contents from the spec to a given output directory.
|
||||
"""
|
||||
import logging
|
||||
import os
|
||||
|
||||
|
||||
class ContentProviderException(Exception):
    """Signals that a ContentProvider was unable to provide content."""
|
||||
|
||||
|
||||
class ContentProvider:
    """Interface implemented by every repo2docker content provider.

    Subclasses override `detect` and `fetch`; the versions defined here
    only raise `NotImplementedError`.
    """

    def __init__(self):
        # all providers log through the shared "repo2docker" logger
        self.log = logging.getLogger("repo2docker")

    def detect(self, repo, ref=None, extra_args=None):
        """Decide whether this provider can fetch the given source.

        Arguments:
            repo -- the `repo` string passed on the command-line
            ref -- value of the --ref parameter, if one was provided
            extra_args -- provider specific arguments from the command-line

        Returns a `spec` that can be handed to `fetch` when the provider
        knows how to fetch this source, and `None` when it does not.
        """
        raise NotImplementedError

    def fetch(self, spec, output_dir, yield_output=False):
        """Place the contents described by `spec` into `output_dir`.

        Implementations are generators: with `yield_output=True` they
        yield logging information, otherwise log output is printed to
        stdout.

        Arguments:
            spec -- Dict specification understood by this ContentProvider
            output_dir {string} -- Path to output directory (must already
                exist)
            yield_output {bool} -- If True, return output line by line. If
                not, output just goes to stdout.
        """
        raise NotImplementedError
|
||||
|
||||
|
||||
class Local(ContentProvider):
    """Provide contents of a directory that already exists locally."""

    def detect(self, source, ref=None, extra_args=None):
        """Return `{'path': source}` if `source` is an existing directory.

        Anything else (including a path to a regular file) is rejected by
        returning `None`. `ref` and `extra_args` are accepted for interface
        compatibility and ignored.
        """
        if os.path.isdir(source):
            return {'path': source}
        return None

    def fetch(self, spec, output_dir, yield_output=False):
        """Confirm the local content is already in `output_dir`.

        No files are copied: this provider only works when `output_dir` is
        the very directory named by `spec['path']`, which is asserted on
        the first iteration of the generator.

        Yields a single informational message so that callers which log
        every yielded item get a meaningful line instead of `None`.
        """
        # nothing to be done if your content is already in the output directory
        msg = "Local content provider assumes {} == {}".format(spec['path'],
                                                               output_dir)
        assert output_dir == spec['path'], msg
        yield msg
|
|
@ -0,0 +1,46 @@
|
|||
import subprocess
|
||||
import sys
|
||||
|
||||
from .base import ContentProvider, ContentProviderException
|
||||
from ..utils import execute_cmd, check_ref
|
||||
|
||||
|
||||
class Git(ContentProvider):
    """Provide contents of a remote git repository."""

    def detect(self, source, ref=None, extra_args=None):
        """Always accept `source`, returning `{'repo': source, 'ref': ref}`."""
        # Git is our content provider of last resort. This is to maintain the
        # old behaviour when git and local directories were the only supported
        # content providers. This means that this content provider will always
        # match. The downside is that the call to `fetch()` later on might fail
        return {'repo': source, 'ref': ref}

    def fetch(self, spec, output_dir, yield_output=False):
        """Clone `spec['repo']` into `output_dir` and check out `spec['ref']`.

        Without a ref the clone is shallow (`--depth 1`). With a ref a full
        clone is made, the ref is resolved with `check_ref` and the checkout
        is hard-reset to the resolved hash.

        Yields whatever `execute_cmd` yields for the git commands, which are
        run with `capture=yield_output`.

        Raises:
            ContentProviderException -- if `git clone` fails or the ref can
                not be resolved in the cloned repository.
        """
        repo = spec['repo']
        # An empty ref can never name a commit; treat it like "no ref given".
        ref = spec.get('ref') or None

        # make a, possibly shallow, clone of the remote repository
        try:
            cmd = ['git', 'clone', '--recursive']
            if ref is None:
                cmd.extend(['--depth', '1'])
            # `--` ends option parsing so that a user supplied source which
            # starts with `-` is never interpreted as a git option.
            cmd.extend(['--', repo, output_dir])
            yield from execute_cmd(cmd, capture=yield_output)

        except subprocess.CalledProcessError as e:
            msg = "Failed to clone repository from {repo}.".format(repo=repo)
            raise ContentProviderException(msg) from e

        # check out the specific ref given by the user
        if ref is not None:
            resolved_hash = check_ref(ref, output_dir)
            if resolved_hash is None:
                self.log.error('Failed to check out ref %s', ref,
                               extra=dict(phase='failed'))
                # Report the failure to the caller instead of terminating
                # the whole process from inside library code; this mirrors
                # how a failed clone is reported above.
                raise ContentProviderException(
                    "Failed to check out ref {ref}.".format(ref=ref))
            # If the hash is resolved above, we should be able to reset to it
            yield from execute_cmd(['git', 'reset', '--hard', resolved_hash],
                                   cwd=output_dir,
                                   capture=yield_output)
|
|
@ -0,0 +1,45 @@
|
|||
from contextlib import contextmanager
|
||||
import os
|
||||
import subprocess
|
||||
from tempfile import TemporaryDirectory
|
||||
from repo2docker.contentproviders import Git
|
||||
|
||||
|
||||
@contextmanager
def git_repo():
    """
    Create a throw-away git repository for tests to operate on.

    Use as a context manager: the value yielded is the path of a temporary
    directory in which `git init` has been run. The directory is removed
    when the context exits.
    """
    init_cmd = ['git', 'init']
    with TemporaryDirectory() as repo_dir:
        subprocess.check_call(init_cmd, cwd=repo_dir)
        yield repo_dir
|
||||
|
||||
|
||||
def test_clone():
    """Test simple git clone to a target dir"""
    with git_repo() as upstream:
        with open(os.path.join(upstream, 'test'), 'w') as f:
            f.write("Hello")

        subprocess.check_call(['git', 'add', 'test'], cwd=upstream)
        # Supply an identity on the command line: `git commit` refuses to
        # run on machines (e.g. fresh CI workers) where user.name and
        # user.email are not configured.
        subprocess.check_call(['git',
                               '-c', 'user.name=repo2docker tests',
                               '-c', 'user.email=tests@example.invalid',
                               'commit', '-m', 'Test commit'],
                              cwd=upstream)

        with TemporaryDirectory() as clone_dir:
            spec = {'repo': upstream}
            # fetch() is a generator; it has to be drained for the clone
            # to actually happen
            for _ in Git().fetch(spec, clone_dir):
                pass
            assert os.path.exists(os.path.join(clone_dir, 'test'))
|
||||
|
||||
|
||||
def test_always_accept():
    """The git content provider should always accept a spec."""
    cases = [
        # a path that does not exist, with and without a ref
        ('/tmp/doesnt-exist', {'ref': '1234'}),
        ('/tmp/doesnt-exist', {}),
        # a path that exists
        ('/etc', {'ref': '1234'}),
        # a remote URL
        ('https://example.com/path/here', {}),
    ]
    for source, kwargs in cases:
        assert Git().detect(source, **kwargs)
|
|
@ -0,0 +1,37 @@
|
|||
import os
|
||||
from tempfile import TemporaryDirectory, NamedTemporaryFile
|
||||
|
||||
from repo2docker.contentproviders import Local
|
||||
|
||||
|
||||
def test_detect_local_dir():
    """Local.detect() accepts an existing directory and reports its path."""
    with TemporaryDirectory() as tmp_dir:
        spec = Local().detect(tmp_dir)

        # a local directory must be accepted ...
        assert spec is not None, spec
        # ... and the spec must point back at that same directory
        assert 'path' in spec, spec
        assert spec['path'] == tmp_dir
|
||||
|
||||
|
||||
def test_not_detect_local_file():
    """Local.detect() rejects a path that is a regular file."""
    with NamedTemporaryFile() as tmp_file:
        spec = Local().detect(tmp_file.name)

        # a local file is NOT something this provider handles
        assert spec is None, spec
|
||||
|
||||
|
||||
def test_content_available():
    """Files in a local directory are still there after Local.fetch()."""
    with TemporaryDirectory() as tmp_dir:
        target = os.path.join(tmp_dir, 'test')
        with open(target, 'w') as handle:
            handle.write("Hello")

        # source and output directory are the same directory
        spec = {'path': tmp_dir}
        for _ in Local().fetch(spec, tmp_dir):
            pass
        assert os.path.exists(target)
|
Ładowanie…
Reference in New Issue