From 866dd4f800973861118f901a8f42a5418c69b919 Mon Sep 17 00:00:00 2001
From: Tim Head
Date: Sun, 18 Feb 2018 13:41:22 +0100
Subject: [PATCH] Start using content providers

---
Reviewer notes (text between the "---" line and the diffstat is not part of
the commit message):

* app.py, Repo2Docker.fetch: when no provider matches, the error is logged
  but execution continues into picked_content_provider.fetch(...), so the
  failure surfaces as an AttributeError on None instead of a clean exit.
* base.py, Local.fetch: the body is `pass`, so the call returns None rather
  than an iterable, while Repo2Docker.fetch iterates over the return value
  of fetch(). Whether fetch() is ever reached for a local path is not
  visible in this patch -- confirm against the caller.
* git.py, Git.fetch: a clone failure raises ContentProviderException, but a
  failed ref lookup logs and calls sys.exit(1). The two failure paths are
  inconsistent, and no hunk in this patch catches ContentProviderException.
* git.py, Git.fetch: `if ref is None` replaces the old `if not ref`, so an
  empty-string ref is now treated as a real ref (confirm callers never
  pass "").

 repo2docker/app.py                       | 64 ++++++++++++++----------
 repo2docker/contentproviders/__init__.py |  2 +
 repo2docker/contentproviders/base.py     | 52 ++++++++++++++-----
 repo2docker/contentproviders/git.py      | 56 ++++++++++++++-------
 4 files changed, 117 insertions(+), 57 deletions(-)

diff --git a/repo2docker/app.py b/repo2docker/app.py
index 6c6b1348..c1d3d1fa 100644
--- a/repo2docker/app.py
+++ b/repo2docker/app.py
@@ -30,12 +30,12 @@ from traitlets.config import Application
 from . import __version__
 from .buildpacks import (
     PythonBuildPack, DockerBuildPack, LegacyBinderDockerBuildPack,
-    CondaBuildPack, JuliaBuildPack, BaseImage,
-    RBuildPack
+    CondaBuildPack, JuliaBuildPack, RBuildPack
 )
+from . import contentproviders
 from .utils import (
-    execute_cmd, ByteSpecification, maybe_cleanup, is_valid_docker_image_name,
-    validate_and_generate_port_mapping, check_ref
+    ByteSpecification, maybe_cleanup, is_valid_docker_image_name,
+    validate_and_generate_port_mapping
 )
 
 
@@ -95,6 +95,23 @@ class Repo2Docker(Application):
         """
     )
 
+    # Git is our content provider of last resort. This is to maintain the
+    # old behaviour when git and local directories were the only supported
+    # content providers. We can detect local directories from the path, but
+    # detecting if something will successfully `git clone` is very hard if all
+    # you can do is look at the path/URL to it.
+    content_providers = List(
+        [
+            contentproviders.Local,
+            contentproviders.Git,
+        ],
+        config=True,
+        help="""
+        Ordered list of ContentProviders to try in turn to fetch the contents
+        specified by the user.
+        """
+    )
+
     build_memory_limit = ByteSpecification(
         0,
         help="""
@@ -176,29 +193,24 @@ class Repo2Docker(Application):
 
     def fetch(self, url, ref, checkout_path):
         """Check out a repo using url and ref to the checkout_path location"""
-        try:
-            cmd = ['git', 'clone', '--recursive']
-            if not ref:
-                cmd.extend(['--depth', '1'])
-            cmd.extend([url, checkout_path])
-            for line in execute_cmd(cmd, capture=self.json_logs):
-                self.log.info(line, extra=dict(phase='fetching'))
-        except subprocess.CalledProcessError:
-            self.log.error('Failed to clone repository!',
-                           extra=dict(phase='failed'))
-            sys.exit(1)
+        # Pick a content provider based on URL
+        picked_content_provider = None
+        for CP in self.content_providers:
+            cp = CP(self.log)
+            spec = cp.detect(url, ref=ref)
+            if spec is not None:
+                picked_content_provider = cp
+                self.log.info("Picked {cp} content "
+                              "provider.\n".format(cp=cp.__class__.__name__))
+                break
 
-        if ref:
-            hash = check_ref(ref, checkout_path)
-            if hash is None:
-                self.log.error('Failed to check out ref %s', ref,
-                               extra=dict(phase='failed'))
-                sys.exit(1)
-            # If the hash is resolved above, we should be able to reset to it
-            for line in execute_cmd(['git', 'reset', '--hard', hash],
-                                    cwd=checkout_path,
-                                    capture=self.json_logs):
-                self.log.info(line, extra=dict(phase='fetching'))
+        if picked_content_provider is None:
+            self.log.error("No matching content provider found for "
+                           "{url}.".format(url=url))
+
+        for log_line in picked_content_provider.fetch(
+                spec, checkout_path, yield_output=self.json_logs):
+            self.log.info(log_line, extra=dict(phase='fetching'))
 
     def validate_image_name(self, image_name):
         """
diff --git a/repo2docker/contentproviders/__init__.py b/repo2docker/contentproviders/__init__.py
index e69de29b..cfe334b5 100644
--- a/repo2docker/contentproviders/__init__.py
+++ b/repo2docker/contentproviders/__init__.py
@@ -0,0 +1,2 @@
+from .git import Git
+from .base import Local
diff --git a/repo2docker/contentproviders/base.py b/repo2docker/contentproviders/base.py
index dbbc1215..3e19fce4 100644
--- a/repo2docker/contentproviders/base.py
+++ b/repo2docker/contentproviders/base.py
@@ -1,26 +1,54 @@
 """
 Base classes for repo2docker ContentProviders
 
-ContentProviders accept a `spec` of various kinds, and 
+ContentProviders accept a `spec` of various kinds, and
 provide the contents from the spec to a given output directory.
 """
+import os
+
+
 class ContentProviderException(Exception):
-    """Exception raised when a ContentProvider can not provide content
-    """
+    """Exception raised when a ContentProvider can not provide content."""
     pass
 
+
 class ContentProvider:
-    kind = ""
+    def __init__(self, logger):
+        self.log = logger
 
-    def provide(self, spec, output_dir, yield_output=False):
-        """Provide the contents of given spec to output_dir
+    def detect(self, repo, ref=None, extra_args=None):
+        """Determine compatibility between source and this provider.
 
-        This is a generator, and so should be yielded from or iterated over.
-
-        Arguments:
-            spec -- Dict / String specification understood by this ContentProvider
-            output_dir {string} -- Path to output directory (must already exist)
-            yield_output {bool} -- If True, return output line by line. If not, output just goes to stdout.
+        If the provider knows how to fetch this source it will return a
+        `spec` that can be passed to `fetch`. The arguments are the `repo`
+        string passed on the command-line, the value of the --ref parameter,
+        if provided and any provider specific arguments provided on the
+        command-line.
+
+        If the provider does not know how to fetch this source it will return
+        `None`.
         """
         raise NotImplementedError()
 
+    def fetch(self, spec, output_dir, yield_output=False):
+        """Provide the contents of given spec to output_dir
+
+        This generator yields logging information if `yield_output=True`,
+        otherwise log output is printed to stdout.
+
+        Arguments:
+            spec -- Dict specification understood by this ContentProvider
+            output_dir {string} -- Path to output directory (must already exist)
+            yield_output {bool} -- If True, return output line by line. If not,
+                output just goes to stdout.
+        """
+        raise NotImplementedError()
+
+
+class Local(ContentProvider):
+    def detect(self, source, ref=None, extra_args=None):
+        if os.path.exists(source):
+            return {'path': source}
+
+    def fetch(self, spec, output_dir, yield_output=False):
+        pass
diff --git a/repo2docker/contentproviders/git.py b/repo2docker/contentproviders/git.py
index 3c6b1026..29652f8c 100644
--- a/repo2docker/contentproviders/git.py
+++ b/repo2docker/contentproviders/git.py
@@ -1,28 +1,46 @@
 import subprocess
+import sys
 
 from .base import ContentProvider, ContentProviderException
-from ..utils import execute_cmd
+from ..utils import execute_cmd, check_ref
 
-class GitContentProvider(ContentProvider):
-    """Provides contents of a git repository (optionally at a given ref)
-    """
 
-    kind = "git"
-    def provide(self, spec, output_dir, yield_output=False):
-        url = spec['url']
+class Git(ContentProvider):
+    """Provide contents of a remote git repository."""
+
+    def detect(self, source, ref=None, extra_args=None):
+        # Git is our content provider of last resort. This is to maintain the
+        # old behaviour when git and local directories were the only supported
+        # content providers. This means that this content provider will always
+        # match. The downside is that the call to `fetch()` later on might fail
+        return {'repo': source, 'ref': ref}
+
+    def fetch(self, spec, output_dir, yield_output=False):
+        repo = spec['repo']
         ref = spec.get('ref', None)
+
+        # make a, possibly shallow, clone of the remote repository
         try:
-            for line in execute_cmd(['git', 'clone', url, output_dir],
+            cmd = ['git', 'clone', '--recursive']
+            if ref is None:
+                cmd.extend(['--depth', '1'])
+            cmd.extend([repo, output_dir])
+            for line in execute_cmd(cmd, capture=yield_output):
+                yield line
+
+        except subprocess.CalledProcessError as e:
+            msg = "Failed to clone repository from {repo}.".format(repo=repo)
+            raise ContentProviderException(msg) from e
+
+        # check out the specific ref given by the user
+        if ref is not None:
+            hash = check_ref(ref, output_dir)
+            if hash is None:
+                self.log.error('Failed to check out ref %s', ref,
+                               extra=dict(phase='failed'))
+                sys.exit(1)
+            # If the hash is resolved above, we should be able to reset to it
+            for line in execute_cmd(['git', 'reset', '--hard', hash],
+                                    cwd=output_dir,
                                     capture=yield_output):
                 yield line
-        except subprocess.CalledProcessError as e:
-            raise ContentProviderException("Failed to clone repository!") from e
-
-        if ref:
-            try:
-                for line in execute_cmd(['git', 'reset', '--hard', ref],
-                                        cwd=output_dir,
-                                        capture=yield_output):
-                    yield line
-            except subprocess.CalledProcessError:
-                raise ContentProviderException("Failed to checkout ref {}!".format(ref)) from e