From c19d0370362c63ac013ab8168f104266e958ca17 Mon Sep 17 00:00:00 2001 From: Tim Head Date: Wed, 25 Oct 2017 08:26:47 +0200 Subject: [PATCH] Add check for ref and deep clone If the ref does not exist in the shallow clone, create a full clone of the repository and retry. --- repo2docker/app.py | 60 +++++++++++++++---- .../external/datasci-shallow-clone.repos.yaml | 6 ++ 2 files changed, 55 insertions(+), 11 deletions(-) create mode 100644 tests/external/datasci-shallow-clone.repos.yaml diff --git a/repo2docker/app.py b/repo2docker/app.py index c7f36604..f95afac7 100644 --- a/repo2docker/app.py +++ b/repo2docker/app.py @@ -91,24 +91,62 @@ class Repo2Docker(Application): ) def fetch(self, url, ref, checkout_path): - try: - for line in execute_cmd(['git', 'clone', '--depth', '50', - url, checkout_path], - capture=self.json_logs): - self.log.info(line, extra=dict(phase='fetching')) - except subprocess.CalledProcessError: - self.log.error('Failed to clone repository!', extra=dict(phase='failed')) - sys.exit(1) + def _clone(depth=None): + if depth is not None: + command = ['git', 'clone', '--depth', str(depth), + url, checkout_path] + else: + command = ['git', 'clone', url, checkout_path] - if ref: try: - for line in execute_cmd(['git', 'reset', '--hard', ref], cwd=checkout_path, + for line in execute_cmd(command, capture=self.json_logs): + self.log.info(line, extra=dict(phase='fetching')) + except subprocess.CalledProcessError: + self.log.error('Failed to clone repository!', + extra=dict(phase='failed')) + sys.exit(1) + + def _unshallow(): + try: + for line in execute_cmd(['git', 'fetch', '--unshallow'], + capture=self.json_logs, + cwd=checkout_path): + self.log.info(line, extra=dict(phase='fetching')) + except subprocess.CalledProcessError: + self.log.error('Failed to unshallow repository!', + extra=dict(phase='failed')) + sys.exit(1) + + def _contains(ref): + try: + for line in execute_cmd(['git', 'cat-file', '-t', ref], + capture=self.json_logs, + cwd=checkout_path): + self.log.debug(line, extra=dict(phase='fetching')) + except subprocess.CalledProcessError: + return False + + return True + + def _checkout(ref): + try: + for line in execute_cmd(['git', 'reset', '--hard', ref], + cwd=checkout_path, capture=self.json_logs): self.log.info(line, extra=dict(phase='fetching')) except subprocess.CalledProcessError: - self.log.error('Failed to check out ref %s', ref, extra=dict(phase='failed')) + self.log.error('Failed to check out ref %s', ref, + extra=dict(phase='failed')) sys.exit(1) + # create a shallow clone first + _clone(depth=50) + if ref: + if not _contains(ref): + # have to create a full clone + _unshallow() + _checkout(ref) + def get_argparser(self): argparser = argparse.ArgumentParser() argparser.add_argument( diff --git a/tests/external/datasci-shallow-clone.repos.yaml b/tests/external/datasci-shallow-clone.repos.yaml new file mode 100644 index 00000000..36724be0 --- /dev/null +++ b/tests/external/datasci-shallow-clone.repos.yaml @@ -0,0 +1,6 @@ +# Check that we correctly detect that this ref is more than 50 commits ago +# and trigger a full clone of the repository +- name: Jake's Data Science Book + url: https://github.com/jakevdp/PythonDataScienceHandbook + ref: 8761de29a853f0c187286b7c7bc1e4767e7c5574 + verify: python -c 'import matplotlib'