From dce6c1e8d731b4846d722ef0701745321c3a694c Mon Sep 17 00:00:00 2001 From: Tim Head Date: Mon, 27 May 2019 17:32:03 +0200 Subject: [PATCH 1/9] Add basic Zenodo content provider --- repo2docker/app.py | 1 + repo2docker/contentproviders/__init__.py | 1 + repo2docker/contentproviders/zenodo.py | 73 +++++++++++++++++++ repo2docker/utils.py | 90 ++++++++++++++++++++++++ 4 files changed, 165 insertions(+) create mode 100644 repo2docker/contentproviders/zenodo.py diff --git a/repo2docker/app.py b/repo2docker/app.py index 254c8d5d..4b02a628 100644 --- a/repo2docker/app.py +++ b/repo2docker/app.py @@ -136,6 +136,7 @@ class Repo2Docker(Application): content_providers = List( [ contentproviders.Local, + contentproviders.Zenodo, contentproviders.Git, ], config=True, diff --git a/repo2docker/contentproviders/__init__.py b/repo2docker/contentproviders/__init__.py index cfe334b5..d648731d 100644 --- a/repo2docker/contentproviders/__init__.py +++ b/repo2docker/contentproviders/__init__.py @@ -1,2 +1,3 @@ from .git import Git from .base import Local +from .zenodo import Zenodo diff --git a/repo2docker/contentproviders/zenodo.py b/repo2docker/contentproviders/zenodo.py new file mode 100644 index 00000000..3ad06a02 --- /dev/null +++ b/repo2docker/contentproviders/zenodo.py @@ -0,0 +1,73 @@ +import json +import shutil + +from os import makedirs +from os import path +from urllib.request import urlopen, Request +from zipfile import ZipFile, is_zipfile + +from .base import ContentProvider +from ..utils import copytree + + +class Zenodo(ContentProvider): + """Provide contents of a Zenodo deposit.""" + + def detect(self, doi, ref=None, extra_args=None): + # 10.5281 is the Zenodo DOI prefix + if doi.startswith('10.5281'): + resp = urlopen("https://doi.org/{}".format(doi)) + self.record_id = resp.url.rsplit("/", maxsplit=1)[1] + return {'record': self.record_id} + + def fetch(self, spec, output_dir, yield_output=False): + record_id = spec['record'] + + yield "Fetching Zenodo record 
{}.\n".format(record_id) + req = Request("https://zenodo.org/api/records/{}".format(record_id), + headers={"accept": "application/json"}) + resp = urlopen(req) + + record = json.loads(resp.read().decode("utf-8")) + + def _fetch(file_ref, unzip=False): + with urlopen(file_ref["links"]["download"]) as src: + fname = file_ref["filename"] + sub_dir = path.join(output_dir, path.dirname(fname)) + if not path.exists(sub_dir): + print("Creating", sub_dir) + makedirs(sub_dir, exist_ok=True) + + dst_fname = path.join(output_dir, fname) + with open(dst_fname, "wb") as dst: + yield "Fetching {}\n".format(fname) + shutil.copyfileobj(src, dst) + + # first close the newly written file, then continue + # processing it + if unzip and is_zipfile(dst_fname): + zfile = ZipFile(dst_fname) + zfile.extractall(path=output_dir) + zfile.close() + import os + d = os.listdir(output_dir)[0] + print(output_dir) + print(os.listdir(output_dir)) + copytree(path.join(output_dir, d), output_dir) + shutil.rmtree(sub_dir) + shutil.rmtree(path.join(output_dir, d)) + + is_software = record["metadata"]["upload_type"] == "software" + only_one_file = len(record["files"]) == 1 + for file_ref in record['files']: + for line in _fetch(file_ref, unzip=is_software and only_one_file): + yield line + + import pdb; pdb.set_trace() + + @property + def content_id(self): + """A unique ID to represent the version of the content. + Uses the first seven characters of the git commit ID of the repository. 
+ """ + return self.record_id diff --git a/repo2docker/utils.py b/repo2docker/utils.py index 22c19068..d703d612 100644 --- a/repo2docker/utils.py +++ b/repo2docker/utils.py @@ -4,6 +4,8 @@ import os import re import subprocess +from shutil import copystat, copy2 + from traitlets import Integer, TraitError @@ -287,3 +289,91 @@ def check_ref(ref, cwd=None): # We'll throw an error later if no refs resolve pass return hash + + +class Error(OSError): + pass + + +# a copy of shutil.copytree() that is ok with the target directory +# already existing +def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2, + ignore_dangling_symlinks=False): + """Recursively copy a directory tree. + The destination directory may already exist. + If exception(s) occur, an Error is raised with a list of reasons. + If the optional symlinks flag is true, symbolic links in the + source tree result in symbolic links in the destination tree; if + it is false, the contents of the files pointed to by symbolic + links are copied. If the file pointed by the symlink doesn't + exist, an exception will be added in the list of errors raised in + an Error exception at the end of the copy process. + You can set the optional ignore_dangling_symlinks flag to true if you + want to silence this exception. Notice that this has no effect on + platforms that don't support os.symlink. + The optional ignore argument is a callable. If given, it + is called with the `src` parameter, which is the directory + being visited by copytree(), and `names` which is the list of + `src` contents, as returned by os.listdir(): + callable(src, names) -> ignored_names + Since copytree() is called recursively, the callable will be + called once for each directory that is copied. It returns a + list of names relative to the `src` directory that should + not be copied. + The optional copy_function argument is a callable that will be used + to copy each file. 
It will be called with the source path and the + destination path as arguments. By default, copy2() is used, but any + function that supports the same signature (like copy()) can be used. + """ + names = os.listdir(src) + if ignore is not None: + ignored_names = ignore(src, names) + else: + ignored_names = set() + + os.makedirs(dst, exist_ok=True) + errors = [] + for name in names: + if name in ignored_names: + continue + srcname = os.path.join(src, name) + dstname = os.path.join(dst, name) + try: + if os.path.islink(srcname): + linkto = os.readlink(srcname) + if symlinks: + # We can't just leave it to `copy_function` because legacy + # code with a custom `copy_function` may rely on copytree + # doing the right thing. + os.symlink(linkto, dstname) + copystat(srcname, dstname, follow_symlinks=not symlinks) + else: + # ignore dangling symlink if the flag is on + if not os.path.exists(linkto) and ignore_dangling_symlinks: + continue + # otherwise let the copy occurs. copy2 will raise an error + if os.path.isdir(srcname): + copytree(srcname, dstname, symlinks, ignore, + copy_function) + else: + copy_function(srcname, dstname) + elif os.path.isdir(srcname): + copytree(srcname, dstname, symlinks, ignore, copy_function) + else: + # Will raise a SpecialFileError for unsupported file types + copy_function(srcname, dstname) + # catch the Error from the recursive copytree so that we can + # continue with other files + except Error as err: + errors.extend(err.args[0]) + except OSError as why: + errors.append((srcname, dstname, str(why))) + try: + copystat(src, dst) + except OSError as why: + # Copying file access times may fail on Windows + if getattr(why, 'winerror', None) is None: + errors.append((src, dst, str(why))) + if errors: + raise Error(errors) + return dst From 2ed3818f997802567cac65f0fd446f7fa0fd0668 Mon Sep 17 00:00:00 2001 From: Tim Head Date: Mon, 27 May 2019 19:53:20 +0200 Subject: [PATCH 2/9] Better handling of software archives Unpack a single ZIP file, then 
move it to the root of the directory --- repo2docker/contentproviders/zenodo.py | 38 ++++++++++++++++---------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/repo2docker/contentproviders/zenodo.py b/repo2docker/contentproviders/zenodo.py index 3ad06a02..07db4a22 100644 --- a/repo2docker/contentproviders/zenodo.py +++ b/repo2docker/contentproviders/zenodo.py @@ -1,3 +1,4 @@ +import os import json import shutil @@ -31,31 +32,42 @@ class Zenodo(ContentProvider): record = json.loads(resp.read().decode("utf-8")) def _fetch(file_ref, unzip=False): + # the assumption is that `unzip=True` means that this is the only + # file related to the zenodo record with urlopen(file_ref["links"]["download"]) as src: fname = file_ref["filename"] - sub_dir = path.join(output_dir, path.dirname(fname)) - if not path.exists(sub_dir): - print("Creating", sub_dir) - makedirs(sub_dir, exist_ok=True) + if path.dirname(fname): + sub_dir = path.join(output_dir, path.dirname(fname)) + if not path.exists(sub_dir): + yield 'Creating {}\n'.format(sub_dir) + makedirs(sub_dir, exist_ok=True) dst_fname = path.join(output_dir, fname) with open(dst_fname, "wb") as dst: yield "Fetching {}\n".format(fname) shutil.copyfileobj(src, dst) - # first close the newly written file, then continue # processing it if unzip and is_zipfile(dst_fname): + yield "Extracting {}\n".format(fname) zfile = ZipFile(dst_fname) zfile.extractall(path=output_dir) zfile.close() - import os - d = os.listdir(output_dir)[0] - print(output_dir) - print(os.listdir(output_dir)) - copytree(path.join(output_dir, d), output_dir) - shutil.rmtree(sub_dir) - shutil.rmtree(path.join(output_dir, d)) + + # delete downloaded file ... + os.remove(dst_fname) + # ... 
and any directories we might have created, + # in which case sub_dir will be defined + if path.dirname(fname): + shutil.rmtree(sub_dir) + + new_subdirs = os.listdir(output_dir) + # if there is only one new subdirectory move its contents + # to the top level directory + if len(new_subdirs) == 1: + d = new_subdirs[0] + copytree(path.join(output_dir, d), output_dir) + shutil.rmtree(path.join(output_dir, d)) is_software = record["metadata"]["upload_type"] == "software" only_one_file = len(record["files"]) == 1 @@ -63,8 +75,6 @@ class Zenodo(ContentProvider): for line in _fetch(file_ref, unzip=is_software and only_one_file): yield line - import pdb; pdb.set_trace() - @property def content_id(self): """A unique ID to represent the version of the content. From ec8659e62f3efaccae620e93bd1dc1324d95138a Mon Sep 17 00:00:00 2001 From: Tim Head Date: Tue, 28 May 2019 19:10:32 +0200 Subject: [PATCH 3/9] Add tests for Zenodo content provider --- repo2docker/contentproviders/zenodo.py | 30 +++- tests/unit/contentproviders/test_zenodo.py | 159 +++++++++++++++++++++ 2 files changed, 182 insertions(+), 7 deletions(-) create mode 100644 tests/unit/contentproviders/test_zenodo.py diff --git a/repo2docker/contentproviders/zenodo.py b/repo2docker/contentproviders/zenodo.py index 07db4a22..84fd88a6 100644 --- a/repo2docker/contentproviders/zenodo.py +++ b/repo2docker/contentproviders/zenodo.py @@ -15,18 +15,34 @@ class Zenodo(ContentProvider): """Provide contents of a Zenodo deposit.""" def detect(self, doi, ref=None, extra_args=None): + doi = doi.lower() # 10.5281 is the Zenodo DOI prefix - if doi.startswith('10.5281'): + if doi.startswith("10.5281/"): resp = urlopen("https://doi.org/{}".format(doi)) self.record_id = resp.url.rsplit("/", maxsplit=1)[1] - return {'record': self.record_id} + return {"record": self.record_id} + + elif doi.startswith("https://doi.org/10.5281/") or doi.startswith( + "http://doi.org/10.5281/" + ): + resp = urlopen(doi) + self.record_id = resp.url.rsplit("/", 
maxsplit=1)[1] + return {"record": self.record_id} + + elif doi.startswith("https://zenodo.org/record/") or doi.startswith( + "http://zenodo.org/record/" + ): + self.record_id = doi.rsplit("/", maxsplit=1)[1] + return {"record": self.record_id} def fetch(self, spec, output_dir, yield_output=False): - record_id = spec['record'] + record_id = spec["record"] yield "Fetching Zenodo record {}.\n".format(record_id) - req = Request("https://zenodo.org/api/records/{}".format(record_id), - headers={"accept": "application/json"}) + req = Request( + "https://zenodo.org/api/records/{}".format(record_id), + headers={"accept": "application/json"}, + ) resp = urlopen(req) record = json.loads(resp.read().decode("utf-8")) @@ -39,7 +55,7 @@ class Zenodo(ContentProvider): if path.dirname(fname): sub_dir = path.join(output_dir, path.dirname(fname)) if not path.exists(sub_dir): - yield 'Creating {}\n'.format(sub_dir) + yield "Creating {}\n".format(sub_dir) makedirs(sub_dir, exist_ok=True) dst_fname = path.join(output_dir, fname) @@ -71,7 +87,7 @@ class Zenodo(ContentProvider): is_software = record["metadata"]["upload_type"] == "software" only_one_file = len(record["files"]) == 1 - for file_ref in record['files']: + for file_ref in record["files"]: for line in _fetch(file_ref, unzip=is_software and only_one_file): yield line diff --git a/tests/unit/contentproviders/test_zenodo.py b/tests/unit/contentproviders/test_zenodo.py new file mode 100644 index 00000000..55e32895 --- /dev/null +++ b/tests/unit/contentproviders/test_zenodo.py @@ -0,0 +1,159 @@ +import json +import os + +from contextlib import contextmanager +from io import BytesIO +from tempfile import TemporaryDirectory, NamedTemporaryFile +from unittest.mock import patch +from urllib.request import urlopen, Request +from zipfile import ZipFile + +from repo2docker.contentproviders import Zenodo + + +def test_content_id(): + zen = Zenodo() + + zen.detect("10.5281/zenodo.3232985") + assert zen.content_id == "3232985" + + +def 
test_detect(): + # valid Zenodo DOIs trigger this content provider + assert Zenodo().detect("10.5281/zenodo.3232985") == {"record": "3232985"} + assert Zenodo().detect("https://doi.org/10.5281/zenodo.3232985") == {"record": "3232985"} + assert Zenodo().detect("https://zenodo.org/record/3232985") == {"record": "3232985"} + + # Don't trigger the Zenodo content provider + assert Zenodo().detect("/some/path/here") is None + assert Zenodo().detect("https://example.com/path/here") is None + # donn't handle DOIs that aren't from Zenodo + assert Zenodo().detect("https://doi.org/10.21105/joss.01277") is None + + +@contextmanager +def zenodo_archive(prefix="a_directory"): + with NamedTemporaryFile(suffix=".zip") as zfile: + with ZipFile(zfile.name, mode="w") as zip: + zip.writestr("{}/some-file.txt".format(prefix), "some content") + zip.writestr("{}/some-other-file.txt".format(prefix), "some more content") + + yield zfile.name + + +def test_fetch_software_from_github_archive(): + # we "fetch" a local ZIP file to simulate a Zenodo record created from a + # GitHub repository via the Zenodo-GitHub integration + with zenodo_archive() as zen_path: + mock_response = BytesIO( + json.dumps( + { + "files": [ + { + "filename": "some_dir/afake.zip", + "links": {"download": "file://{}".format(zen_path)}, + } + ], + "metadata": {"upload_type": "software"}, + } + ).encode("utf-8") + ) + + def mock_urlopen(req_or_path): + if isinstance(req_or_path, Request): + return mock_response + else: + return urlopen(req_or_path) + + with patch("repo2docker.contentproviders.zenodo.urlopen", new=mock_urlopen): + with TemporaryDirectory() as d: + zen = Zenodo() + + output = [] + for l in zen.fetch({"record": "1234"}, d): + output.append(l) + + unpacked_files = os.listdir(d) + expected = ["some-other-file.txt", "some-file.txt"] + assert expected == unpacked_files + + +def test_fetch_software(): + # we "fetch" a local ZIP file to simulate a Zenodo software record with a + # ZIP file in it + with 
zenodo_archive() as zen_path: + mock_response = BytesIO( + json.dumps( + { + "files": [ + { + # this is the difference to the GitHub generated one, + # the ZIP file isn't in a directory + "filename": "afake.zip", + "links": {"download": "file://{}".format(zen_path)}, + } + ], + "metadata": {"upload_type": "software"}, + } + ).encode("utf-8") + ) + + def mock_urlopen(req_or_path): + if isinstance(req_or_path, Request): + return mock_response + else: + return urlopen(req_or_path) + + with patch("repo2docker.contentproviders.zenodo.urlopen", new=mock_urlopen): + with TemporaryDirectory() as d: + zen = Zenodo() + + output = [] + for l in zen.fetch({"record": "1234"}, d): + output.append(l) + + unpacked_files = os.listdir(d) + expected = ["some-other-file.txt", "some-file.txt"] + assert expected == unpacked_files + + +def test_fetch_data(): + # we "fetch" a local ZIP file to simulate a Zenodo data record + with zenodo_archive() as a_zen_path: + with zenodo_archive() as b_zen_path: + mock_response = BytesIO( + json.dumps( + { + "files": [ + { + "filename": "afake.zip", + "links": {"download": "file://{}".format(a_zen_path)}, + }, + { + "filename": "bfake.zip", + "links": {"download": "file://{}".format(b_zen_path)}, + } + ], + "metadata": {"upload_type": "data"}, + } + ).encode("utf-8") + ) + + def mock_urlopen(req_or_path): + if isinstance(req_or_path, Request): + return mock_response + else: + return urlopen(req_or_path) + + with patch("repo2docker.contentproviders.zenodo.urlopen", new=mock_urlopen): + with TemporaryDirectory() as d: + zen = Zenodo() + + output = [] + for l in zen.fetch({"record": "1234"}, d): + output.append(l) + + unpacked_files = os.listdir(d) + # ZIP files shouldn't have been unpacked + expected = ['bfake.zip', 'afake.zip'] + assert expected == unpacked_files From 9bda1152fbf9af9aab7e3c0b0fe2bb7650a35970 Mon Sep 17 00:00:00 2001 From: Tim Head Date: Tue, 28 May 2019 19:28:05 +0200 Subject: [PATCH 4/9] Update docstrings in Zenodo provider --- 
repo2docker/contentproviders/zenodo.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/repo2docker/contentproviders/zenodo.py b/repo2docker/contentproviders/zenodo.py index 84fd88a6..c3bbde6a 100644 --- a/repo2docker/contentproviders/zenodo.py +++ b/repo2docker/contentproviders/zenodo.py @@ -15,6 +15,7 @@ class Zenodo(ContentProvider): """Provide contents of a Zenodo deposit.""" def detect(self, doi, ref=None, extra_args=None): + """Trigger this provider for things that resolve to a Zenodo record""" doi = doi.lower() # 10.5281 is the Zenodo DOI prefix if doi.startswith("10.5281/"): @@ -36,6 +37,7 @@ class Zenodo(ContentProvider): return {"record": self.record_id} def fetch(self, spec, output_dir, yield_output=False): + """Fetch and unpack a Zenodo record""" record_id = spec["record"] yield "Fetching Zenodo record {}.\n".format(record_id) @@ -93,7 +95,5 @@ class Zenodo(ContentProvider): @property def content_id(self): - """A unique ID to represent the version of the content. - Uses the first seven characters of the git commit ID of the repository. 
- """ + """The Zenodo record ID as the content of a record is immutable""" return self.record_id From 8fe9c718263b372f26084eb673714e315f033392 Mon Sep 17 00:00:00 2001 From: Tim Head Date: Wed, 29 May 2019 07:00:10 +0200 Subject: [PATCH 5/9] Mock urlopen calls during Zenodo tests --- tests/unit/contentproviders/test_zenodo.py | 40 ++++++++++++++-------- 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/tests/unit/contentproviders/test_zenodo.py b/tests/unit/contentproviders/test_zenodo.py index 55e32895..cc7d446f 100644 --- a/tests/unit/contentproviders/test_zenodo.py +++ b/tests/unit/contentproviders/test_zenodo.py @@ -19,16 +19,26 @@ def test_content_id(): def test_detect(): - # valid Zenodo DOIs trigger this content provider - assert Zenodo().detect("10.5281/zenodo.3232985") == {"record": "3232985"} - assert Zenodo().detect("https://doi.org/10.5281/zenodo.3232985") == {"record": "3232985"} - assert Zenodo().detect("https://zenodo.org/record/3232985") == {"record": "3232985"} + with patch("repo2docker.contentproviders.zenodo.urlopen") as fake_urlopen: + fake_urlopen.return_value.url = "https://zenodo.org/record/3232985" + # valid Zenodo DOIs trigger this content provider + assert Zenodo().detect("10.5281/zenodo.3232985") == {"record": "3232985"} + assert Zenodo().detect("https://doi.org/10.5281/zenodo.3232985") == {"record": "3232985"} + assert Zenodo().detect("https://zenodo.org/record/3232985") == {"record": "3232985"} - # Don't trigger the Zenodo content provider - assert Zenodo().detect("/some/path/here") is None - assert Zenodo().detect("https://example.com/path/here") is None - # donn't handle DOIs that aren't from Zenodo - assert Zenodo().detect("https://doi.org/10.21105/joss.01277") is None + # only two of the three calls above have to resolve a DOI + assert fake_urlopen.call_count == 2 + + with patch("repo2docker.contentproviders.zenodo.urlopen") as fake_urlopen: + # Don't trigger the Zenodo content provider + assert 
Zenodo().detect("/some/path/here") is None + assert Zenodo().detect("https://example.com/path/here") is None + # don't handle DOIs that aren't from Zenodo + assert Zenodo().detect("https://doi.org/10.21105/joss.01277") is None + + # none of the examples are Zenodo like, so we should not attempt to + # resolve a DOI either + assert not fake_urlopen.called @contextmanager @@ -73,8 +83,8 @@ def test_fetch_software_from_github_archive(): for l in zen.fetch({"record": "1234"}, d): output.append(l) - unpacked_files = os.listdir(d) - expected = ["some-other-file.txt", "some-file.txt"] + unpacked_files = set(os.listdir(d)) + expected = set(["some-other-file.txt", "some-file.txt"]) assert expected == unpacked_files @@ -112,8 +122,8 @@ def test_fetch_software(): for l in zen.fetch({"record": "1234"}, d): output.append(l) - unpacked_files = os.listdir(d) - expected = ["some-other-file.txt", "some-file.txt"] + unpacked_files = set(os.listdir(d)) + expected = set(["some-other-file.txt", "some-file.txt"]) assert expected == unpacked_files @@ -153,7 +163,7 @@ def test_fetch_data(): for l in zen.fetch({"record": "1234"}, d): output.append(l) - unpacked_files = os.listdir(d) + unpacked_files = set(os.listdir(d)) # ZIP files shouldn't have been unpacked - expected = ['bfake.zip', 'afake.zip'] + expected = {'bfake.zip', 'afake.zip'} assert expected == unpacked_files From e99c80799d632bcf90196a0d79c4cbfe8c252100 Mon Sep 17 00:00:00 2001 From: Tim Head Date: Wed, 29 May 2019 07:10:35 +0200 Subject: [PATCH 6/9] Add comment about supporting Zenodo instances not at zenodo.org --- repo2docker/contentproviders/zenodo.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/repo2docker/contentproviders/zenodo.py b/repo2docker/contentproviders/zenodo.py index c3bbde6a..a1b60ece 100644 --- a/repo2docker/contentproviders/zenodo.py +++ b/repo2docker/contentproviders/zenodo.py @@ -16,6 +16,10 @@ class Zenodo(ContentProvider): def detect(self, doi, ref=None, extra_args=None): """Trigger this 
provider for things that resolve to a Zenodo record""" + # To support Zenodo instances not hosted at zenodo.org we need to + # start maintaining a list of known DOI prefixes and their hostname. + # We should also change to returning a complete `record_url` that + # fetch() can use instead of constructing a URL there doi = doi.lower() # 10.5281 is the Zenodo DOI prefix if doi.startswith("10.5281/"): From 363c962efd152045fcc0942987acce04ef0efd2d Mon Sep 17 00:00:00 2001 From: Tim Head Date: Wed, 29 May 2019 08:17:22 +0200 Subject: [PATCH 7/9] Add a urlopen helper to Zenodo content provider Use a helper function to inject a default user-agent header into every request we make. --- repo2docker/contentproviders/zenodo.py | 24 ++++++++++--- tests/unit/contentproviders/test_zenodo.py | 40 ++++++++++++---------- 2 files changed, 40 insertions(+), 24 deletions(-) diff --git a/repo2docker/contentproviders/zenodo.py b/repo2docker/contentproviders/zenodo.py index a1b60ece..d7aa21b9 100644 --- a/repo2docker/contentproviders/zenodo.py +++ b/repo2docker/contentproviders/zenodo.py @@ -4,16 +4,30 @@ import shutil from os import makedirs from os import path -from urllib.request import urlopen, Request +from urllib.request import build_opener, urlopen, Request from zipfile import ZipFile, is_zipfile from .base import ContentProvider from ..utils import copytree +from .. 
import __version__ class Zenodo(ContentProvider): """Provide contents of a Zenodo deposit.""" + def _urlopen(self, req, headers=None): + """A urlopen() helper""" + # someone passed a string, not a request + if not isinstance(req, Request): + req = Request(req) + + req.add_header("User-Agent", "repo2docker {}".format(__version__)) + if headers is not None: + for key, value in headers.items(): + req.add_header(key, value) + + return urlopen(req) + def detect(self, doi, ref=None, extra_args=None): """Trigger this provider for things that resolve to a Zenodo record""" # To support Zenodo instances not hosted at zenodo.org we need to @@ -23,14 +37,14 @@ class Zenodo(ContentProvider): doi = doi.lower() # 10.5281 is the Zenodo DOI prefix if doi.startswith("10.5281/"): - resp = urlopen("https://doi.org/{}".format(doi)) + resp = self._urlopen("https://doi.org/{}".format(doi)) self.record_id = resp.url.rsplit("/", maxsplit=1)[1] return {"record": self.record_id} elif doi.startswith("https://doi.org/10.5281/") or doi.startswith( "http://doi.org/10.5281/" ): - resp = urlopen(doi) + resp = self._urlopen(doi) self.record_id = resp.url.rsplit("/", maxsplit=1)[1] return {"record": self.record_id} @@ -49,14 +63,14 @@ class Zenodo(ContentProvider): "https://zenodo.org/api/records/{}".format(record_id), headers={"accept": "application/json"}, ) - resp = urlopen(req) + resp = self._urlopen(req) record = json.loads(resp.read().decode("utf-8")) def _fetch(file_ref, unzip=False): # the assumption is that `unzip=True` means that this is the only # file related to the zenodo record - with urlopen(file_ref["links"]["download"]) as src: + with self._urlopen(file_ref["links"]["download"]) as src: fname = file_ref["filename"] if path.dirname(fname): sub_dir = path.join(output_dir, path.dirname(fname)) diff --git a/tests/unit/contentproviders/test_zenodo.py b/tests/unit/contentproviders/test_zenodo.py index cc7d446f..288f794d 100644 --- a/tests/unit/contentproviders/test_zenodo.py +++ 
b/tests/unit/contentproviders/test_zenodo.py @@ -12,14 +12,16 @@ from repo2docker.contentproviders import Zenodo def test_content_id(): - zen = Zenodo() + with patch.object(Zenodo, "_urlopen") as fake_urlopen: + fake_urlopen.return_value.url = "https://zenodo.org/record/3232985" + zen = Zenodo() - zen.detect("10.5281/zenodo.3232985") - assert zen.content_id == "3232985" + zen.detect("10.5281/zenodo.3232985") + assert zen.content_id == "3232985" def test_detect(): - with patch("repo2docker.contentproviders.zenodo.urlopen") as fake_urlopen: + with patch.object(Zenodo, "_urlopen") as fake_urlopen: fake_urlopen.return_value.url = "https://zenodo.org/record/3232985" # valid Zenodo DOIs trigger this content provider assert Zenodo().detect("10.5281/zenodo.3232985") == {"record": "3232985"} @@ -29,7 +31,7 @@ def test_detect(): # only two of the three calls above have to resolve a DOI assert fake_urlopen.call_count == 2 - with patch("repo2docker.contentproviders.zenodo.urlopen") as fake_urlopen: + with patch.object(Zenodo, "_urlopen") as fake_urlopen: # Don't trigger the Zenodo content provider assert Zenodo().detect("/some/path/here") is None assert Zenodo().detect("https://example.com/path/here") is None @@ -69,16 +71,16 @@ def test_fetch_software_from_github_archive(): ).encode("utf-8") ) - def mock_urlopen(req_or_path): - if isinstance(req_or_path, Request): + def mock_urlopen(self, req): + if isinstance(req, Request): return mock_response else: - return urlopen(req_or_path) + return urlopen(req) + + with patch.object(Zenodo, '_urlopen', new=mock_urlopen): + zen = Zenodo() - with patch("repo2docker.contentproviders.zenodo.urlopen", new=mock_urlopen): with TemporaryDirectory() as d: - zen = Zenodo() - output = [] for l in zen.fetch({"record": "1234"}, d): output.append(l) @@ -108,13 +110,13 @@ def test_fetch_software(): ).encode("utf-8") ) - def mock_urlopen(req_or_path): - if isinstance(req_or_path, Request): + def mock_urlopen(self, req): + if isinstance(req, Request): 
return mock_response else: return urlopen(req_or_path) - with patch("repo2docker.contentproviders.zenodo.urlopen", new=mock_urlopen): + return urlopen(req) + + with patch.object(Zenodo, '_urlopen', new=mock_urlopen): with TemporaryDirectory() as d: zen = Zenodo() @@ -149,13 +151,13 @@ def test_fetch_data(): ).encode("utf-8") ) - def mock_urlopen(req_or_path): - if isinstance(req_or_path, Request): + def mock_urlopen(self, req): + if isinstance(req, Request): return mock_response else: - return urlopen(req_or_path) + return urlopen(req) - with patch("repo2docker.contentproviders.zenodo.urlopen", new=mock_urlopen): + with patch.object(Zenodo, '_urlopen', new=mock_urlopen): with TemporaryDirectory() as d: zen = Zenodo() From c96cadafadc9933baee8df60b331b824a8d92b1d Mon Sep 17 00:00:00 2001 From: Tim Head Date: Wed, 29 May 2019 18:24:48 +0200 Subject: [PATCH 8/9] Add basic documentation on Zenodo content provider --- docs/source/index.rst | 9 ++++++--- docs/source/usage.rst | 18 ++++++++++++------ 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/docs/source/index.rst b/docs/source/index.rst index d5aff875..e8795aec 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -2,11 +2,14 @@ jupyter-repo2docker =================== ``jupyter-repo2docker`` is a tool to **build, run, and push Docker -images from source code repositories** that run via a Jupyter server. +images from source code repositories**. ``repo2docker`` fetches a repository -(from GitHub, GitLab or other locations) and builds a container image -based on the configuration files found in the repository. It can be +(from GitHub, GitLab, Zenodo, a Git repository or a local directory) +and builds a container image in which the code can be executed. +The image build process is based on the configuration files found in the repository. 
+ +``repo2docker`` can be used to explore a repository locally by building and executing the constructed image of the repository, or as a means of building images that are pushed to a Docker registry. diff --git a/docs/source/usage.rst b/docs/source/usage.rst index a4360760..b0fff1be 100644 --- a/docs/source/usage.rst +++ b/docs/source/usage.rst @@ -11,14 +11,15 @@ Using ``repo2docker`` ``repo2docker``, see :ref:`install`. ``repo2docker`` can build a reproducible computational environment for any repository that -follows :ref:`specification`. repo2docker is called with a URL/path to a repository. It then +follows :ref:`specification`. repo2docker is called with the URL of a Git repository, +a Zenodo DOI or a path to a local directory. It then performs these steps: 1. Inspects the repository for :ref:`configuration files `. These will be used to build the environment needed to run the repository. 2. Builds a Docker image with an environment specified in these :ref:`configuration files `. -3. Runs a Jupyter server within the image that lets you explore the - repository interactively (optional) +3. Launches the image to let you explore the + repository interactively via Jupyter notebooks, RStudio, or many other interfaces (optional) 4. Pushes the images to a Docker registry so that it may be accessed remotely (optional) @@ -27,10 +28,15 @@ Calling repo2docker repo2docker is called with this command:: - jupyter-repo2docker + jupyter-repo2docker -where ```` is a URL or path to the source repository -for which you'd like to build an image. +where ```` is: + + * a URL of a Git repository (``https://github.com/binder-examples/requirements``), + * a Zenodo DOI (``10.5281/zenodo.1211089``), or + * a path to a local directory (``a/local/directory``) + +of the source repository you want to build. 
For example, the following command will build an image of Peter Norvig's Pytudes_ repository:: From 71a78986c22429cf0018c207ff7b87fddc01c53a Mon Sep 17 00:00:00 2001 From: Tim Head Date: Wed, 29 May 2019 22:22:29 +0200 Subject: [PATCH 9/9] Add external test for Zenodo provider --- tests/conftest.py | 12 +++++++----- tests/external/reproductions.repos.yaml | 4 ++++ 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 72df137d..4a1e8315 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -213,11 +213,13 @@ class RemoteRepoList(pytest.File): with self.fspath.open() as f: repos = yaml.safe_load(f) for repo in repos: + args = [] + if "ref" in repo: + args += ['--ref', repo['ref']] + args += [repo['url'], + '--', + ] + shlex.split(repo['verify']) yield Repo2DockerTest( repo['name'], self, - args=[ - '--ref', repo['ref'], - repo['url'], - '--', - ] + shlex.split(repo['verify']), + args=args, ) diff --git a/tests/external/reproductions.repos.yaml b/tests/external/reproductions.repos.yaml index 6ba3f5a7..3aa223db 100644 --- a/tests/external/reproductions.repos.yaml +++ b/tests/external/reproductions.repos.yaml @@ -30,3 +30,7 @@ url: https://github.com/QuantStack/xeus-cling ref: 0.4.5 verify: jupyter kernelspec list +# Zenodo record of https://github.com/mbcxqcw2/EEModel/tree/v1.03 +- name: 10.5281/zenodo.1211089 + url: 10.5281/zenodo.1211089 + verify: python2 -c 'import matplotlib'