diff --git a/repo2docker/contentproviders/zenodo.py b/repo2docker/contentproviders/zenodo.py index 07db4a22..84fd88a6 100644 --- a/repo2docker/contentproviders/zenodo.py +++ b/repo2docker/contentproviders/zenodo.py @@ -15,18 +15,34 @@ class Zenodo(ContentProvider): """Provide contents of a Zenodo deposit.""" def detect(self, doi, ref=None, extra_args=None): + doi = doi.lower() # 10.5281 is the Zenodo DOI prefix - if doi.startswith('10.5281'): + if doi.startswith("10.5281/"): resp = urlopen("https://doi.org/{}".format(doi)) self.record_id = resp.url.rsplit("/", maxsplit=1)[1] - return {'record': self.record_id} + return {"record": self.record_id} + + elif doi.startswith("https://doi.org/10.5281/") or doi.startswith( + "http://doi.org/10.5281/" + ): + resp = urlopen(doi) + self.record_id = resp.url.rsplit("/", maxsplit=1)[1] + return {"record": self.record_id} + + elif doi.startswith("https://zenodo.org/record/") or doi.startswith( + "http://zenodo.org/record/" + ): + self.record_id = doi.rsplit("/", maxsplit=1)[1] + return {"record": self.record_id} def fetch(self, spec, output_dir, yield_output=False): - record_id = spec['record'] + record_id = spec["record"] yield "Fetching Zenodo record {}.\n".format(record_id) - req = Request("https://zenodo.org/api/records/{}".format(record_id), - headers={"accept": "application/json"}) + req = Request( + "https://zenodo.org/api/records/{}".format(record_id), + headers={"accept": "application/json"}, + ) resp = urlopen(req) record = json.loads(resp.read().decode("utf-8")) @@ -39,7 +55,7 @@ class Zenodo(ContentProvider): if path.dirname(fname): sub_dir = path.join(output_dir, path.dirname(fname)) if not path.exists(sub_dir): - yield 'Creating {}\n'.format(sub_dir) + yield "Creating {}\n".format(sub_dir) makedirs(sub_dir, exist_ok=True) dst_fname = path.join(output_dir, fname) @@ -71,7 +87,7 @@ class Zenodo(ContentProvider): is_software = record["metadata"]["upload_type"] == "software" only_one_file = len(record["files"]) == 1 - 
for file_ref in record['files']: + for file_ref in record["files"]: for line in _fetch(file_ref, unzip=is_software and only_one_file): yield line diff --git a/tests/unit/contentproviders/test_zenodo.py b/tests/unit/contentproviders/test_zenodo.py new file mode 100644 index 00000000..55e32895 --- /dev/null +++ b/tests/unit/contentproviders/test_zenodo.py @@ -0,0 +1,159 @@ +import json +import os + +from contextlib import contextmanager +from io import BytesIO +from tempfile import TemporaryDirectory, NamedTemporaryFile +from unittest.mock import patch +from urllib.request import urlopen, Request +from zipfile import ZipFile + +from repo2docker.contentproviders import Zenodo + + +def test_content_id(): + zen = Zenodo() + + zen.detect("10.5281/zenodo.3232985") + assert zen.content_id == "3232985" + + +def test_detect(): + # valid Zenodo DOIs trigger this content provider + assert Zenodo().detect("10.5281/zenodo.3232985") == {"record": "3232985"} + assert Zenodo().detect("https://doi.org/10.5281/zenodo.3232985") == {"record": "3232985"} + assert Zenodo().detect("https://zenodo.org/record/3232985") == {"record": "3232985"} + + # Don't trigger the Zenodo content provider + assert Zenodo().detect("/some/path/here") is None + assert Zenodo().detect("https://example.com/path/here") is None + # don't handle DOIs that aren't from Zenodo + assert Zenodo().detect("https://doi.org/10.21105/joss.01277") is None + + +@contextmanager +def zenodo_archive(prefix="a_directory"): + with NamedTemporaryFile(suffix=".zip") as zfile: + with ZipFile(zfile.name, mode="w") as zip: + zip.writestr("{}/some-file.txt".format(prefix), "some content") + zip.writestr("{}/some-other-file.txt".format(prefix), "some more content") + + yield zfile.name + + +def test_fetch_software_from_github_archive(): + # we "fetch" a local ZIP file to simulate a Zenodo record created from a + # GitHub repository via the Zenodo-GitHub integration + with zenodo_archive() as zen_path: + mock_response = BytesIO( + 
json.dumps( + { + "files": [ + { + "filename": "some_dir/afake.zip", + "links": {"download": "file://{}".format(zen_path)}, + } + ], + "metadata": {"upload_type": "software"}, + } + ).encode("utf-8") + ) + + def mock_urlopen(req_or_path): + if isinstance(req_or_path, Request): + return mock_response + else: + return urlopen(req_or_path) + + with patch("repo2docker.contentproviders.zenodo.urlopen", new=mock_urlopen): + with TemporaryDirectory() as d: + zen = Zenodo() + + output = [] + for l in zen.fetch({"record": "1234"}, d): + output.append(l) + + unpacked_files = os.listdir(d) + expected = ["some-other-file.txt", "some-file.txt"] + assert expected == unpacked_files + + +def test_fetch_software(): + # we "fetch" a local ZIP file to simulate a Zenodo software record with a + # ZIP file in it + with zenodo_archive() as zen_path: + mock_response = BytesIO( + json.dumps( + { + "files": [ + { + # this is the difference to the GitHub generated one, + # the ZIP file isn't in a directory + "filename": "afake.zip", + "links": {"download": "file://{}".format(zen_path)}, + } + ], + "metadata": {"upload_type": "software"}, + } + ).encode("utf-8") + ) + + def mock_urlopen(req_or_path): + if isinstance(req_or_path, Request): + return mock_response + else: + return urlopen(req_or_path) + + with patch("repo2docker.contentproviders.zenodo.urlopen", new=mock_urlopen): + with TemporaryDirectory() as d: + zen = Zenodo() + + output = [] + for l in zen.fetch({"record": "1234"}, d): + output.append(l) + + unpacked_files = os.listdir(d) + expected = ["some-other-file.txt", "some-file.txt"] + assert expected == unpacked_files + + +def test_fetch_data(): + # we "fetch" a local ZIP file to simulate a Zenodo data record + with zenodo_archive() as a_zen_path: + with zenodo_archive() as b_zen_path: + mock_response = BytesIO( + json.dumps( + { + "files": [ + { + "filename": "afake.zip", + "links": {"download": "file://{}".format(a_zen_path)}, + }, + { + "filename": "bfake.zip", + "links": 
{"download": "file://{}".format(b_zen_path)}, + } + ], + "metadata": {"upload_type": "data"}, + } + ).encode("utf-8") + ) + + def mock_urlopen(req_or_path): + if isinstance(req_or_path, Request): + return mock_response + else: + return urlopen(req_or_path) + + with patch("repo2docker.contentproviders.zenodo.urlopen", new=mock_urlopen): + with TemporaryDirectory() as d: + zen = Zenodo() + + output = [] + for l in zen.fetch({"record": "1234"}, d): + output.append(l) + + unpacked_files = os.listdir(d) + # ZIP files shouldn't have been unpacked + expected = ['bfake.zip', 'afake.zip'] + assert expected == unpacked_files