2019-05-28 17:10:32 +00:00
|
|
|
import json
|
|
|
|
import os
|
2019-06-20 20:22:17 +00:00
|
|
|
import pytest
|
2019-05-28 17:10:32 +00:00
|
|
|
|
|
|
|
from contextlib import contextmanager
|
|
|
|
from io import BytesIO
|
|
|
|
from tempfile import TemporaryDirectory, NamedTemporaryFile
|
|
|
|
from unittest.mock import patch
|
|
|
|
from urllib.request import urlopen, Request
|
|
|
|
from zipfile import ZipFile
|
|
|
|
|
|
|
|
from repo2docker.contentproviders import Zenodo
|
|
|
|
|
|
|
|
|
|
|
|
def test_content_id():
    """After detecting a Zenodo DOI, ``content_id`` is the record number."""
    with patch.object(Zenodo, "_urlopen") as mocked_urlopen:
        provider = Zenodo()
        # the mocked DOI lookup reports the Zenodo record page as final URL
        mocked_urlopen.return_value.url = "https://zenodo.org/record/3232985"

        provider.detect("10.5281/zenodo.3232985")

        assert provider.content_id == "3232985"
|
2019-05-28 17:10:32 +00:00
|
|
|
|
|
|
|
|
2019-06-20 20:22:17 +00:00
|
|
|
# Parametrization data for test_detect_zenodo. Each entry is a pair of
#   * three equivalent ways of naming one record: the record URL, the bare
#     DOI and the doi.org URL, and
#   * the spec dict that detect() is expected to return for each of them.
test_hosts = [
    # a record hosted on zenodo.org
    (
        [
            "https://zenodo.org/record/3232985",
            "10.5281/zenodo.3232985",
            "https://doi.org/10.5281/zenodo.3232985",
        ],
        {
            "host": {
                "hostname": [
                    "https://zenodo.org/record/",
                    "http://zenodo.org/record/",
                ],
                "api": "https://zenodo.org/api/records/",
                "filepath": "files",
                "filename": "filename",
                "download": "links.download",
                "type": "metadata.upload_type",
            },
            "record": "3232985",
        },
    ),
    # a record hosted on data.caltech.edu
    (
        [
            "https://data.caltech.edu/records/1235",
            "10.22002/d1.1235",
            "https://doi.org/10.22002/d1.1235",
        ],
        {
            "host": {
                "hostname": [
                    "https://data.caltech.edu/records/",
                    "http://data.caltech.edu/records/",
                ],
                "api": "https://data.caltech.edu/api/record/",
                "filepath": "metadata.electronic_location_and_access",
                "filename": "electronic_name.0",
                "download": "uniform_resource_identifier",
                "type": "metadata.resourceType.resourceTypeGeneral",
            },
            "record": "1235",
        },
    ),
]
|
2019-05-28 17:10:32 +00:00
|
|
|
|
2019-06-20 20:22:17 +00:00
|
|
|
|
|
|
|
@pytest.mark.parametrize("test_input,expected", test_hosts)
def test_detect_zenodo(test_input, expected):
    """detect() accepts supported record URLs/DOIs and rejects everything else.

    ``test_input`` holds three spellings of the same record (record URL, bare
    DOI, doi.org URL); each must be detected as ``expected``.
    """
    record_url, bare_doi, doi_url = test_input

    with patch.object(Zenodo, "_urlopen") as fake_urlopen:
        # every mocked lookup ends up at the record URL
        fake_urlopen.return_value.url = record_url
        # valid Zenodo DOIs trigger this content provider
        assert Zenodo().detect(record_url) == expected
        assert Zenodo().detect(bare_doi) == expected
        assert Zenodo().detect(doi_url) == expected
        # only two of the three calls above have to resolve a DOI
        assert fake_urlopen.call_count == 2

    with patch.object(Zenodo, "_urlopen") as fake_urlopen:
        # Don't trigger the Zenodo content provider
        assert Zenodo().detect("/some/path/here") is None
        assert Zenodo().detect("https://example.com/path/here") is None
        # don't handle DOIs that aren't from Zenodo
        joss_url = "http://joss.theoj.org/papers/10.21105/joss.01277"
        fake_urlopen.return_value.url = joss_url
        assert Zenodo().detect("https://doi.org/10.21105/joss.01277") is None
|
|
|
|
|
2019-05-28 17:10:32 +00:00
|
|
|
|
|
|
|
@contextmanager
def zenodo_archive(prefix="a_directory"):
    """Yield the path of a temporary ZIP file holding two small text files.

    Both members (``some-file.txt`` and ``some-other-file.txt``) are stored
    under the directory ``prefix`` inside the archive. The ZIP is backed by a
    ``NamedTemporaryFile``, so it is removed again once the ``with`` block
    using this context manager exits.
    """
    with NamedTemporaryFile(suffix=".zip") as zfile:
        # called `archive` rather than `zip` so the builtin is not shadowed
        with ZipFile(zfile.name, mode="w") as archive:
            archive.writestr("{}/some-file.txt".format(prefix), "some content")
            archive.writestr(
                "{}/some-other-file.txt".format(prefix), "some more content"
            )

        # the archive is closed (fully written) before the path is handed out
        yield zfile.name
|
|
|
|
|
|
|
|
|
|
|
|
def test_fetch_software_from_github_archive():
    """A "software" record whose ZIP sits in a sub-directory gets unpacked.

    The archive members live under ``a_directory/``; after fetching, the two
    text files are expected directly inside the output directory.
    """
    # we "fetch" a local ZIP file to simulate a Zenodo record created from a
    # GitHub repository via the Zenodo-GitHub integration
    with zenodo_archive() as zen_path:
        mock_response = BytesIO(
            json.dumps(
                {
                    "files": [
                        {
                            "filename": "some_dir/afake.zip",
                            "links": {"download": "file://{}".format(zen_path)},
                        }
                    ],
                    "metadata": {"upload_type": "software"},
                }
            ).encode("utf-8")
        )

        def mock_urlopen(self, req):
            # Request objects get the canned JSON record; anything else
            # (presumably the file:// download link) goes to the real urlopen
            if isinstance(req, Request):
                return mock_response
            else:
                return urlopen(req)

        with patch.object(Zenodo, "_urlopen", new=mock_urlopen):
            zen = Zenodo()
            spec = {
                "host": {
                    "hostname": [
                        "https://zenodo.org/record/",
                        "http://zenodo.org/record/",
                    ],
                    "api": "https://zenodo.org/api/records/",
                    "filepath": "files",
                    "filename": "filename",
                    "download": "links.download",
                    "type": "metadata.upload_type",
                },
                "record": "1234",
            }

            with TemporaryDirectory() as d:
                # iterate fetch() to completion; the yielded items themselves
                # are not inspected by this test
                for _ in zen.fetch(spec, d):
                    pass

                unpacked_files = set(os.listdir(d))
                expected = {"some-other-file.txt", "some-file.txt"}
                assert expected == unpacked_files
|
|
|
|
|
|
|
|
|
|
|
|
def test_fetch_software():
    """A "software" record with a top-level ZIP file gets unpacked.

    After fetching, the two text files from the archive are expected directly
    inside the output directory.
    """
    # we "fetch" a local ZIP file to simulate a Zenodo software record with a
    # ZIP file in it
    with zenodo_archive() as zen_path:
        mock_response = BytesIO(
            json.dumps(
                {
                    "files": [
                        {
                            # this is the difference to the GitHub generated one,
                            # the ZIP file isn't in a directory
                            "filename": "afake.zip",
                            "links": {"download": "file://{}".format(zen_path)},
                        }
                    ],
                    "metadata": {"upload_type": "software"},
                }
            ).encode("utf-8")
        )

        def mock_urlopen(self, req):
            # Request objects get the canned JSON record; anything else
            # (presumably the file:// download link) goes to the real urlopen
            if isinstance(req, Request):
                return mock_response
            else:
                return urlopen(req)

        with patch.object(Zenodo, "_urlopen", new=mock_urlopen):
            with TemporaryDirectory() as d:
                zen = Zenodo()
                spec = {
                    "host": {
                        "hostname": [
                            "https://zenodo.org/record/",
                            "http://zenodo.org/record/",
                        ],
                        "api": "https://zenodo.org/api/records/",
                        "filepath": "files",
                        "filename": "filename",
                        "download": "links.download",
                        "type": "metadata.upload_type",
                    },
                    "record": "1234",
                }
                # iterate fetch() to completion; the yielded items themselves
                # are not inspected by this test
                for _ in zen.fetch(spec, d):
                    pass

                unpacked_files = set(os.listdir(d))
                expected = {"some-other-file.txt", "some-file.txt"}
                assert expected == unpacked_files
|
|
|
|
|
|
|
|
|
|
|
|
def test_fetch_data():
    """For a "data" record the listed ZIP files are stored as they are."""
    # we "fetch" a local ZIP file to simulate a Zenodo data record
    with zenodo_archive() as a_zen_path, zenodo_archive() as b_zen_path:
        record = {
            "files": [
                {
                    "filename": "afake.zip",
                    "links": {"download": "file://{}".format(a_zen_path)},
                },
                {
                    "filename": "bfake.zip",
                    "links": {"download": "file://{}".format(b_zen_path)},
                },
            ],
            "metadata": {"upload_type": "data"},
        }
        mock_response = BytesIO(json.dumps(record).encode("utf-8"))

        def fake_urlopen(self, req):
            # canned JSON for Request objects, the real urlopen otherwise
            return mock_response if isinstance(req, Request) else urlopen(req)

        zenodo_host = {
            "hostname": [
                "https://zenodo.org/record/",
                "http://zenodo.org/record/",
            ],
            "api": "https://zenodo.org/api/records/",
            "filepath": "files",
            "filename": "filename",
            "download": "links.download",
            "type": "metadata.upload_type",
        }

        with patch.object(Zenodo, "_urlopen", new=fake_urlopen):
            with TemporaryDirectory() as d:
                zen = Zenodo()
                spec = {"host": zenodo_host, "record": "1234"}
                # run the fetch to completion, collecting what it yields
                output = list(zen.fetch(spec, d))

                fetched = set(os.listdir(d))
                # ZIP files shouldn't have been unpacked
                assert {"afake.zip", "bfake.zip"} == fetched
|