repo2docker/tests/unit/contentproviders/test_dataverse.py

122 wiersze
4.2 KiB
Python
Czysty Zwykły widok Historia

2019-09-13 16:59:36 +00:00
import json
import os
import pytest
from contextlib import contextmanager
from io import BytesIO
from tempfile import TemporaryDirectory, NamedTemporaryFile
from unittest.mock import patch
from urllib.request import urlopen, Request
from zipfile import ZipFile
from repo2docker.contentproviders import Dataverse
# Provider instance used only to look up the list of known Dataverse hosts.
test_dv = Dataverse()

# Host entry whose id is 1745; every fixture URL below points at
# dataverse.harvard.edu.
harvard_dv = next(host for host in test_dv.hosts if host["id"] == 1745)

# Parametrization data for test_detect_dataverse: each item pairs a list of
# three inputs with the single result detect() must return for all of them.
test_hosts = [
    (
        [
            "doi:10.7910/DVN/6ZXAGT/3YRRYJ",
            "10.7910/DVN/6ZXAGT",
            "https://dataverse.harvard.edu/api/access/datafile/3323458",
        ],
        {"host": harvard_dv, "record": "doi:10.7910/DVN/6ZXAGT"},
    )
]

# Canned answers for the patched doi2url: detect() input -> resolved URL.
test_responses = {
    "doi:10.7910/DVN/6ZXAGT/3YRRYJ": "https://dataverse.harvard.edu/file.xhtml?persistentId=doi:10.7910/DVN/6ZXAGT/3YRRYJ",
    "doi:10.7910/DVN/6ZXAGT": "https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/6ZXAGT",
    "10.7910/DVN/6ZXAGT": "https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/6ZXAGT",
    "https://dataverse.harvard.edu/api/access/datafile/3323458": "https://dataverse.harvard.edu/api/access/datafile/3323458",
}

# JSON document served (after encoding) by the patched urlopen in
# test_detect_dataverse.
test_search = {
    "data": {
        "count_in_response": 1,
        "items": [{"dataset_persistent_id": "doi:10.7910/DVN/6ZXAGT"}],
    }
}
@pytest.mark.parametrize("test_input, expected", test_hosts)
def test_detect_dataverse(test_input, expected):
    """Check which inputs ``Dataverse.detect`` accepts and which it rejects.

    ``test_input`` carries three inputs that must all be detected as
    ``expected``. Network access is replaced by mocks of ``urlopen`` and
    ``doi2url`` on the ``Dataverse`` class.
    """

    def resolve_from_fixture(url):
        # Canned DOI resolution; inputs missing from the table give None.
        return test_responses.get(url)

    first_input, second_input, third_input = test_input
    search_payload = json.dumps(test_search).encode()

    with patch.object(Dataverse, "urlopen") as urlopen_mock:
        with patch.object(
            Dataverse, "doi2url", side_effect=resolve_from_fixture
        ) as doi2url_mock:
            urlopen_mock.return_value.read.return_value = search_payload

            # valid Dataverse DOIs trigger this content provider
            assert Dataverse().detect(first_input) == expected
            # doi2url is expected to run twice here: file, then dataset
            assert doi2url_mock.call_count == 2
            assert Dataverse().detect(second_input) == expected
            assert Dataverse().detect(third_input) == expected
            # across the three detect() calls above the patched urlopen must
            # have been used exactly once (presumably for the datafile URL
            # lookup -- confirm against Dataverse.detect)
            assert urlopen_mock.call_count == 1

    with patch.object(Dataverse, "urlopen") as urlopen_mock:
        # Don't trigger the Dataverse content provider
        for unrelated in ("/some/path/here", "https://example.com/path/here"):
            assert Dataverse().detect(unrelated) is None

        # don't handle DOIs that aren't from Dataverse
        urlopen_mock.return_value.url = (
            "http://joss.theoj.org/papers/10.21105/joss.01277"
        )
        assert Dataverse().detect("https://doi.org/10.21105/joss.01277") is None
@contextmanager
def dv_archive(prefix="a_directory"):
    """Yield the path of a temporary zip archive holding two text files.

    Args:
        prefix: Directory name under which both members are stored inside
            the archive.

    Yields:
        Filesystem path of the ``.zip`` file. The file is deleted when the
        context exits.
    """
    with NamedTemporaryFile(suffix=".zip") as zfile:
        # Close the ZipFile before yielding so the archive is fully written
        # by the time the caller reads it.
        with ZipFile(zfile.name, mode="w") as archive:
            archive.writestr("{}/some-file.txt".format(prefix), "some content")
            archive.writestr(
                "{}/some-other-file.txt".format(prefix), "some more content"
            )
        yield zfile.name
def test_dataverse_fetch():
    """Fetch a mocked dataset and check its files land in the output directory.

    ``Dataverse.urlopen`` is replaced by a stub that serves a canned dataset
    description (file ids 1 and 2) and, for the bundle download, the local
    archive built by ``dv_archive``. After ``fetch`` has run, the two archive
    members must sit directly in the output directory, without the archive's
    prefix directory.
    """
    mock_response_ds_query = BytesIO(
        json.dumps(
            {
                "data": {
                    "latestVersion": {
                        "files": [{"dataFile": {"id": 1}}, {"dataFile": {"id": 2}}]
                    }
                }
            }
        ).encode("utf-8")
    )
    spec = {"host": harvard_dv, "record": "doi:10.7910/DVN/6ZXAGT"}
    dv = Dataverse()

    with dv_archive() as data_local_path:

        def mock_urlopen(self, req):
            # Only Request objects aimed at the two known endpoints are
            # answered; anything else falls through and returns None.
            if isinstance(req, Request):
                if "/api/datasets" in req.full_url:
                    return mock_response_ds_query
                elif "/api/access/datafiles" in req.full_url:
                    # The bundle request must ask for both file ids.
                    assert req.full_url.endswith("1,2")
                    return urlopen("file://{}".format(data_local_path))

        with patch.object(Dataverse, "urlopen", new=mock_urlopen):
            with TemporaryDirectory() as d:
                # Run fetch to completion; the values it yields are not
                # inspected by this test.
                for _ in dv.fetch(spec, d):
                    pass

                unpacked_files = set(os.listdir(d))
                expected = {"some-other-file.txt", "some-file.txt"}
                assert expected == unpacked_files