pull/739/head
Kacper Kowalik (Xarthisius) 2019-09-13 11:59:36 -05:00
rodzic 331a610324
commit 8edafd0d07
Nie znaleziono w bazie danych klucza dla tego podpisu
ID klucza GPG: 5D21B852895192F9
1 zmienionych plików z 121 dodań i 0 usunięć

Wyświetl plik

@@ -0,0 +1,121 @@
import json
import os
import pytest
from contextlib import contextmanager
from io import BytesIO
from tempfile import TemporaryDirectory, NamedTemporaryFile
from unittest.mock import patch
from urllib.request import urlopen, Request
from zipfile import ZipFile
from repo2docker.contentproviders import Dataverse
# Module-level provider instance; its ``hosts`` list is searched just below.
test_dv = Dataverse()
# The entry of ``test_dv.hosts`` whose "id" is 1745. ``next`` raises
# StopIteration at import time if no such entry exists.
harvard_dv = next(host for host in test_dv.hosts if host["id"] == 1745)
# Parametrize cases for ``test_detect_dataverse``. Each case is a pair:
# a list of inputs handed to ``Dataverse().detect`` one by one, and the single
# result every one of them is expected to produce.
_harvard_inputs = [
    "doi:10.7910/DVN/6ZXAGT/3YRRYJ",
    "10.7910/DVN/6ZXAGT",
    "https://dataverse.harvard.edu/api/access/datafile/3323458",
]
_harvard_expected = {"host": harvard_dv, "record": "doi:10.7910/DVN/6ZXAGT"}
test_hosts = [(_harvard_inputs, _harvard_expected)]
# Canned lookup table: identifier (DOI, bare DOI or URL) -> URL it maps to.
# ``test_detect_dataverse`` serves it through its patched ``doi2url``.
_HARVARD_URL = "https://dataverse.harvard.edu"
_FILE_PAGE = _HARVARD_URL + "/file.xhtml?persistentId=doi:10.7910/DVN/6ZXAGT/3YRRYJ"
_DATASET_PAGE = _HARVARD_URL + "/dataset.xhtml?persistentId=doi:10.7910/DVN/6ZXAGT"
_DATAFILE_URL = _HARVARD_URL + "/api/access/datafile/3323458"
test_responses = {
    "doi:10.7910/DVN/6ZXAGT/3YRRYJ": _FILE_PAGE,
    # The dataset DOI maps to the same page with or without the "doi:" prefix.
    "doi:10.7910/DVN/6ZXAGT": _DATASET_PAGE,
    "10.7910/DVN/6ZXAGT": _DATASET_PAGE,
    # A raw datafile URL maps to itself.
    _DATAFILE_URL: _DATAFILE_URL,
}
# Canned payload with a single item naming the dataset DOI.
# ``test_detect_dataverse`` JSON-encodes it and returns it from the mocked
# ``urlopen(...).read()``.
_search_item = {"dataset_persistent_id": "doi:10.7910/DVN/6ZXAGT"}
test_search = {"data": {"count_in_response": 1, "items": [_search_item]}}
@pytest.mark.parametrize("test_input, expected", test_hosts)
def test_detect_dataverse(test_input, expected):
    """Check which inputs ``Dataverse.detect`` accepts and which it ignores.

    ``test_input`` supplies three inputs that must each be detected as
    ``expected``. A plain path, an unrelated URL and a DOI that resolves to a
    non-Dataverse site must each yield ``None``. ``Dataverse.urlopen`` is
    mocked throughout.
    """

    def canned_doi2url(url):
        # Replacement for DOI resolution: answer from the canned table.
        return test_responses.get(url)

    search_body = json.dumps(test_search).encode()
    first_input = test_input[0]
    second_input = test_input[1]
    third_input = test_input[2]

    with patch.object(Dataverse, "urlopen") as urlopen_mock:
        with patch.object(
            Dataverse, "doi2url", side_effect=canned_doi2url
        ) as doi2url_mock:
            urlopen_mock.return_value.read.return_value = search_body

            # Inputs that belong to Dataverse must be claimed by the provider.
            assert Dataverse().detect(first_input) == expected
            # Two resolutions for the first input: the file, then its dataset.
            assert doi2url_mock.call_count == 2
            assert Dataverse().detect(second_input) == expected
            assert Dataverse().detect(third_input) == expected
            # Across all three detections urlopen must be hit exactly once.
            # NOTE(review): presumably the lookup for the raw datafile URL,
            # confirm against Dataverse.detect.
            assert urlopen_mock.call_count == 1

    with patch.object(Dataverse, "urlopen") as urlopen_mock:
        # Inputs unrelated to Dataverse must not trigger the provider.
        for unrelated in ("/some/path/here", "https://example.com/path/here"):
            assert Dataverse().detect(unrelated) is None

        # A DOI that resolves outside of Dataverse is not handled either.
        urlopen_mock.return_value.url = (
            "http://joss.theoj.org/papers/10.21105/joss.01277"
        )
        assert Dataverse().detect("https://doi.org/10.21105/joss.01277") is None
@contextmanager
def dv_archive(prefix="a_directory"):
    """Yield the path of a temporary zip archive holding two text files.

    The archive contains ``some-file.txt`` and ``some-other-file.txt``, both
    stored under the ``prefix`` directory.

    Args:
        prefix: Name of the directory the two files are placed in inside the
            archive.

    Yields:
        Path of the ``.zip`` file. The underlying temporary file is removed
        when the ``with`` block using this context manager exits.
    """
    with NamedTemporaryFile(suffix=".zip") as zfile:
        # Named ``archive`` rather than ``zip`` so the builtin is not shadowed.
        with ZipFile(zfile.name, mode="w") as archive:
            archive.writestr("{}/some-file.txt".format(prefix), "some content")
            archive.writestr(
                "{}/some-other-file.txt".format(prefix), "some more content"
            )

        # The ZipFile is closed, and so complete on disk, before the path is
        # handed to the caller.
        yield zfile.name
def test_dataverse_fetch():
    """Check that ``Dataverse.fetch`` unpacks a dataset into the output dir.

    ``Dataverse.urlopen`` is replaced by a stub. It answers the dataset query
    with a canned listing of two files (ids 1 and 2) and the datafiles
    download with a local zip archive. After fetching, the output directory
    must contain the two archived files directly, without the archive's
    top-level directory.
    """
    mock_response_ds_query = BytesIO(
        json.dumps(
            {
                "data": {
                    "latestVersion": {
                        "files": [{"dataFile": {"id": 1}}, {"dataFile": {"id": 2}}]
                    }
                }
            }
        ).encode("utf-8")
    )
    spec = {"host": harvard_dv, "record": "doi:10.7910/DVN/6ZXAGT"}

    dv = Dataverse()

    with dv_archive() as data_local_path:

        def mock_urlopen(self, req):
            # Only Request objects are answered. Anything else, or a Request
            # matching neither URL pattern, falls through and returns None.
            if isinstance(req, Request):
                if "/api/datasets" in req.full_url:
                    return mock_response_ds_query
                elif "/api/access/datafiles" in req.full_url:
                    # Both file ids from the canned listing must be requested.
                    assert req.full_url.endswith("1,2")
                    return urlopen("file://{}".format(data_local_path))

        with patch.object(Dataverse, "urlopen", new=mock_urlopen):
            with TemporaryDirectory() as d:
                # Iterate fetch() to completion; the yielded items themselves
                # are not inspected by this test.
                for _ in dv.fetch(spec, d):
                    pass

                assert set(os.listdir(d)) == {
                    "some-other-file.txt",
                    "some-file.txt",
                }