2019-09-25 15:59:44 +00:00
|
|
|
import os
|
|
|
|
import pytest
|
|
|
|
|
|
|
|
from contextlib import contextmanager
|
|
|
|
from tempfile import TemporaryDirectory, NamedTemporaryFile
|
|
|
|
from unittest.mock import patch
|
|
|
|
from zipfile import ZipFile
|
2020-12-10 17:55:14 +00:00
|
|
|
import re
|
2019-09-25 15:59:44 +00:00
|
|
|
|
|
|
|
from repo2docker.contentproviders import Hydroshare
|
2019-09-25 20:37:19 +00:00
|
|
|
from repo2docker.contentproviders.base import ContentProviderException
|
2019-09-25 15:59:44 +00:00
|
|
|
|
|
|
|
|
2020-12-10 17:55:14 +00:00
|
|
|
# Canned DOI lookups used by the mocked resolver: each key is a doi.org URL
# and each value is the URL that DOI redirects to.
doi_responses = {
    # A DOI that points at a Hydroshare resource.
    "https://doi.org/10.4211/hs.b8f6eae9d89241cf8b5904033460af61":
        "https://www.hydroshare.org/resource/b8f6eae9d89241cf8b5904033460af61",
    # A DOI that points somewhere other than Hydroshare.
    "https://doi.org/10.21105/joss.01277":
        "https://joss.theoj.org/papers/10.21105/joss.01277",
}
|
2019-09-30 19:50:24 +00:00
|
|
|
|
|
|
|
|
2020-12-10 17:55:14 +00:00
|
|
|
def doi_resolver(req, context):
    """Response callback that answers known DOI URLs with a redirect.

    Looks up ``req.url`` in ``doi_responses``. For a known URL the response
    is turned into a 302 whose ``Location`` header is the mapped target, and
    the target is returned as the body. For an unknown URL nothing on
    ``context`` is touched and ``None`` is returned.
    """
    target = doi_responses.get(req.url)
    if target is None:
        return None

    # doi responses are redirects
    context.status_code = 302
    context.headers["Location"] = target
    return target
|
2019-09-25 15:59:44 +00:00
|
|
|
|
|
|
|
|
2020-12-10 17:55:14 +00:00
|
|
|
# Canned JSON body served by the mocked HTTP layer. It carries a single
# "modified" date; the tests in this module expect the version 1569427757,
# which is this timestamp expressed as whole Unix seconds.
hydroshare_data = {
    "dates": [
        {
            "type": "modified",
            "start_date": "2019-09-25T16:09:17.006152Z",
        },
    ],
}
|
2019-09-30 19:50:24 +00:00
|
|
|
|
2019-12-07 20:34:49 +00:00
|
|
|
|
2020-12-10 17:55:14 +00:00
|
|
|
def test_content_id(requests_mock):
    """content_id is "<resource id>.v<version>" after detecting a Hydroshare DOI."""
    # Registration order is kept as-is: the catch-all matcher first, then the
    # doi.org matcher that answers through doi_resolver.
    requests_mock.get(re.compile("https://"), json=hydroshare_data)
    requests_mock.get(re.compile("https://doi.org"), json=doi_resolver)

    provider = Hydroshare()
    provider.detect("10.4211/hs.b8f6eae9d89241cf8b5904033460af61")

    expected_id = "b8f6eae9d89241cf8b5904033460af61.v1569427757"
    assert provider.content_id == expected_id
|
|
|
|
|
|
|
|
|
|
|
|
def test_detect_hydroshare(requests_mock):
    """detect() accepts Hydroshare URLs and DOIs and ignores everything else."""
    # Registration order is kept as-is: the catch-all matcher first, then the
    # doi.org matcher that answers through doi_resolver.
    requests_mock.get(re.compile("https://"), json=hydroshare_data)
    requests_mock.get(re.compile("https://doi.org"), json=doi_resolver)

    # valid Hydroshare DOIs trigger this content provider
    host = {
        "hostname": [
            "https://www.hydroshare.org/resource/",
            "http://www.hydroshare.org/resource/",
        ],
        "django_irods": "https://www.hydroshare.org/django_irods/download/bags/",
        "version": "https://www.hydroshare.org/hsapi/resource/{}/scimeta/elements",
    }
    expected = {
        "host": host,
        "resource": "b8f6eae9d89241cf8b5904033460af61",
        "version": "1569427757",
    }

    # A direct resource URL needs no DOI resolution.
    resource_url = (
        "https://www.hydroshare.org/resource/b8f6eae9d89241cf8b5904033460af61"
    )
    assert Hydroshare().detect(resource_url) == expected
    # assert exactly one request was made, to fetch the version
    assert requests_mock.call_count == 1
    requests_mock.reset_mock()

    # Both the bare DOI and its doi.org URL form must resolve to the same spec.
    doi_forms = (
        "10.4211/hs.b8f6eae9d89241cf8b5904033460af61",
        "https://doi.org/10.4211/hs.b8f6eae9d89241cf8b5904033460af61",
    )
    for doi in doi_forms:
        assert Hydroshare().detect(doi) == expected
        # assert 3 calls were made, 2 to resolve the DOI (302 + 200) and
        # another to fetch the version
        assert requests_mock.call_count == 3
        requests_mock.reset_mock()

    # Don't trigger the Hydroshare content provider
    for unrelated in ("/some/path/here", "https://example.com/path/here"):
        assert Hydroshare().detect(unrelated) is None

    # don't handle DOIs that aren't from Hydroshare
    assert Hydroshare().detect("https://doi.org/10.21105/joss.01277") is None
|
2019-09-25 15:59:44 +00:00
|
|
|
|
2019-09-30 19:50:24 +00:00
|
|
|
|
2019-09-25 15:59:44 +00:00
|
|
|
@contextmanager
def hydroshare_archive(prefix="b8f6eae9d89241cf8b5904033460af61/data/contents"):
    """Yield a temporary ``.zip`` file holding two small text members.

    Both members are stored under ``prefix``. The temporary file is yielded
    as the open ``NamedTemporaryFile`` object and is cleaned up when the
    context exits.
    """
    members = (
        ("some-file.txt", "some content"),
        ("some-other-file.txt", "some more content"),
    )
    with NamedTemporaryFile(suffix=".zip") as zfile:
        with ZipFile(zfile.name, mode="w") as archive:
            for member_name, text in members:
                archive.writestr("{}/{}".format(prefix, member_name), text)

        # The ZipFile is closed at this point, so the archive on disk is
        # complete before it is handed to the caller.
        yield zfile
|
|
|
|
|
2019-09-30 19:50:24 +00:00
|
|
|
|
2019-09-25 20:37:19 +00:00
|
|
|
class MockResponse:
    """Bare-bones stand-in for an HTTP response.

    Exposes only ``status_code`` and a ``headers`` dict containing a single
    ``"content-type"`` entry.
    """

    def __init__(self, content_type, status_code):
        self.headers = {"content-type": content_type}
        self.status_code = status_code
|
2019-09-25 20:37:19 +00:00
|
|
|
|
2019-09-30 19:50:24 +00:00
|
|
|
|
2019-09-25 15:59:44 +00:00
|
|
|
def test_fetch_bag():
    """fetch() unpacks the files of the mocked bag archive into the output dir."""
    spec = {
        "host": {
            "hostname": [
                "https://www.hydroshare.org/resource/",
                "http://www.hydroshare.org/resource/",
            ],
            "django_irods": "https://www.hydroshare.org/django_irods/download/bags/",
        },
        "resource": "123456789",
    }

    # we "fetch" a local ZIP file to simulate a Hydroshare resource
    with hydroshare_archive() as archive_file:
        # urlopen is called twice: the first mocked response reports HTML,
        # the second reports a zip.
        status_responses = [
            MockResponse("application/html", 200),
            MockResponse("application/zip", 200),
        ]
        # _urlretrieve hands back the temporary archive file object.
        retrieved = [(archive_file, None)]

        with patch.object(Hydroshare, "urlopen", side_effect=status_responses):
            with patch.object(Hydroshare, "_urlretrieve", side_effect=retrieved):
                provider = Hydroshare()
                provider.resource_id = "b8f6eae9d89241cf8b5904033460af61"

                with TemporaryDirectory() as out_dir:
                    # fetch() yields as it goes; drain it so the work runs
                    for _ in provider.fetch(spec, out_dir):
                        pass

                    unpacked = set(os.listdir(out_dir))
                    assert {"some-other-file.txt", "some-file.txt"} == unpacked
|
|
|
|
|
2019-09-30 19:50:24 +00:00
|
|
|
|
2019-09-25 15:59:44 +00:00
|
|
|
def test_fetch_bag_failure():
    """fetch() raises ContentProviderException when the bag request returns 500."""
    spec = {
        "host": {
            "hostname": [
                "https://www.hydroshare.org/resource/",
                "http://www.hydroshare.org/resource/",
            ],
            "django_irods": "https://www.hydroshare.org/django_irods/download/bags/",
        },
        "resource": "123456789",
    }
    expected_message = r"Failed to download bag\. status code 500\."

    with hydroshare_archive():
        # The only mocked response is a server error.
        server_error = [MockResponse("application/html", 500)]
        with patch.object(Hydroshare, "urlopen", side_effect=server_error):
            provider = Hydroshare()
            with TemporaryDirectory() as out_dir:
                with pytest.raises(ContentProviderException, match=expected_message):
                    # loop for yield statements
                    for _ in provider.fetch(spec, out_dir):
                        pass
|
2019-09-25 15:59:44 +00:00
|
|
|
|
2019-09-30 19:50:24 +00:00
|
|
|
|
2019-09-25 15:59:44 +00:00
|
|
|
def test_fetch_bag_timeout():
    """fetch() with timeout=0 raises when the bag is not ready yet."""
    spec = {
        "host": {
            "hostname": [
                "https://www.hydroshare.org/resource/",
                "http://www.hydroshare.org/resource/",
            ],
            "django_irods": "https://www.hydroshare.org/django_irods/download/bags/",
        },
        "resource": "123456789",
    }
    expected_message = (
        r"Bag taking too long to prepare, exiting now, try again later\."
    )

    with hydroshare_archive():
        # The only mocked response reports HTML rather than a zip.
        not_ready = [MockResponse("application/html", 200)]
        with patch.object(Hydroshare, "urlopen", side_effect=not_ready):
            provider = Hydroshare()
            with TemporaryDirectory() as out_dir:
                with pytest.raises(ContentProviderException, match=expected_message):
                    # loop for yield statements
                    for _ in provider.fetch(spec, out_dir, timeout=0):
                        pass
|