add hydroshare resource versioning and raise exception fetch fails

pull/800/head
Scott Black 2019-09-25 14:37:19 -06:00
rodzic 7fcb67f06d
commit 21e61f3099
2 zmienionych plików z 52 dodań i 30 usunięć

Wyświetl plik

@ -29,11 +29,11 @@ class Hydroshare(DoiProvider):
]
def fetch_version(resource_id, host):
"""Fetch resource modified date and convert to epoch int"""
"""Fetch resource modified date and convert to epoch"""
json_response = json.loads(self.urlopen(host["version"].format(self.resource_id)).read())
date = next(item for item in json_response["dates"] if item["type"] == "modified")["start_date"]
date = date.split(".")[0]
return int(time.mktime(time.strptime(date, "%Y-%m-%dT%H:%M:%S")))
return str(int(time.mktime(time.strptime(date, "%Y-%m-%dT%H:%M:%S"))))
url = self.doi2url(doi)

Wyświetl plik

@ -10,16 +10,19 @@ from urllib.request import urlopen, Request, urlretrieve
from zipfile import ZipFile
from repo2docker.contentproviders import Hydroshare
from repo2docker.contentproviders.base import ContentProviderException
def test_content_id():
with patch.object(Hydroshare, "urlopen") as fake_urlopen:
fake_urlopen.return_value.url = "https://www.hydroshare.org/resource/b8f6eae9d89241cf8b5904033460af61"
def read():
return '{"dates": [{"type": "modified", "start_date": "2019-09-25T16:09:17.006152Z"}]}'
fake_urlopen.return_value.read = read
hydro = Hydroshare()
hydro.detect("10.4211/hs.b8f6eae9d89241cf8b5904033460af61")
assert hydro.content_id == "b8f6eae9d89241cf8b5904033460af61"
assert hydro.content_id == "b8f6eae9d89241cf8b5904033460af61.v1569449357"
test_hosts = [
(
@ -32,41 +35,27 @@ test_hosts = [
"host": {
"hostname": ["https://www.hydroshare.org/resource/", "http://www.hydroshare.org/resource/"],
"django_irods": "https://www.hydroshare.org/django_irods/download/bags/",
"version": "https://www.hydroshare.org/hsapi/resource/{}/scimeta/elements",
},
"resource": "b8f6eae9d89241cf8b5904033460af61",
"version": "1569449357"
},
),
]
class MockInfo:
def __init__(self, content_type):
self.content_type = content_type
def get_content_type(self):
return self.content_type
class MockResponse:
def __init__(self, content_type, status_code):
self.content_type = content_type
self.status_code = status_code
self.mock_info = MockInfo(self.content_type)
def getcode(self):
return self.status_code
def info(self):
return self.mock_info
@pytest.mark.parametrize("test_input,expected", test_hosts)
def test_detect_hydroshare(test_input, expected):
with patch.object(Hydroshare, "urlopen") as fake_urlopen:
fake_urlopen.return_value.url = test_input[0]
def read():
return '{"dates": [{"type": "modified", "start_date": "2019-09-25T16:09:17.006152Z"}]}'
fake_urlopen.return_value.read = read
# valid Hydroshare DOIs trigger this content provider
assert Hydroshare().detect(test_input[0]) == expected
assert Hydroshare().detect(test_input[1]) == expected
assert Hydroshare().detect(test_input[2]) == expected
# only two of the three calls above have to resolve a DOI
assert fake_urlopen.call_count == 2
assert fake_urlopen.call_count == 5
with patch.object(Hydroshare, "urlopen") as fake_urlopen:
# Don't trigger the Hydroshare content provider
@ -76,6 +65,9 @@ def test_detect_hydroshare(test_input, expected):
fake_urlopen.return_value.url = (
"http://joss.theoj.org/papers/10.21105/joss.01277"
)
def read():
return '{"dates": [{"type": "modified", "start_date": "2019-09-25T16:09:17.006152Z"}]}'
fake_urlopen.return_value.read = read
assert Hydroshare().detect("https://doi.org/10.21105/joss.01277") is None
@contextmanager
@ -87,6 +79,28 @@ def hydroshare_archive(prefix="b8f6eae9d89241cf8b5904033460af61/data/contents"):
yield zfile
class MockInfo:
def __init__(self, content_type):
self.content_type = content_type
def get_content_type(self):
return self.content_type
class MockResponse:
def __init__(self, content_type, status_code):
self.content_type = content_type
self.status_code = status_code
self.mock_info = MockInfo(self.content_type)
def getcode(self):
return self.status_code
def info(self):
return self.mock_info
def read():
return '{"dates": [{"type": "modified", "start_date": "2019-09-25T16:09:17.006152Z"}]}'
def test_fetch_bag():
# we "fetch" a local ZIP file to simulate a Hydroshare resource
with hydroshare_archive() as hydro_path:
@ -130,9 +144,13 @@ def test_fetch_bag_failure():
}
with TemporaryDirectory() as d:
output = []
for l in hydro.fetch(spec, d):
output.append(l)
assert "Failed to download bag. status code 500.\n" == output[-1]
try:
for l in hydro.fetch(spec, d):
output.append(l)
print("ContentProviderException should have been thrown")
assert False
except ContentProviderException:
assert "Failed to download bag. status code 500.\n" == output[-1]
def test_fetch_bag_timeout():
with hydroshare_archive() as hydro_path:
@ -150,7 +168,11 @@ def test_fetch_bag_timeout():
}
with TemporaryDirectory() as d:
output = []
for l in hydro.fetch(spec, d, timeout=0):
output.append(l)
assert "Bag taking too long to prepare, exiting now, try again later." == output[-1]
try:
for l in hydro.fetch(spec, d, timeout=0):
output.append(l)
print("ContentProviderException should have been thrown")
assert False
except ContentProviderException:
assert "Bag taking too long to prepare, exiting now, try again later." == output[-1]