reformatting with lint

pull/800/head
Scott Black 2019-09-30 13:50:24 -06:00
rodzic 21e61f3099
commit dddc45acfb
2 zmienionych plików z 72 dodań i 25 usunięć

Wyświetl plik

@@ -16,22 +16,26 @@ class Hydroshare(DoiProvider):
"""Provide contents of a Hydroshare resource."""
def detect(self, doi, ref=None, extra_args=None):
"""Trigger this provider for things that resolve to a Zenodo/Invenio record"""
# We need the hostname (url where records are), api url (for metadata),
# filepath (path to files in metadata), filename (path to filename in
# metadata), download (path to file download URL), and type (path to item type in metadata)
"""Trigger this provider for things that resolve to a Hydroshare resource"""
hosts = [
{
"hostname": ["https://www.hydroshare.org/resource/", "http://www.hydroshare.org/resource/"],
"hostname": [
"https://www.hydroshare.org/resource/",
"http://www.hydroshare.org/resource/",
],
"django_irods": "https://www.hydroshare.org/django_irods/download/bags/",
"version": "https://www.hydroshare.org/hsapi/resource/{}/scimeta/elements"
},
"version": "https://www.hydroshare.org/hsapi/resource/{}/scimeta/elements",
}
]
def fetch_version(resource_id, host):
"""Fetch resource modified date and convert to epoch"""
json_response = json.loads(self.urlopen(host["version"].format(self.resource_id)).read())
date = next(item for item in json_response["dates"] if item["type"] == "modified")["start_date"]
json_response = json.loads(
self.urlopen(host["version"].format(self.resource_id)).read()
)
date = next(
item for item in json_response["dates"] if item["type"] == "modified"
)["start_date"]
date = date.split(".")[0]
return str(int(time.mktime(time.strptime(date, "%Y-%m-%dT%H:%M:%S"))))
@@ -41,7 +45,11 @@ class Hydroshare(DoiProvider):
if any([url.startswith(s) for s in host["hostname"]]):
self.resource_id = url.strip("/").rsplit("/", maxsplit=1)[1]
self.version = fetch_version(self.resource_id, host)
return {"resource": self.resource_id, "host": host, "version": self.version}
return {
"resource": self.resource_id,
"host": host,
"version": self.version,
}
def _urlretrieve(self, bag_url):
return urlretrieve(bag_url)
@@ -58,14 +66,19 @@ class Hydroshare(DoiProvider):
# bag downloads are prepared on demand and may need some time
conn = self.urlopen(bag_url)
total_wait_time = 0
while conn.getcode() == 200 and conn.info().get_content_type() != "application/zip":
while (
conn.getcode() == 200
and conn.info().get_content_type() != "application/zip"
):
wait_time = 10
total_wait_time += wait_time
if total_wait_time > timeout:
msg = "Bag taking too long to prepare, exiting now, try again later."
yield msg
raise ContentProviderException(msg)
yield "Bag is being prepared, requesting again in {} seconds.\n".format(wait_time)
yield "Bag is being prepared, requesting again in {} seconds.\n".format(
wait_time
)
time.sleep(wait_time)
conn = self.urlopen(bag_url)
if conn.getcode() != 200:
@@ -75,7 +88,7 @@ class Hydroshare(DoiProvider):
# Bag creation seems to need a small time buffer after it says it's ready.
time.sleep(1)
filehandle, _ = self._urlretrieve(bag_url)
zip_file_object = zipfile.ZipFile(filehandle, 'r')
zip_file_object = zipfile.ZipFile(filehandle, "r")
yield "Downloaded, unpacking contents.\n"
zip_file_object.extractall("temp")
# resources store the contents in the data/contents directory, which is all we want to keep

Wyświetl plik

@@ -15,15 +15,20 @@ from repo2docker.contentproviders.base import ContentProviderException
def test_content_id():
with patch.object(Hydroshare, "urlopen") as fake_urlopen:
fake_urlopen.return_value.url = "https://www.hydroshare.org/resource/b8f6eae9d89241cf8b5904033460af61"
fake_urlopen.return_value.url = (
"https://www.hydroshare.org/resource/b8f6eae9d89241cf8b5904033460af61"
)
def read():
return '{"dates": [{"type": "modified", "start_date": "2019-09-25T16:09:17.006152Z"}]}'
fake_urlopen.return_value.read = read
hydro = Hydroshare()
hydro.detect("10.4211/hs.b8f6eae9d89241cf8b5904033460af61")
assert hydro.content_id == "b8f6eae9d89241cf8b5904033460af61.v1569449357"
test_hosts = [
(
[
@@ -33,22 +38,28 @@ test_hosts = [
],
{
"host": {
"hostname": ["https://www.hydroshare.org/resource/", "http://www.hydroshare.org/resource/"],
"hostname": [
"https://www.hydroshare.org/resource/",
"http://www.hydroshare.org/resource/",
],
"django_irods": "https://www.hydroshare.org/django_irods/download/bags/",
"version": "https://www.hydroshare.org/hsapi/resource/{}/scimeta/elements",
},
"resource": "b8f6eae9d89241cf8b5904033460af61",
"version": "1569449357"
"version": "1569449357",
},
),
)
]
@pytest.mark.parametrize("test_input,expected", test_hosts)
def test_detect_hydroshare(test_input, expected):
with patch.object(Hydroshare, "urlopen") as fake_urlopen:
fake_urlopen.return_value.url = test_input[0]
def read():
return '{"dates": [{"type": "modified", "start_date": "2019-09-25T16:09:17.006152Z"}]}'
fake_urlopen.return_value.read = read
# valid Hydroshare DOIs trigger this content provider
assert Hydroshare().detect(test_input[0]) == expected
@@ -65,11 +76,14 @@ def test_detect_hydroshare(test_input, expected):
fake_urlopen.return_value.url = (
"http://joss.theoj.org/papers/10.21105/joss.01277"
)
def read():
return '{"dates": [{"type": "modified", "start_date": "2019-09-25T16:09:17.006152Z"}]}'
fake_urlopen.return_value.read = read
assert Hydroshare().detect("https://doi.org/10.21105/joss.01277") is None
@contextmanager
def hydroshare_archive(prefix="b8f6eae9d89241cf8b5904033460af61/data/contents"):
with NamedTemporaryFile(suffix=".zip") as zfile:
@@ -79,6 +93,7 @@ def hydroshare_archive(prefix="b8f6eae9d89241cf8b5904033460af61/data/contents"):
yield zfile
class MockInfo:
def __init__(self, content_type):
self.content_type = content_type
@@ -86,6 +101,7 @@ class MockInfo:
def get_content_type(self):
return self.content_type
class MockResponse:
def __init__(self, content_type, status_code):
self.content_type = content_type
@@ -101,11 +117,21 @@ class MockResponse:
def read():
return '{"dates": [{"type": "modified", "start_date": "2019-09-25T16:09:17.006152Z"}]}'
def test_fetch_bag():
# we "fetch" a local ZIP file to simulate a Hydroshare resource
with hydroshare_archive() as hydro_path:
with patch.object(Hydroshare, "urlopen", side_effect=[MockResponse("application/html", 200), MockResponse("application/zip", 200)]):
with patch.object(Hydroshare, "_urlretrieve", side_effect=[(hydro_path, None)]):
with patch.object(
Hydroshare,
"urlopen",
side_effect=[
MockResponse("application/html", 200),
MockResponse("application/zip", 200),
],
):
with patch.object(
Hydroshare, "_urlretrieve", side_effect=[(hydro_path, None)]
):
hydro = Hydroshare()
hydro.resource_id = "b8f6eae9d89241cf8b5904033460af61"
spec = {
@@ -118,19 +144,22 @@ def test_fetch_bag():
},
"resource": "123456789",
}
with TemporaryDirectory() as d:
output = []
for l in hydro.fetch(spec, d):
output.append(l)
unpacked_files = set(os.listdir(d))
expected = set(["some-other-file.txt", "some-file.txt"])
assert expected == unpacked_files
def test_fetch_bag_failure():
with hydroshare_archive() as hydro_path:
with patch.object(Hydroshare, "urlopen", side_effect=[MockResponse("application/html", 500)]):
with patch.object(
Hydroshare, "urlopen", side_effect=[MockResponse("application/html", 500)]
):
hydro = Hydroshare()
spec = {
"host": {
@@ -152,9 +181,12 @@ def test_fetch_bag_failure():
except ContentProviderException:
assert "Failed to download bag. status code 500.\n" == output[-1]
def test_fetch_bag_timeout():
with hydroshare_archive() as hydro_path:
with patch.object(Hydroshare, "urlopen", side_effect=[MockResponse("application/html", 200)]):
with patch.object(
Hydroshare, "urlopen", side_effect=[MockResponse("application/html", 200)]
):
hydro = Hydroshare()
spec = {
"host": {
@@ -174,5 +206,7 @@ def test_fetch_bag_timeout():
print("ContentProviderException should have been thrown")
assert False
except ContentProviderException:
assert "Bag taking too long to prepare, exiting now, try again later." == output[-1]
assert (
"Bag taking too long to prepare, exiting now, try again later."
== output[-1]
)