reformatting with lint

pull/800/head
Scott Black 2019-09-30 13:50:24 -06:00
rodzic 21e61f3099
commit dddc45acfb
2 zmienionych plików z 72 dodań i 25 usunięć

Wyświetl plik

@ -16,22 +16,26 @@ class Hydroshare(DoiProvider):
"""Provide contents of a Hydroshare resource.""" """Provide contents of a Hydroshare resource."""
def detect(self, doi, ref=None, extra_args=None): def detect(self, doi, ref=None, extra_args=None):
"""Trigger this provider for things that resolve to a Zenodo/Invenio record""" """Trigger this provider for things that resolve to a Hydroshare resource"""
# We need the hostname (url where records are), api url (for metadata),
# filepath (path to files in metadata), filename (path to filename in
# metadata), download (path to file download URL), and type (path to item type in metadata)
hosts = [ hosts = [
{ {
"hostname": ["https://www.hydroshare.org/resource/", "http://www.hydroshare.org/resource/"], "hostname": [
"https://www.hydroshare.org/resource/",
"http://www.hydroshare.org/resource/",
],
"django_irods": "https://www.hydroshare.org/django_irods/download/bags/", "django_irods": "https://www.hydroshare.org/django_irods/download/bags/",
"version": "https://www.hydroshare.org/hsapi/resource/{}/scimeta/elements" "version": "https://www.hydroshare.org/hsapi/resource/{}/scimeta/elements",
}, }
] ]
def fetch_version(resource_id, host):
    """Fetch the resource's "modified" date and convert it to an epoch string.

    Uses ``self`` from the enclosing scope to perform the HTTP request.

    Args:
        resource_id: Resource identifier substituted into the host's
            ``version`` URL template.
        host: Host description dict; only ``host["version"]`` is read here.

    Returns:
        The modification time as a string of whole epoch seconds.

    Raises:
        StopIteration: if the metadata contains no date entry whose type
            is "modified".
    """
    # Query the resource that was asked for instead of the id stored on the
    # instance, so the ``resource_id`` argument is actually honoured.
    metadata_url = host["version"].format(resource_id)
    json_response = json.loads(self.urlopen(metadata_url).read())
    modified = next(
        item for item in json_response["dates"] if item["type"] == "modified"
    )
    # Keep only the part before the first "." (drops fractional seconds and
    # whatever follows them) so the fixed-format parse below succeeds.
    date = modified["start_date"].split(".")[0]
    # NOTE(review): time.mktime interprets the parsed time as local time, so
    # the result varies with the machine's timezone. Confirm whether the
    # service reports UTC and whether calendar.timegm is wanted instead.
    return str(int(time.mktime(time.strptime(date, "%Y-%m-%dT%H:%M:%S"))))
@ -41,7 +45,11 @@ class Hydroshare(DoiProvider):
if any([url.startswith(s) for s in host["hostname"]]): if any([url.startswith(s) for s in host["hostname"]]):
self.resource_id = url.strip("/").rsplit("/", maxsplit=1)[1] self.resource_id = url.strip("/").rsplit("/", maxsplit=1)[1]
self.version = fetch_version(self.resource_id, host) self.version = fetch_version(self.resource_id, host)
return {"resource": self.resource_id, "host": host, "version": self.version} return {
"resource": self.resource_id,
"host": host,
"version": self.version,
}
def _urlretrieve(self, bag_url):
    """Retrieve ``bag_url`` via ``urlretrieve`` and return its result unchanged.

    Kept as a separate method so the download step can be replaced in tests.
    """
    retrieved = urlretrieve(bag_url)
    return retrieved
@ -58,14 +66,19 @@ class Hydroshare(DoiProvider):
# bag downloads are prepared on demand and may need some time # bag downloads are prepared on demand and may need some time
conn = self.urlopen(bag_url) conn = self.urlopen(bag_url)
total_wait_time = 0 total_wait_time = 0
while conn.getcode() == 200 and conn.info().get_content_type() != "application/zip": while (
conn.getcode() == 200
and conn.info().get_content_type() != "application/zip"
):
wait_time = 10 wait_time = 10
total_wait_time += wait_time total_wait_time += wait_time
if total_wait_time > timeout: if total_wait_time > timeout:
msg = "Bag taking too long to prepare, exiting now, try again later." msg = "Bag taking too long to prepare, exiting now, try again later."
yield msg yield msg
raise ContentProviderException(msg) raise ContentProviderException(msg)
yield "Bag is being prepared, requesting again in {} seconds.\n".format(wait_time) yield "Bag is being prepared, requesting again in {} seconds.\n".format(
wait_time
)
time.sleep(wait_time) time.sleep(wait_time)
conn = self.urlopen(bag_url) conn = self.urlopen(bag_url)
if conn.getcode() != 200: if conn.getcode() != 200:
@ -75,7 +88,7 @@ class Hydroshare(DoiProvider):
# Bag creation seems to need a small time buffer after it says it's ready. # Bag creation seems to need a small time buffer after it says it's ready.
time.sleep(1) time.sleep(1)
filehandle, _ = self._urlretrieve(bag_url) filehandle, _ = self._urlretrieve(bag_url)
zip_file_object = zipfile.ZipFile(filehandle, 'r') zip_file_object = zipfile.ZipFile(filehandle, "r")
yield "Downloaded, unpacking contents.\n" yield "Downloaded, unpacking contents.\n"
zip_file_object.extractall("temp") zip_file_object.extractall("temp")
# resources store the contents in the data/contents directory, which is all we want to keep # resources store the contents in the data/contents directory, which is all we want to keep

Wyświetl plik

@ -15,15 +15,20 @@ from repo2docker.contentproviders.base import ContentProviderException
def test_content_id():
    """Detecting a Hydroshare DOI should yield the expected ``content_id``."""
    resource_url = (
        "https://www.hydroshare.org/resource/b8f6eae9d89241cf8b5904033460af61"
    )
    metadata_json = '{"dates": [{"type": "modified", "start_date": "2019-09-25T16:09:17.006152Z"}]}'

    def read():
        return metadata_json

    with patch.object(Hydroshare, "urlopen") as fake_urlopen:
        # Every urlopen call returns the same fake response: it reports the
        # resource URL and serves the canned metadata document.
        response = fake_urlopen.return_value
        response.url = resource_url
        response.read = read

        provider = Hydroshare()
        provider.detect("10.4211/hs.b8f6eae9d89241cf8b5904033460af61")
        # NOTE(review): 1569449357 is six hours later than the UTC epoch of
        # the mocked timestamp (1569427757), so this expectation presumably
        # only holds on a UTC-6 machine. Confirm how the provider converts
        # the date.
        assert provider.content_id == "b8f6eae9d89241cf8b5904033460af61.v1569449357"
test_hosts = [ test_hosts = [
( (
[ [
@ -33,22 +38,28 @@ test_hosts = [
], ],
{ {
"host": { "host": {
"hostname": ["https://www.hydroshare.org/resource/", "http://www.hydroshare.org/resource/"], "hostname": [
"https://www.hydroshare.org/resource/",
"http://www.hydroshare.org/resource/",
],
"django_irods": "https://www.hydroshare.org/django_irods/download/bags/", "django_irods": "https://www.hydroshare.org/django_irods/download/bags/",
"version": "https://www.hydroshare.org/hsapi/resource/{}/scimeta/elements", "version": "https://www.hydroshare.org/hsapi/resource/{}/scimeta/elements",
}, },
"resource": "b8f6eae9d89241cf8b5904033460af61", "resource": "b8f6eae9d89241cf8b5904033460af61",
"version": "1569449357" "version": "1569449357",
}, },
), )
] ]
@pytest.mark.parametrize("test_input,expected", test_hosts) @pytest.mark.parametrize("test_input,expected", test_hosts)
def test_detect_hydroshare(test_input, expected): def test_detect_hydroshare(test_input, expected):
with patch.object(Hydroshare, "urlopen") as fake_urlopen: with patch.object(Hydroshare, "urlopen") as fake_urlopen:
fake_urlopen.return_value.url = test_input[0] fake_urlopen.return_value.url = test_input[0]
def read(): def read():
return '{"dates": [{"type": "modified", "start_date": "2019-09-25T16:09:17.006152Z"}]}' return '{"dates": [{"type": "modified", "start_date": "2019-09-25T16:09:17.006152Z"}]}'
fake_urlopen.return_value.read = read fake_urlopen.return_value.read = read
# valid Hydroshare DOIs trigger this content provider # valid Hydroshare DOIs trigger this content provider
assert Hydroshare().detect(test_input[0]) == expected assert Hydroshare().detect(test_input[0]) == expected
@ -65,11 +76,14 @@ def test_detect_hydroshare(test_input, expected):
fake_urlopen.return_value.url = ( fake_urlopen.return_value.url = (
"http://joss.theoj.org/papers/10.21105/joss.01277" "http://joss.theoj.org/papers/10.21105/joss.01277"
) )
def read(): def read():
return '{"dates": [{"type": "modified", "start_date": "2019-09-25T16:09:17.006152Z"}]}' return '{"dates": [{"type": "modified", "start_date": "2019-09-25T16:09:17.006152Z"}]}'
fake_urlopen.return_value.read = read fake_urlopen.return_value.read = read
assert Hydroshare().detect("https://doi.org/10.21105/joss.01277") is None assert Hydroshare().detect("https://doi.org/10.21105/joss.01277") is None
@contextmanager @contextmanager
def hydroshare_archive(prefix="b8f6eae9d89241cf8b5904033460af61/data/contents"): def hydroshare_archive(prefix="b8f6eae9d89241cf8b5904033460af61/data/contents"):
with NamedTemporaryFile(suffix=".zip") as zfile: with NamedTemporaryFile(suffix=".zip") as zfile:
@ -79,6 +93,7 @@ def hydroshare_archive(prefix="b8f6eae9d89241cf8b5904033460af61/data/contents"):
yield zfile yield zfile
class MockInfo: class MockInfo:
def __init__(self, content_type): def __init__(self, content_type):
self.content_type = content_type self.content_type = content_type
@ -86,6 +101,7 @@ class MockInfo:
def get_content_type(self): def get_content_type(self):
return self.content_type return self.content_type
class MockResponse: class MockResponse:
def __init__(self, content_type, status_code): def __init__(self, content_type, status_code):
self.content_type = content_type self.content_type = content_type
@ -101,11 +117,21 @@ class MockResponse:
def read(): def read():
return '{"dates": [{"type": "modified", "start_date": "2019-09-25T16:09:17.006152Z"}]}' return '{"dates": [{"type": "modified", "start_date": "2019-09-25T16:09:17.006152Z"}]}'
def test_fetch_bag(): def test_fetch_bag():
# we "fetch" a local ZIP file to simulate a Hydroshare resource # we "fetch" a local ZIP file to simulate a Hydroshare resource
with hydroshare_archive() as hydro_path: with hydroshare_archive() as hydro_path:
with patch.object(Hydroshare, "urlopen", side_effect=[MockResponse("application/html", 200), MockResponse("application/zip", 200)]): with patch.object(
with patch.object(Hydroshare, "_urlretrieve", side_effect=[(hydro_path, None)]): Hydroshare,
"urlopen",
side_effect=[
MockResponse("application/html", 200),
MockResponse("application/zip", 200),
],
):
with patch.object(
Hydroshare, "_urlretrieve", side_effect=[(hydro_path, None)]
):
hydro = Hydroshare() hydro = Hydroshare()
hydro.resource_id = "b8f6eae9d89241cf8b5904033460af61" hydro.resource_id = "b8f6eae9d89241cf8b5904033460af61"
spec = { spec = {
@ -118,19 +144,22 @@ def test_fetch_bag():
}, },
"resource": "123456789", "resource": "123456789",
} }
with TemporaryDirectory() as d: with TemporaryDirectory() as d:
output = [] output = []
for l in hydro.fetch(spec, d): for l in hydro.fetch(spec, d):
output.append(l) output.append(l)
unpacked_files = set(os.listdir(d)) unpacked_files = set(os.listdir(d))
expected = set(["some-other-file.txt", "some-file.txt"]) expected = set(["some-other-file.txt", "some-file.txt"])
assert expected == unpacked_files assert expected == unpacked_files
def test_fetch_bag_failure(): def test_fetch_bag_failure():
with hydroshare_archive() as hydro_path: with hydroshare_archive() as hydro_path:
with patch.object(Hydroshare, "urlopen", side_effect=[MockResponse("application/html", 500)]): with patch.object(
Hydroshare, "urlopen", side_effect=[MockResponse("application/html", 500)]
):
hydro = Hydroshare() hydro = Hydroshare()
spec = { spec = {
"host": { "host": {
@ -152,9 +181,12 @@ def test_fetch_bag_failure():
except ContentProviderException: except ContentProviderException:
assert "Failed to download bag. status code 500.\n" == output[-1] assert "Failed to download bag. status code 500.\n" == output[-1]
def test_fetch_bag_timeout(): def test_fetch_bag_timeout():
with hydroshare_archive() as hydro_path: with hydroshare_archive() as hydro_path:
with patch.object(Hydroshare, "urlopen", side_effect=[MockResponse("application/html", 200)]): with patch.object(
Hydroshare, "urlopen", side_effect=[MockResponse("application/html", 200)]
):
hydro = Hydroshare() hydro = Hydroshare()
spec = { spec = {
"host": { "host": {
@ -174,5 +206,7 @@ def test_fetch_bag_timeout():
print("ContentProviderException should have been thrown") print("ContentProviderException should have been thrown")
assert False assert False
except ContentProviderException: except ContentProviderException:
assert "Bag taking too long to prepare, exiting now, try again later." == output[-1] assert (
"Bag taking too long to prepare, exiting now, try again later."
== output[-1]
)