kopia lustrzana https://github.com/jupyterhub/repo2docker
reformatting with lint
rodzic
21e61f3099
commit
dddc45acfb
|
@ -16,22 +16,26 @@ class Hydroshare(DoiProvider):
|
||||||
"""Provide contents of a Hydroshare resource."""
|
"""Provide contents of a Hydroshare resource."""
|
||||||
|
|
||||||
def detect(self, doi, ref=None, extra_args=None):
|
def detect(self, doi, ref=None, extra_args=None):
|
||||||
"""Trigger this provider for things that resolve to a Zenodo/Invenio record"""
|
"""Trigger this provider for things that resolve to a Hydroshare resource"""
|
||||||
# We need the hostname (url where records are), api url (for metadata),
|
|
||||||
# filepath (path to files in metadata), filename (path to filename in
|
|
||||||
# metadata), download (path to file download URL), and type (path to item type in metadata)
|
|
||||||
hosts = [
|
hosts = [
|
||||||
{
|
{
|
||||||
"hostname": ["https://www.hydroshare.org/resource/", "http://www.hydroshare.org/resource/"],
|
"hostname": [
|
||||||
|
"https://www.hydroshare.org/resource/",
|
||||||
|
"http://www.hydroshare.org/resource/",
|
||||||
|
],
|
||||||
"django_irods": "https://www.hydroshare.org/django_irods/download/bags/",
|
"django_irods": "https://www.hydroshare.org/django_irods/download/bags/",
|
||||||
"version": "https://www.hydroshare.org/hsapi/resource/{}/scimeta/elements"
|
"version": "https://www.hydroshare.org/hsapi/resource/{}/scimeta/elements",
|
||||||
},
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
def fetch_version(resource_id, host):
|
def fetch_version(resource_id, host):
|
||||||
"""Fetch resource modified date and convert to epoch"""
|
"""Fetch resource modified date and convert to epoch"""
|
||||||
json_response = json.loads(self.urlopen(host["version"].format(self.resource_id)).read())
|
json_response = json.loads(
|
||||||
date = next(item for item in json_response["dates"] if item["type"] == "modified")["start_date"]
|
self.urlopen(host["version"].format(self.resource_id)).read()
|
||||||
|
)
|
||||||
|
date = next(
|
||||||
|
item for item in json_response["dates"] if item["type"] == "modified"
|
||||||
|
)["start_date"]
|
||||||
date = date.split(".")[0]
|
date = date.split(".")[0]
|
||||||
return str(int(time.mktime(time.strptime(date, "%Y-%m-%dT%H:%M:%S"))))
|
return str(int(time.mktime(time.strptime(date, "%Y-%m-%dT%H:%M:%S"))))
|
||||||
|
|
||||||
|
@ -41,7 +45,11 @@ class Hydroshare(DoiProvider):
|
||||||
if any([url.startswith(s) for s in host["hostname"]]):
|
if any([url.startswith(s) for s in host["hostname"]]):
|
||||||
self.resource_id = url.strip("/").rsplit("/", maxsplit=1)[1]
|
self.resource_id = url.strip("/").rsplit("/", maxsplit=1)[1]
|
||||||
self.version = fetch_version(self.resource_id, host)
|
self.version = fetch_version(self.resource_id, host)
|
||||||
return {"resource": self.resource_id, "host": host, "version": self.version}
|
return {
|
||||||
|
"resource": self.resource_id,
|
||||||
|
"host": host,
|
||||||
|
"version": self.version,
|
||||||
|
}
|
||||||
|
|
||||||
def _urlretrieve(self, bag_url):
|
def _urlretrieve(self, bag_url):
|
||||||
return urlretrieve(bag_url)
|
return urlretrieve(bag_url)
|
||||||
|
@ -58,14 +66,19 @@ class Hydroshare(DoiProvider):
|
||||||
# bag downloads are prepared on demand and may need some time
|
# bag downloads are prepared on demand and may need some time
|
||||||
conn = self.urlopen(bag_url)
|
conn = self.urlopen(bag_url)
|
||||||
total_wait_time = 0
|
total_wait_time = 0
|
||||||
while conn.getcode() == 200 and conn.info().get_content_type() != "application/zip":
|
while (
|
||||||
|
conn.getcode() == 200
|
||||||
|
and conn.info().get_content_type() != "application/zip"
|
||||||
|
):
|
||||||
wait_time = 10
|
wait_time = 10
|
||||||
total_wait_time += wait_time
|
total_wait_time += wait_time
|
||||||
if total_wait_time > timeout:
|
if total_wait_time > timeout:
|
||||||
msg = "Bag taking too long to prepare, exiting now, try again later."
|
msg = "Bag taking too long to prepare, exiting now, try again later."
|
||||||
yield msg
|
yield msg
|
||||||
raise ContentProviderException(msg)
|
raise ContentProviderException(msg)
|
||||||
yield "Bag is being prepared, requesting again in {} seconds.\n".format(wait_time)
|
yield "Bag is being prepared, requesting again in {} seconds.\n".format(
|
||||||
|
wait_time
|
||||||
|
)
|
||||||
time.sleep(wait_time)
|
time.sleep(wait_time)
|
||||||
conn = self.urlopen(bag_url)
|
conn = self.urlopen(bag_url)
|
||||||
if conn.getcode() != 200:
|
if conn.getcode() != 200:
|
||||||
|
@ -75,7 +88,7 @@ class Hydroshare(DoiProvider):
|
||||||
# Bag creation seems to need a small time buffer after it says it's ready.
|
# Bag creation seems to need a small time buffer after it says it's ready.
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
filehandle, _ = self._urlretrieve(bag_url)
|
filehandle, _ = self._urlretrieve(bag_url)
|
||||||
zip_file_object = zipfile.ZipFile(filehandle, 'r')
|
zip_file_object = zipfile.ZipFile(filehandle, "r")
|
||||||
yield "Downloaded, unpacking contents.\n"
|
yield "Downloaded, unpacking contents.\n"
|
||||||
zip_file_object.extractall("temp")
|
zip_file_object.extractall("temp")
|
||||||
# resources store the contents in the data/contents directory, which is all we want to keep
|
# resources store the contents in the data/contents directory, which is all we want to keep
|
||||||
|
|
|
@ -15,15 +15,20 @@ from repo2docker.contentproviders.base import ContentProviderException
|
||||||
|
|
||||||
def test_content_id():
|
def test_content_id():
|
||||||
with patch.object(Hydroshare, "urlopen") as fake_urlopen:
|
with patch.object(Hydroshare, "urlopen") as fake_urlopen:
|
||||||
fake_urlopen.return_value.url = "https://www.hydroshare.org/resource/b8f6eae9d89241cf8b5904033460af61"
|
fake_urlopen.return_value.url = (
|
||||||
|
"https://www.hydroshare.org/resource/b8f6eae9d89241cf8b5904033460af61"
|
||||||
|
)
|
||||||
|
|
||||||
def read():
|
def read():
|
||||||
return '{"dates": [{"type": "modified", "start_date": "2019-09-25T16:09:17.006152Z"}]}'
|
return '{"dates": [{"type": "modified", "start_date": "2019-09-25T16:09:17.006152Z"}]}'
|
||||||
|
|
||||||
fake_urlopen.return_value.read = read
|
fake_urlopen.return_value.read = read
|
||||||
hydro = Hydroshare()
|
hydro = Hydroshare()
|
||||||
|
|
||||||
hydro.detect("10.4211/hs.b8f6eae9d89241cf8b5904033460af61")
|
hydro.detect("10.4211/hs.b8f6eae9d89241cf8b5904033460af61")
|
||||||
assert hydro.content_id == "b8f6eae9d89241cf8b5904033460af61.v1569449357"
|
assert hydro.content_id == "b8f6eae9d89241cf8b5904033460af61.v1569449357"
|
||||||
|
|
||||||
|
|
||||||
test_hosts = [
|
test_hosts = [
|
||||||
(
|
(
|
||||||
[
|
[
|
||||||
|
@ -33,22 +38,28 @@ test_hosts = [
|
||||||
],
|
],
|
||||||
{
|
{
|
||||||
"host": {
|
"host": {
|
||||||
"hostname": ["https://www.hydroshare.org/resource/", "http://www.hydroshare.org/resource/"],
|
"hostname": [
|
||||||
|
"https://www.hydroshare.org/resource/",
|
||||||
|
"http://www.hydroshare.org/resource/",
|
||||||
|
],
|
||||||
"django_irods": "https://www.hydroshare.org/django_irods/download/bags/",
|
"django_irods": "https://www.hydroshare.org/django_irods/download/bags/",
|
||||||
"version": "https://www.hydroshare.org/hsapi/resource/{}/scimeta/elements",
|
"version": "https://www.hydroshare.org/hsapi/resource/{}/scimeta/elements",
|
||||||
},
|
},
|
||||||
"resource": "b8f6eae9d89241cf8b5904033460af61",
|
"resource": "b8f6eae9d89241cf8b5904033460af61",
|
||||||
"version": "1569449357"
|
"version": "1569449357",
|
||||||
},
|
},
|
||||||
),
|
)
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("test_input,expected", test_hosts)
|
@pytest.mark.parametrize("test_input,expected", test_hosts)
|
||||||
def test_detect_hydroshare(test_input, expected):
|
def test_detect_hydroshare(test_input, expected):
|
||||||
with patch.object(Hydroshare, "urlopen") as fake_urlopen:
|
with patch.object(Hydroshare, "urlopen") as fake_urlopen:
|
||||||
fake_urlopen.return_value.url = test_input[0]
|
fake_urlopen.return_value.url = test_input[0]
|
||||||
|
|
||||||
def read():
|
def read():
|
||||||
return '{"dates": [{"type": "modified", "start_date": "2019-09-25T16:09:17.006152Z"}]}'
|
return '{"dates": [{"type": "modified", "start_date": "2019-09-25T16:09:17.006152Z"}]}'
|
||||||
|
|
||||||
fake_urlopen.return_value.read = read
|
fake_urlopen.return_value.read = read
|
||||||
# valid Hydroshare DOIs trigger this content provider
|
# valid Hydroshare DOIs trigger this content provider
|
||||||
assert Hydroshare().detect(test_input[0]) == expected
|
assert Hydroshare().detect(test_input[0]) == expected
|
||||||
|
@ -65,11 +76,14 @@ def test_detect_hydroshare(test_input, expected):
|
||||||
fake_urlopen.return_value.url = (
|
fake_urlopen.return_value.url = (
|
||||||
"http://joss.theoj.org/papers/10.21105/joss.01277"
|
"http://joss.theoj.org/papers/10.21105/joss.01277"
|
||||||
)
|
)
|
||||||
|
|
||||||
def read():
|
def read():
|
||||||
return '{"dates": [{"type": "modified", "start_date": "2019-09-25T16:09:17.006152Z"}]}'
|
return '{"dates": [{"type": "modified", "start_date": "2019-09-25T16:09:17.006152Z"}]}'
|
||||||
|
|
||||||
fake_urlopen.return_value.read = read
|
fake_urlopen.return_value.read = read
|
||||||
assert Hydroshare().detect("https://doi.org/10.21105/joss.01277") is None
|
assert Hydroshare().detect("https://doi.org/10.21105/joss.01277") is None
|
||||||
|
|
||||||
|
|
||||||
@contextmanager
|
@contextmanager
|
||||||
def hydroshare_archive(prefix="b8f6eae9d89241cf8b5904033460af61/data/contents"):
|
def hydroshare_archive(prefix="b8f6eae9d89241cf8b5904033460af61/data/contents"):
|
||||||
with NamedTemporaryFile(suffix=".zip") as zfile:
|
with NamedTemporaryFile(suffix=".zip") as zfile:
|
||||||
|
@ -79,6 +93,7 @@ def hydroshare_archive(prefix="b8f6eae9d89241cf8b5904033460af61/data/contents"):
|
||||||
|
|
||||||
yield zfile
|
yield zfile
|
||||||
|
|
||||||
|
|
||||||
class MockInfo:
|
class MockInfo:
|
||||||
def __init__(self, content_type):
|
def __init__(self, content_type):
|
||||||
self.content_type = content_type
|
self.content_type = content_type
|
||||||
|
@ -86,6 +101,7 @@ class MockInfo:
|
||||||
def get_content_type(self):
|
def get_content_type(self):
|
||||||
return self.content_type
|
return self.content_type
|
||||||
|
|
||||||
|
|
||||||
class MockResponse:
|
class MockResponse:
|
||||||
def __init__(self, content_type, status_code):
|
def __init__(self, content_type, status_code):
|
||||||
self.content_type = content_type
|
self.content_type = content_type
|
||||||
|
@ -101,11 +117,21 @@ class MockResponse:
|
||||||
def read():
|
def read():
|
||||||
return '{"dates": [{"type": "modified", "start_date": "2019-09-25T16:09:17.006152Z"}]}'
|
return '{"dates": [{"type": "modified", "start_date": "2019-09-25T16:09:17.006152Z"}]}'
|
||||||
|
|
||||||
|
|
||||||
def test_fetch_bag():
|
def test_fetch_bag():
|
||||||
# we "fetch" a local ZIP file to simulate a Hydroshare resource
|
# we "fetch" a local ZIP file to simulate a Hydroshare resource
|
||||||
with hydroshare_archive() as hydro_path:
|
with hydroshare_archive() as hydro_path:
|
||||||
with patch.object(Hydroshare, "urlopen", side_effect=[MockResponse("application/html", 200), MockResponse("application/zip", 200)]):
|
with patch.object(
|
||||||
with patch.object(Hydroshare, "_urlretrieve", side_effect=[(hydro_path, None)]):
|
Hydroshare,
|
||||||
|
"urlopen",
|
||||||
|
side_effect=[
|
||||||
|
MockResponse("application/html", 200),
|
||||||
|
MockResponse("application/zip", 200),
|
||||||
|
],
|
||||||
|
):
|
||||||
|
with patch.object(
|
||||||
|
Hydroshare, "_urlretrieve", side_effect=[(hydro_path, None)]
|
||||||
|
):
|
||||||
hydro = Hydroshare()
|
hydro = Hydroshare()
|
||||||
hydro.resource_id = "b8f6eae9d89241cf8b5904033460af61"
|
hydro.resource_id = "b8f6eae9d89241cf8b5904033460af61"
|
||||||
spec = {
|
spec = {
|
||||||
|
@ -118,19 +144,22 @@ def test_fetch_bag():
|
||||||
},
|
},
|
||||||
"resource": "123456789",
|
"resource": "123456789",
|
||||||
}
|
}
|
||||||
|
|
||||||
with TemporaryDirectory() as d:
|
with TemporaryDirectory() as d:
|
||||||
output = []
|
output = []
|
||||||
for l in hydro.fetch(spec, d):
|
for l in hydro.fetch(spec, d):
|
||||||
output.append(l)
|
output.append(l)
|
||||||
|
|
||||||
unpacked_files = set(os.listdir(d))
|
unpacked_files = set(os.listdir(d))
|
||||||
expected = set(["some-other-file.txt", "some-file.txt"])
|
expected = set(["some-other-file.txt", "some-file.txt"])
|
||||||
assert expected == unpacked_files
|
assert expected == unpacked_files
|
||||||
|
|
||||||
|
|
||||||
def test_fetch_bag_failure():
|
def test_fetch_bag_failure():
|
||||||
with hydroshare_archive() as hydro_path:
|
with hydroshare_archive() as hydro_path:
|
||||||
with patch.object(Hydroshare, "urlopen", side_effect=[MockResponse("application/html", 500)]):
|
with patch.object(
|
||||||
|
Hydroshare, "urlopen", side_effect=[MockResponse("application/html", 500)]
|
||||||
|
):
|
||||||
hydro = Hydroshare()
|
hydro = Hydroshare()
|
||||||
spec = {
|
spec = {
|
||||||
"host": {
|
"host": {
|
||||||
|
@ -152,9 +181,12 @@ def test_fetch_bag_failure():
|
||||||
except ContentProviderException:
|
except ContentProviderException:
|
||||||
assert "Failed to download bag. status code 500.\n" == output[-1]
|
assert "Failed to download bag. status code 500.\n" == output[-1]
|
||||||
|
|
||||||
|
|
||||||
def test_fetch_bag_timeout():
|
def test_fetch_bag_timeout():
|
||||||
with hydroshare_archive() as hydro_path:
|
with hydroshare_archive() as hydro_path:
|
||||||
with patch.object(Hydroshare, "urlopen", side_effect=[MockResponse("application/html", 200)]):
|
with patch.object(
|
||||||
|
Hydroshare, "urlopen", side_effect=[MockResponse("application/html", 200)]
|
||||||
|
):
|
||||||
hydro = Hydroshare()
|
hydro = Hydroshare()
|
||||||
spec = {
|
spec = {
|
||||||
"host": {
|
"host": {
|
||||||
|
@ -174,5 +206,7 @@ def test_fetch_bag_timeout():
|
||||||
print("ContentProviderException should have been thrown")
|
print("ContentProviderException should have been thrown")
|
||||||
assert False
|
assert False
|
||||||
except ContentProviderException:
|
except ContentProviderException:
|
||||||
assert "Bag taking too long to prepare, exiting now, try again later." == output[-1]
|
assert (
|
||||||
|
"Bag taking too long to prepare, exiting now, try again later."
|
||||||
|
== output[-1]
|
||||||
|
)
|
||||||
|
|
Ładowanie…
Reference in New Issue