kopia lustrzana https://github.com/jupyterhub/repo2docker
Fix content_id for dataverse URLs
rodzic
b7050ba096
commit
fde74efc2e
|
@ -1,6 +1,7 @@
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
|
import hashlib
|
||||||
from urllib.parse import parse_qs, urlparse, urlunparse
|
from urllib.parse import parse_qs, urlparse, urlunparse
|
||||||
|
|
||||||
from ..utils import copytree, deep_get, is_doi
|
from ..utils import copytree, deep_get, is_doi
|
||||||
|
@ -56,6 +57,9 @@ class Dataverse(DoiProvider):
|
||||||
if host is None:
|
if host is None:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
# Used only for content_id
|
||||||
|
self.url = url
|
||||||
|
|
||||||
# At this point, we *know* this is a dataverse URL, because:
|
# At this point, we *know* this is a dataverse URL, because:
|
||||||
# 1. The DOI resolved to a particular host (if using DOI)
|
# 1. The DOI resolved to a particular host (if using DOI)
|
||||||
# 2. The host is in the list of known dataverse installations
|
# 2. The host is in the list of known dataverse installations
|
||||||
|
@ -84,9 +88,9 @@ class Dataverse(DoiProvider):
|
||||||
data = resp.json()["data"]
|
data = resp.json()["data"]
|
||||||
return data["datasetVersion"]["datasetPersistentId"]
|
return data["datasetVersion"]["datasetPersistentId"]
|
||||||
|
|
||||||
def get_persistent_id_from_url(self, url: str) -> str:
|
def get_datafiles(self, dataverse_host: str, url: str) -> list[dict]:
|
||||||
"""
|
"""
|
||||||
Return the persistentId for given dataverse URL.
|
Return a list of dataFiles for given persistent_id
|
||||||
|
|
||||||
Supports the following *dataset* URL styles:
|
Supports the following *dataset* URL styles:
|
||||||
- /citation: https://dataverse.harvard.edu/citation?persistentId=doi:10.7910/DVN/TJCLKP
|
- /citation: https://dataverse.harvard.edu/citation?persistentId=doi:10.7910/DVN/TJCLKP
|
||||||
|
@ -101,11 +105,6 @@ class Dataverse(DoiProvider):
|
||||||
If a URL can not be parsed, throw an exception
|
If a URL can not be parsed, throw an exception
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def get_datafiles(self, dataverse_host: str, url: str) -> list[dict]:
|
|
||||||
"""
|
|
||||||
Return a list of dataFiles for given persistent_id
|
|
||||||
"""
|
|
||||||
|
|
||||||
parsed_url = urlparse(url)
|
parsed_url = urlparse(url)
|
||||||
path = parsed_url.path
|
path = parsed_url.path
|
||||||
qs = parse_qs(parsed_url.query)
|
qs = parse_qs(parsed_url.query)
|
||||||
|
@ -156,9 +155,7 @@ class Dataverse(DoiProvider):
|
||||||
url = spec["url"]
|
url = spec["url"]
|
||||||
host = spec["host"]
|
host = spec["host"]
|
||||||
|
|
||||||
persistent_id = self.get_persistent_id_from_url(url)
|
yield f"Fetching Dataverse record {url}.\n"
|
||||||
|
|
||||||
yield f"Fetching Dataverse record {persistent_id}.\n"
|
|
||||||
|
|
||||||
for fobj in self.get_datafiles(host["url"], url):
|
for fobj in self.get_datafiles(host["url"], url):
|
||||||
file_url = (
|
file_url = (
|
||||||
|
@ -186,10 +183,7 @@ class Dataverse(DoiProvider):
|
||||||
copytree(os.path.join(output_dir, d), output_dir)
|
copytree(os.path.join(output_dir, d), output_dir)
|
||||||
shutil.rmtree(os.path.join(output_dir, d))
|
shutil.rmtree(os.path.join(output_dir, d))
|
||||||
|
|
||||||
# Save persistent id
|
|
||||||
self.persitent_id = persistent_id
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def content_id(self):
|
def content_id(self):
|
||||||
"""The Dataverse persistent identifier."""
|
"""The Dataverse persistent identifier."""
|
||||||
return self.persistent_id
|
return self.url
|
||||||
|
|
Ładowanie…
Reference in New Issue