repo2docker/repo2docker/contentproviders/zenodo.py

84 wiersze
3.0 KiB
Python
Czysty Zwykły widok Historia

2019-05-27 15:32:03 +00:00
import json
import os
2019-05-27 15:32:03 +00:00
import shutil
from os import makedirs, path
2019-06-20 20:22:17 +00:00
from urllib.error import HTTPError
from urllib.request import Request
2019-05-27 15:32:03 +00:00
2019-06-20 20:22:17 +00:00
from ..utils import copytree, deep_get
from .doi import DoiProvider
2019-05-27 15:32:03 +00:00
class Zenodo(DoiProvider):
    """Provide contents of a Zenodo deposit.

    Besides zenodo.org itself, every entry in ``self.hosts`` is handled the
    same way: the record's JSON metadata is requested from the host's API
    URL and each file listed in that metadata is fetched.
    """

    def __init__(self):
        super().__init__()
        # Set by detect(). Initialised here so that reading content_id on an
        # instance that has not matched anything yet yields None instead of
        # raising AttributeError.
        self.record_id = None
        # For each supported host we need:
        #   hostname - URL prefixes under which records live
        #   api      - URL prefix of the metadata API (record id is appended)
        #   filepath - dotted path to the list of files in the metadata
        #   filename - path to a file's name inside a file entry
        #   download - path to a file's download URL inside a file entry
        #   type     - path to the item type in the metadata
        # Only "hostname", "api" and "filepath" are read in this class; the
        # remaining keys are presumably consumed by the inherited
        # fetch_file() -- confirm against DoiProvider.
        self.hosts = [
            {
                "hostname": [
                    "https://sandbox.zenodo.org/record/",
                    "http://sandbox.zenodo.org/record/",
                ],
                "api": "https://sandbox.zenodo.org/api/records/",
                "filepath": "files",
                "filename": "filename",
                "download": "links.download",
                "type": "metadata.upload_type",
            },
            {
                "hostname": ["https://zenodo.org/record/", "http://zenodo.org/record/"],
                "api": "https://zenodo.org/api/records/",
                "filepath": "files",
                "filename": "filename",
                "download": "links.download",
                "type": "metadata.upload_type",
            },
            {
                "hostname": [
                    "https://data.caltech.edu/records/",
                    "http://data.caltech.edu/records/",
                ],
                "api": "https://data.caltech.edu/api/record/",
                "filepath": "metadata.electronic_location_and_access",
                "filename": "electronic_name.0",
                "download": "uniform_resource_identifier",
                "type": "metadata.resourceType.resourceTypeGeneral",
            },
        ]

    def detect(self, doi, ref=None, extra_args=None):
        """Trigger this provider for things that resolve to a Zenodo/Invenio record.

        ``doi`` is resolved with ``self.doi2url`` and the resulting URL is
        compared against the ``hostname`` prefixes of every configured host.
        ``ref`` and ``extra_args`` are accepted but not used here.

        On the first match the record id (the last path segment of the URL)
        is stored in ``self.record_id`` and a spec dict
        ``{"record": <record id>, "host": <host config>}`` is returned.
        Returns ``None`` when no configured host matches.
        """
        url = self.doi2url(doi)

        for host in self.hosts:
            # str.startswith accepts a tuple of prefixes, so no intermediate
            # list of booleans is needed.
            if url.startswith(tuple(host["hostname"])):
                # Drop trailing slashes first: ".../record/123/" must yield
                # "123", not the empty string after the final "/".
                self.record_id = url.rstrip("/").rsplit("/", maxsplit=1)[1]
                return {"record": self.record_id, "host": host}

        return None

    def fetch(self, spec, output_dir, yield_output=False):
        """Fetch and unpack a Zenodo record.

        ``spec`` is the dict returned by ``detect`` (keys ``"record"`` and
        ``"host"``). The record's JSON metadata is requested from the host's
        API URL, the file list is looked up at the host's ``filepath`` and
        every entry is handed to ``self.fetch_file`` together with
        ``output_dir``.

        This is a generator: it yields a progress message first and then
        whatever ``self.fetch_file`` yields for each file. ``yield_output``
        is accepted but not used here.
        """
        record_id = spec["record"]
        host = spec["host"]

        yield f"Fetching Zenodo record {record_id}.\n"
        resp = self.urlopen(
            f'{host["api"]}{record_id}',
            headers={"accept": "application/json"},
        )
        record = resp.json()

        files = deep_get(record, host["filepath"])
        # Unzipping is only requested when the record consists of exactly one
        # file; with several files each is fetched with unzip=False.
        only_one_file = len(files) == 1
        for file_ref in files:
            yield from self.fetch_file(file_ref, host, output_dir, unzip=only_one_file)

    @property
    def content_id(self):
        """The Zenodo record ID as the content of a record is immutable.

        ``None`` until ``detect`` has matched a record.
        """
        return self.record_id