# repo2docker/repo2docker/contentproviders/zenodo.py
#
# 86 lines
# 3.0 KiB
# Python

import os
import json
import shutil
from os import makedirs
from os import path
from urllib.request import Request
from urllib.error import HTTPError
from .doi import DoiProvider
from ..utils import copytree, deep_get
class Zenodo(DoiProvider):
    """Provide contents of a Zenodo deposit.

    The provider also recognises the other record hosts listed in
    ``self.hosts``; each entry describes where to find the file list,
    file names and download URLs inside that host's record metadata.
    """

    def __init__(self):
        super().__init__()
        # Filled in by detect(). Initialised here so that content_id can be
        # read before (or without) a successful detect() call instead of
        # raising AttributeError.
        self.record_id = None
        # We need the hostname (url where records are), api url (for metadata),
        # filepath (path to files in metadata), filename (path to filename in
        # metadata), download (path to file download URL), and type (path to item type in metadata)
        self.hosts = [
            {
                "hostname": [
                    "https://sandbox.zenodo.org/record/",
                    "http://sandbox.zenodo.org/record/",
                ],
                "api": "https://sandbox.zenodo.org/api/records/",
                "filepath": "files",
                "filename": "filename",
                "download": "links.download",
                "type": "metadata.upload_type",
            },
            {
                "hostname": ["https://zenodo.org/record/", "http://zenodo.org/record/"],
                "api": "https://zenodo.org/api/records/",
                "filepath": "files",
                "filename": "filename",
                "download": "links.download",
                "type": "metadata.upload_type",
            },
            {
                "hostname": [
                    "https://data.caltech.edu/records/",
                    "http://data.caltech.edu/records/",
                ],
                "api": "https://data.caltech.edu/api/record/",
                "filepath": "metadata.electronic_location_and_access",
                "filename": "electronic_name.0",
                "download": "uniform_resource_identifier",
                "type": "metadata.resourceType.resourceTypeGeneral",
            },
        ]

    def detect(self, doi, ref=None, extra_args=None):
        """Trigger this provider for things that resolve to a Zenodo/Invenio record

        Parameters
        ----------
        doi : str
            Identifier that is turned into a URL with ``self.doi2url``.
        ref, extra_args
            Accepted for interface compatibility; not used here.

        Returns
        -------
        dict or None
            ``{"record": <record id>, "host": <matching entry of self.hosts>}``
            when the resolved URL starts with one of the known hostname
            prefixes, otherwise ``None``. On a match the record id is also
            stored in ``self.record_id``.
        """
        url = self.doi2url(doi)

        for host in self.hosts:
            for prefix in host["hostname"]:
                if not url.startswith(prefix):
                    continue
                # The record id is the last path segment after the hostname
                # prefix. Trailing slashes are dropped first so that
                # ".../record/123/" yields "123" rather than an empty id
                # (which would make fetch() query the bare API root).
                remainder = url[len(prefix):].rstrip("/")
                self.record_id = remainder.rsplit("/", maxsplit=1)[-1]
                return {"record": self.record_id, "host": host}

        # No known host matched: this provider does not handle the URL.
        return None

    def fetch(self, spec, output_dir, yield_output=False):
        """Fetch and unpack a Zenodo record

        Parameters
        ----------
        spec : dict
            Mapping with the keys ``"record"`` and ``"host"``, as returned
            by ``detect``.
        output_dir : str
            Passed through to ``self.fetch_file`` for every file.
        yield_output : bool
            Not used by this method.

        Yields
        ------
        str
            A progress message, followed by whatever ``self.fetch_file``
            yields for each file of the record.
        """
        record_id = spec["record"]
        host = spec["host"]

        yield f"Fetching Zenodo record {record_id}.\n"
        # Ask the host's API for the record metadata as JSON.
        resp = self.urlopen(
            f'{host["api"]}{record_id}',
            headers={"accept": "application/json"},
        )
        record = resp.json()

        # host["filepath"] is the dotted path to the file list in the metadata.
        files = deep_get(record, host["filepath"])
        # unzip is requested only when the record consists of exactly one
        # file. NOTE(review): presumably so a single archive is unpacked as
        # the repository contents; confirm against fetch_file.
        only_one_file = len(files) == 1
        for file_ref in files:
            yield from self.fetch_file(file_ref, host, output_dir, unzip=only_one_file)

    @property
    def content_id(self):
        """The Zenodo record ID as the content of a record is immutable

        ``None`` until ``detect`` has matched a record.
        """
        return self.record_id