kopia lustrzana https://github.com/jupyterhub/repo2docker
Add basic Zenodo content provider
rodzic
51898274f8
commit
dce6c1e8d7
|
@ -136,6 +136,7 @@ class Repo2Docker(Application):
|
||||||
content_providers = List(
|
content_providers = List(
|
||||||
[
|
[
|
||||||
contentproviders.Local,
|
contentproviders.Local,
|
||||||
|
contentproviders.Zenodo,
|
||||||
contentproviders.Git,
|
contentproviders.Git,
|
||||||
],
|
],
|
||||||
config=True,
|
config=True,
|
||||||
|
|
|
@ -1,2 +1,3 @@
|
||||||
from .git import Git
|
from .git import Git
|
||||||
from .base import Local
|
from .base import Local
|
||||||
|
from .zenodo import Zenodo
|
||||||
|
|
|
@ -0,0 +1,73 @@
|
||||||
|
import json
|
||||||
|
import shutil
|
||||||
|
|
||||||
|
from os import makedirs
|
||||||
|
from os import path
|
||||||
|
from urllib.request import urlopen, Request
|
||||||
|
from zipfile import ZipFile, is_zipfile
|
||||||
|
|
||||||
|
from .base import ContentProvider
|
||||||
|
from ..utils import copytree
|
||||||
|
|
||||||
|
|
||||||
|
class Zenodo(ContentProvider):
    """Provide contents of a Zenodo deposit.

    A deposit is identified by its DOI. `detect` resolves the DOI to a
    Zenodo record ID and `fetch` downloads the files listed in that record.
    """

    def detect(self, doi, ref=None, extra_args=None):
        """Return a spec for `doi` if it is a Zenodo DOI, otherwise None.

        The DOI is resolved through https://doi.org and the last path
        segment of the response URL is stored as `self.record_id`.
        `ref` and `extra_args` are accepted but not used.
        """
        # 10.5281 is the Zenodo DOI prefix. Including the slash keeps other
        # prefixes that merely start with the same digits from matching.
        if not doi.startswith("10.5281/"):
            return None

        # close the HTTP response as soon as the final URL is known
        with urlopen("https://doi.org/{}".format(doi)) as resp:
            resolved_url = resp.url

        self.record_id = resolved_url.rsplit("/", maxsplit=1)[1]
        return {"record": self.record_id}

    def fetch(self, spec, output_dir, yield_output=False):
        """Download the files of the record named in `spec` to `output_dir`.

        This is a generator that yields progress messages while it works.

        If the record's upload type is "software" and it consists of exactly
        one file that is a zip archive, the archive is extracted into
        `output_dir` and then deleted. When the extraction leaves a single
        directory in `output_dir`, the contents of that directory are moved
        up into `output_dir`.

        `spec` is the dictionary returned by `detect`. `yield_output` is
        accepted but not used.
        """
        # only this method needs these two, so import them locally
        from os import listdir, remove

        record_id = spec["record"]

        yield "Fetching Zenodo record {}.\n".format(record_id)
        req = Request(
            "https://zenodo.org/api/records/{}".format(record_id),
            headers={"accept": "application/json"},
        )
        with urlopen(req) as resp:
            record = json.loads(resp.read().decode("utf-8"))

        def _fetch(file_ref, unzip=False):
            """Download one file of the record, optionally unpacking it."""
            # NOTE(review): the file name comes from the API response and is
            # joined onto output_dir without checking that the result stays
            # inside output_dir.
            fname = file_ref["filename"]
            rel_dir = path.dirname(fname)
            sub_dir = path.join(output_dir, rel_dir)
            makedirs(sub_dir, exist_ok=True)
            dst_fname = path.join(output_dir, fname)

            yield "Fetching {}\n".format(fname)
            with urlopen(file_ref["links"]["download"]) as src:
                with open(dst_fname, "wb") as dst:
                    shutil.copyfileobj(src, dst)

            # the newly written file is closed by now, so it can be
            # inspected and unpacked
            if not (unzip and is_zipfile(dst_fname)):
                return

            yield "Extracting {}\n".format(fname)
            with ZipFile(dst_fname) as zfile:
                zfile.extractall(path=output_dir)

            # drop the archive itself so that only its contents remain
            remove(dst_fname)
            # Only remove a directory that was created for the archive.
            # When the file name has no directory part, sub_dir is
            # output_dir itself and must be kept.
            if rel_dir:
                shutil.rmtree(sub_dir)

            # if everything ended up in one directory, move its contents up
            entries = listdir(output_dir)
            if len(entries) == 1:
                top_dir = path.join(output_dir, entries[0])
                if path.isdir(top_dir):
                    copytree(top_dir, output_dir)
                    shutil.rmtree(top_dir)

        is_software = record["metadata"]["upload_type"] == "software"
        only_one_file = len(record["files"]) == 1
        for file_ref in record["files"]:
            for line in _fetch(file_ref, unzip=is_software and only_one_file):
                yield line

    @property
    def content_id(self):
        """A unique ID to represent the version of the content.

        This is the Zenodo record ID that `detect` stored.
        """
        return self.record_id
|
|
@ -4,6 +4,8 @@ import os
|
||||||
import re
|
import re
|
||||||
import subprocess
|
import subprocess
|
||||||
|
|
||||||
|
from shutil import copystat, copy2
|
||||||
|
|
||||||
from traitlets import Integer, TraitError
|
from traitlets import Integer, TraitError
|
||||||
|
|
||||||
|
|
||||||
|
@ -287,3 +289,91 @@ def check_ref(ref, cwd=None):
|
||||||
# We'll throw an error later if no refs resolve
|
# We'll throw an error later if no refs resolve
|
||||||
pass
|
pass
|
||||||
return hash
|
return hash
|
||||||
|
|
||||||
|
|
||||||
|
class Error(OSError):
    """Raised by copytree() carrying the list of errors met while copying."""
|
||||||
|
|
||||||
|
|
||||||
|
# a copy of shutil.copytree() that is ok with the target directory
|
||||||
|
# already existing
|
||||||
|
def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2,
             ignore_dangling_symlinks=False):
    """Recursively copy a directory tree into `dst`.

    Unlike shutil.copytree() the destination directory may already exist;
    it is created if missing and otherwise copied into.

    If exception(s) occur, an Error is raised with a list of reasons, each
    a `(srcname, dstname, message)` tuple. Copying continues with the
    remaining entries before the Error is raised.

    If the optional symlinks flag is true, symbolic links in the
    source tree result in symbolic links in the destination tree; if
    it is false, the contents of the files pointed to by symbolic
    links are copied. If the file pointed to by the symlink doesn't
    exist, an exception will be added to the list of errors raised in
    an Error exception at the end of the copy process.

    You can set the optional ignore_dangling_symlinks flag to true if you
    want to silence this exception. The flag applies to the whole tree,
    including sub-directories.

    The optional ignore argument is a callable. If given, it
    is called with the `src` parameter, which is the directory
    being visited by copytree(), and `names` which is the list of
    `src` contents, as returned by os.listdir():

        callable(src, names) -> ignored_names

    Since copytree() is called recursively, the callable will be
    called once for each directory that is copied. It returns a
    list of names relative to the `src` directory that should
    not be copied.

    The optional copy_function argument is a callable that will be used
    to copy each file. It will be called with the source path and the
    destination path as arguments. By default, copy2() is used, but any
    function that supports the same signature (like copy()) can be used.

    Returns `dst`.
    """
    names = os.listdir(src)
    if ignore is not None:
        ignored_names = ignore(src, names)
    else:
        ignored_names = set()

    os.makedirs(dst, exist_ok=True)
    errors = []
    for name in names:
        if name in ignored_names:
            continue
        srcname = os.path.join(src, name)
        dstname = os.path.join(dst, name)
        try:
            if os.path.islink(srcname):
                if symlinks:
                    # We can't just leave it to `copy_function` because legacy
                    # code with a custom `copy_function` may rely on copytree
                    # doing the right thing.
                    linkto = os.readlink(srcname)
                    os.symlink(linkto, dstname)
                    copystat(srcname, dstname, follow_symlinks=not symlinks)
                else:
                    # Ignore dangling symlink if the flag is on.
                    # os.path.exists() follows the link, so testing the link
                    # itself resolves a relative target against the link's
                    # directory rather than the current working directory.
                    if ignore_dangling_symlinks and not os.path.exists(srcname):
                        continue
                    # otherwise let the copy occur. copy2 will raise an error
                    if os.path.isdir(srcname):
                        copytree(srcname, dstname, symlinks, ignore,
                                 copy_function,
                                 ignore_dangling_symlinks=ignore_dangling_symlinks)
                    else:
                        copy_function(srcname, dstname)
            elif os.path.isdir(srcname):
                # pass the flag on so nested dangling links are handled the
                # same way as top-level ones
                copytree(srcname, dstname, symlinks, ignore, copy_function,
                         ignore_dangling_symlinks=ignore_dangling_symlinks)
            else:
                # Will raise a SpecialFileError for unsupported file types
                copy_function(srcname, dstname)
        # catch the Error from the recursive copytree so that we can
        # continue with other files
        except Error as err:
            errors.extend(err.args[0])
        except OSError as why:
            errors.append((srcname, dstname, str(why)))
    try:
        copystat(src, dst)
    except OSError as why:
        # Copying file access times may fail on Windows
        if getattr(why, 'winerror', None) is None:
            errors.append((src, dst, str(why)))
    if errors:
        raise Error(errors)
    return dst
|
||||||
|
|
Ładowanie…
Reference in New Issue