kopia lustrzana https://github.com/jupyterhub/repo2docker
Add basic Zenodo content provider
rodzic
51898274f8
commit
dce6c1e8d7
|
@ -136,6 +136,7 @@ class Repo2Docker(Application):
|
|||
content_providers = List(
|
||||
[
|
||||
contentproviders.Local,
|
||||
contentproviders.Zenodo,
|
||||
contentproviders.Git,
|
||||
],
|
||||
config=True,
|
||||
|
|
|
@ -1,2 +1,3 @@
|
|||
from .git import Git
|
||||
from .base import Local
|
||||
from .zenodo import Zenodo
|
||||
|
|
|
@ -0,0 +1,73 @@
|
|||
import json
|
||||
import shutil
|
||||
|
||||
from os import makedirs
|
||||
from os import path
|
||||
from urllib.request import urlopen, Request
|
||||
from zipfile import ZipFile, is_zipfile
|
||||
|
||||
from .base import ContentProvider
|
||||
from ..utils import copytree
|
||||
|
||||
|
||||
class Zenodo(ContentProvider):
    """Provide contents of a Zenodo deposit."""

    def detect(self, doi, ref=None, extra_args=None):
        """Return a spec for `doi` if it is a Zenodo DOI, otherwise None.

        The DOI is resolved through doi.org; the last path segment of the
        URL it redirects to is stored as `self.record_id` and returned as
        ``{'record': <record id>}``. `ref` and `extra_args` are accepted
        but not used.
        """
        # 10.5281 is the Zenodo DOI prefix. The trailing slash is part of
        # the check so that other registrants (e.g. 10.52810) do not match.
        if not doi.startswith("10.5281/"):
            return None

        # close the response once the redirected URL has been read
        with urlopen("https://doi.org/{}".format(doi)) as resp:
            self.record_id = resp.url.rsplit("/", maxsplit=1)[1]
        return {"record": self.record_id}

    def fetch(self, spec, output_dir, yield_output=False):
        """Download every file of the record in `spec` into `output_dir`.

        `spec` is the dict returned by `detect()`. This is a generator that
        yields progress messages. If the record is of upload type
        "software" and consists of exactly one file, that file is unpacked
        when it is a zip archive. `yield_output` is accepted but not used.
        """
        record_id = spec["record"]

        yield "Fetching Zenodo record {}.\n".format(record_id)
        req = Request(
            "https://zenodo.org/api/records/{}".format(record_id),
            headers={"accept": "application/json"},
        )
        with urlopen(req) as resp:
            record = json.loads(resp.read().decode("utf-8"))

        files = record["files"]
        is_software = record["metadata"]["upload_type"] == "software"
        only_one_file = len(files) == 1
        for file_ref in files:
            yield from self._fetch_file(
                file_ref, output_dir, unzip=is_software and only_one_file
            )

    def _fetch_file(self, file_ref, output_dir, unzip=False):
        """Download one file of a record and optionally unpack it.

        Yields a progress message for the download. With `unzip` set and
        the download being a zip archive, the archive is extracted into
        `output_dir` and then deleted; if the extraction produced a single
        top-level directory its contents are moved up into `output_dir`.
        """
        # this module only imports `makedirs` and `path` from os
        import os

        fname = file_ref["filename"]
        dst_fname = path.join(output_dir, fname)
        makedirs(path.dirname(dst_fname), exist_ok=True)

        # yield before opening anything so no handle is held open while
        # the generator is suspended
        yield "Fetching {}\n".format(fname)
        with urlopen(file_ref["links"]["download"]) as src:
            with open(dst_fname, "wb") as dst:
                shutil.copyfileobj(src, dst)

        if not (unzip and is_zipfile(dst_fname)):
            return

        with ZipFile(dst_fname) as zfile:
            zfile.extractall(path=output_dir)

        # Remove the archive and the directories that were created only to
        # hold it. Only empty directories are removed and the walk stops
        # before output_dir, so extracted content is never deleted.
        os.remove(dst_fname)
        leftover = path.dirname(fname)
        while leftover:
            try:
                os.rmdir(path.join(output_dir, leftover))
            except OSError:
                # not empty: the archive extracted something in here
                break
            leftover = path.dirname(leftover)

        # archives commonly wrap everything in one directory; unwrap it so
        # the content ends up directly in output_dir
        extracted = os.listdir(output_dir)
        if len(extracted) == 1:
            top_level = path.join(output_dir, extracted[0])
            if path.isdir(top_level):
                copytree(top_level, output_dir)
                shutil.rmtree(top_level)

    @property
    def content_id(self):
        """A unique ID to represent the version of the content.

        This is the Zenodo record ID stored by `detect()`.
        """
        return self.record_id
|
|
@ -4,6 +4,8 @@ import os
|
|||
import re
|
||||
import subprocess
|
||||
|
||||
from shutil import copystat, copy2
|
||||
|
||||
from traitlets import Integer, TraitError
|
||||
|
||||
|
||||
|
@ -287,3 +289,91 @@ def check_ref(ref, cwd=None):
|
|||
# We'll throw an error later if no refs resolve
|
||||
pass
|
||||
return hash
|
||||
|
||||
|
||||
class Error(OSError):
    """Raised by copytree() with the list of failures collected while copying."""
|
||||
|
||||
|
||||
# a copy of shutil.copytree() that is ok with the target directory
|
||||
# already existing
|
||||
def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2,
             ignore_dangling_symlinks=False):
    """Recursively copy a directory tree into `dst`, which may already exist.

    Unlike shutil.copytree(), the destination directory (and any of its
    subdirectories) is allowed to exist; the source tree is merged into it.

    If exception(s) occur, an Error is raised with a list of reasons.

    If the optional symlinks flag is true, symbolic links in the
    source tree result in symbolic links in the destination tree; if
    it is false, the contents of the files pointed to by symbolic
    links are copied. If the file pointed to by the symlink doesn't
    exist, an exception will be added to the list of errors raised in
    an Error exception at the end of the copy process.

    You can set the optional ignore_dangling_symlinks flag to true if you
    want to silence this exception. The flag applies to the whole tree,
    not only to the top-level directory.

    The optional ignore argument is a callable. If given, it
    is called with the `src` parameter, which is the directory
    being visited by copytree(), and `names` which is the list of
    `src` contents, as returned by os.listdir():

        callable(src, names) -> ignored_names

    Since copytree() is called recursively, the callable will be
    called once for each directory that is copied. It returns a
    list of names relative to the `src` directory that should
    not be copied.

    The optional copy_function argument is a callable that will be used
    to copy each file. It will be called with the source path and the
    destination path as arguments. By default, copy2() is used, but any
    function that supports the same signature (like copy()) can be used.

    Returns `dst`.
    """
    names = os.listdir(src)
    if ignore is not None:
        ignored_names = ignore(src, names)
    else:
        ignored_names = set()

    os.makedirs(dst, exist_ok=True)
    errors = []
    for name in names:
        if name in ignored_names:
            continue
        srcname = os.path.join(src, name)
        dstname = os.path.join(dst, name)
        try:
            if os.path.islink(srcname):
                if symlinks:
                    # We can't just leave it to `copy_function` because legacy
                    # code with a custom `copy_function` may rely on copytree
                    # doing the right thing.
                    os.symlink(os.readlink(srcname), dstname)
                    copystat(srcname, dstname, follow_symlinks=not symlinks)
                else:
                    # Ignore dangling symlink if the flag is on. Checking
                    # `srcname` follows the link from its own location, so
                    # relative targets are not resolved against the cwd.
                    if ignore_dangling_symlinks and not os.path.exists(srcname):
                        continue
                    # otherwise let the copy occur; copy2 will raise an error
                    if os.path.isdir(srcname):
                        copytree(srcname, dstname, symlinks, ignore,
                                 copy_function, ignore_dangling_symlinks)
                    else:
                        copy_function(srcname, dstname)
            elif os.path.isdir(srcname):
                copytree(srcname, dstname, symlinks, ignore, copy_function,
                         ignore_dangling_symlinks)
            else:
                # Will raise a SpecialFileError for unsupported file types
                copy_function(srcname, dstname)
        # catch the Error from the recursive copytree so that we can
        # continue with other files
        except Error as err:
            errors.extend(err.args[0])
        except OSError as why:
            errors.append((srcname, dstname, str(why)))
    try:
        copystat(src, dst)
    except OSError as why:
        # Copying file access times may fail on Windows
        if getattr(why, 'winerror', None) is None:
            errors.append((src, dst, str(why)))
    if errors:
        raise Error(errors)
    return dst
|
||||
|
|
Ładowanie…
Reference in New Issue