diff --git a/repo2docker/app.py b/repo2docker/app.py
index 254c8d5d..4b02a628 100644
--- a/repo2docker/app.py
+++ b/repo2docker/app.py
@@ -136,6 +136,7 @@ class Repo2Docker(Application):
     content_providers = List(
         [
             contentproviders.Local,
+            contentproviders.Zenodo,
             contentproviders.Git,
         ],
         config=True,
diff --git a/repo2docker/contentproviders/__init__.py b/repo2docker/contentproviders/__init__.py
index cfe334b5..d648731d 100644
--- a/repo2docker/contentproviders/__init__.py
+++ b/repo2docker/contentproviders/__init__.py
@@ -1,2 +1,3 @@
 from .git import Git
 from .base import Local
+from .zenodo import Zenodo
diff --git a/repo2docker/contentproviders/zenodo.py b/repo2docker/contentproviders/zenodo.py
new file mode 100644
index 00000000..3ad06a02
--- /dev/null
+++ b/repo2docker/contentproviders/zenodo.py
@@ -0,0 +1,101 @@
+import json
+import os
+import shutil
+
+from os import makedirs
+from os import path
+from urllib.request import urlopen, Request
+from zipfile import ZipFile, is_zipfile
+
+from .base import ContentProvider
+from ..utils import copytree
+
+
+class Zenodo(ContentProvider):
+    """Provide contents of a Zenodo deposit."""
+
+    def detect(self, doi, ref=None, extra_args=None):
+        """Return a spec for `doi` if it is a Zenodo DOI, otherwise None.
+
+        The DOI is resolved through doi.org and the last path segment of
+        the URL it redirects to is stored as the record ID.
+        """
+        # 10.5281 is the Zenodo DOI prefix; match it including the slash
+        # so that a longer prefix such as 10.52810 is not picked up
+        if not doi.startswith('10.5281/'):
+            return None
+
+        with urlopen("https://doi.org/{}".format(doi)) as resp:
+            self.record_id = resp.url.rsplit("/", maxsplit=1)[1]
+        return {'record': self.record_id}
+
+    def fetch(self, spec, output_dir, yield_output=False):
+        """Download the files of the record in `spec` into `output_dir`.
+
+        This is a generator that yields progress messages. When the record
+        is of type "software" and has a single file that is a zip archive,
+        the archive is unpacked; if that leaves exactly one directory in
+        `output_dir` its contents are moved up one level.
+        """
+        record_id = spec['record']
+
+        yield "Fetching Zenodo record {}.\n".format(record_id)
+        req = Request("https://zenodo.org/api/records/{}".format(record_id),
+                      headers={"accept": "application/json"})
+        with urlopen(req) as resp:
+            record = json.loads(resp.read().decode("utf-8"))
+
+        def _fetch(file_ref, unzip=False):
+            fname = file_ref["filename"]
+            rel_dir = path.dirname(fname)
+            dst_fname = path.join(output_dir, fname)
+
+            with urlopen(file_ref["links"]["download"]) as src:
+                makedirs(path.join(output_dir, rel_dir), exist_ok=True)
+                with open(dst_fname, "wb") as dst:
+                    yield "Fetching {}\n".format(fname)
+                    shutil.copyfileobj(src, dst)
+
+            # first close the newly written file, then continue
+            # processing it
+            if not (unzip and is_zipfile(dst_fname)):
+                return
+
+            yield "Extracting {}\n".format(fname)
+            with ZipFile(dst_fname) as zfile:
+                zfile.extractall(path=output_dir)
+
+            # remove the archive and the directories that only existed to
+            # hold it; rmdir() refuses to delete a directory the archive
+            # has unpacked files into, and output_dir itself is never
+            # touched because rel_dir runs out first
+            os.remove(dst_fname)
+            leftover = rel_dir
+            while leftover:
+                try:
+                    os.rmdir(path.join(output_dir, leftover))
+                except OSError:
+                    break
+                leftover = path.dirname(leftover)
+
+            # an archive usually wraps everything in one directory, move
+            # its contents up so they sit directly in output_dir
+            entries = os.listdir(output_dir)
+            if len(entries) == 1:
+                top_dir = path.join(output_dir, entries[0])
+                if path.isdir(top_dir):
+                    copytree(top_dir, output_dir)
+                    shutil.rmtree(top_dir)
+
+        is_software = record["metadata"]["upload_type"] == "software"
+        only_one_file = len(record["files"]) == 1
+        for file_ref in record['files']:
+            yield from _fetch(file_ref, unzip=is_software and only_one_file)
+
+    @property
+    def content_id(self):
+        """A unique ID to represent the version of the content.
+
+        Uses the Zenodo record ID that detect() resolved from the DOI.
+        """
+        return self.record_id
diff --git a/repo2docker/utils.py b/repo2docker/utils.py
index 22c19068..d703d612 100644
--- a/repo2docker/utils.py
+++ b/repo2docker/utils.py
@@ -4,6 +4,8 @@ import os
 import re
 import subprocess
 
+from shutil import copystat, copy2
+
 from traitlets import Integer, TraitError
 
 
@@ -287,3 +289,105 @@ def check_ref(ref, cwd=None):
             # We'll throw an error later if no refs resolve
             pass
     return hash
+
+
+class Error(OSError):
+    """Raised by copytree() with a list of (src, dst, reason) tuples."""
+
+
+# a copy of shutil.copytree() that is ok with the target directory
+# already existing
+def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2,
+             ignore_dangling_symlinks=False):
+    """Recursively copy a directory tree.
+
+    Unlike shutil.copytree() the destination directory may already exist;
+    the source tree is then merged into it.
+    If exception(s) occur, an Error is raised with a list of reasons.
+
+    If the optional symlinks flag is true, symbolic links in the
+    source tree result in symbolic links in the destination tree; if
+    it is false, the contents of the files pointed to by symbolic
+    links are copied. If the file pointed by the symlink doesn't
+    exist, an exception will be added in the list of errors raised in
+    an Error exception at the end of the copy process.
+
+    You can set the optional ignore_dangling_symlinks flag to true if you
+    want to silence this exception. Notice that this has no effect on
+    platforms that don't support os.symlink.
+
+    The optional ignore argument is a callable. If given, it
+    is called with the `src` parameter, which is the directory
+    being visited by copytree(), and `names` which is the list of
+    `src` contents, as returned by os.listdir():
+
+        callable(src, names) -> ignored_names
+
+    Since copytree() is called recursively, the callable will be
+    called once for each directory that is copied. It returns a
+    list of names relative to the `src` directory that should
+    not be copied.
+
+    The optional copy_function argument is a callable that will be used
+    to copy each file. It will be called with the source path and the
+    destination path as arguments. By default, copy2() is used, but any
+    function that supports the same signature (like copy()) can be used.
+
+    Returns `dst`.
+    """
+    names = os.listdir(src)
+    if ignore is not None:
+        ignored_names = ignore(src, names)
+    else:
+        ignored_names = set()
+
+    os.makedirs(dst, exist_ok=True)
+    errors = []
+    for name in names:
+        if name in ignored_names:
+            continue
+        srcname = os.path.join(src, name)
+        dstname = os.path.join(dst, name)
+        try:
+            if os.path.islink(srcname):
+                linkto = os.readlink(srcname)
+                if symlinks:
+                    # We can't just leave it to `copy_function` because legacy
+                    # code with a custom `copy_function` may rely on copytree
+                    # doing the right thing.
+                    os.symlink(linkto, dstname)
+                    copystat(srcname, dstname, follow_symlinks=not symlinks)
+                else:
+                    # ignore dangling symlink if the flag is on; exists()
+                    # follows the link from its own directory, whereas the
+                    # raw link target may be relative to it
+                    if (ignore_dangling_symlinks
+                            and not os.path.exists(srcname)):
+                        continue
+                    # otherwise let the copy occur; copy2 will raise an error
+                    if os.path.isdir(srcname):
+                        copytree(srcname, dstname, symlinks, ignore,
+                                 copy_function, ignore_dangling_symlinks)
+                    else:
+                        copy_function(srcname, dstname)
+            elif os.path.isdir(srcname):
+                copytree(srcname, dstname, symlinks, ignore, copy_function,
+                         ignore_dangling_symlinks)
+            else:
+                # Will raise a SpecialFileError for unsupported file types
+                copy_function(srcname, dstname)
+        # catch the Error from the recursive copytree so that we can
+        # continue with other files
+        except Error as err:
+            errors.extend(err.args[0])
+        except OSError as why:
+            errors.append((srcname, dstname, str(why)))
+    try:
+        copystat(src, dst)
+    except OSError as why:
+        # Copying file access times may fail on Windows
+        if getattr(why, 'winerror', None) is None:
+            errors.append((src, dst, str(why)))
+    if errors:
+        raise Error(errors)
+    return dst