kopia lustrzana https://github.com/jupyterhub/repo2docker
commit
80fbc63925
|
@ -2,11 +2,14 @@ jupyter-repo2docker
|
|||
===================
|
||||
|
||||
``jupyter-repo2docker`` is a tool to **build, run, and push Docker
|
||||
images from source code repositories** that run via a Jupyter server.
|
||||
images from source code repositories**.
|
||||
|
||||
``repo2docker`` fetches a repository
|
||||
(from GitHub, GitLab or other locations) and builds a container image
|
||||
based on the configuration files found in the repository. It can be
|
||||
(from GitHub, GitLab, Zenodo, a Git repository or a local directory)
|
||||
and builds a container image in which the code can be executed.
|
||||
The image build process is based on the configuration files found in the repository.
|
||||
|
||||
``repo2docker`` can be
|
||||
used to explore a repository locally by building and executing the
|
||||
constructed image of the repository, or as a means of building images that
|
||||
are pushed to a Docker registry.
|
||||
|
|
|
@ -11,14 +11,15 @@ Using ``repo2docker``
|
|||
``repo2docker``, see :ref:`install`.
|
||||
|
||||
``repo2docker`` can build a reproducible computational environment for any repository that
|
||||
follows :ref:`specification`. repo2docker is called with a URL/path to a repository. It then
|
||||
follows :ref:`specification`. repo2docker is called with the URL of a Git repository,
|
||||
a Zenodo DOI or a path to a local directory. It then
|
||||
performs these steps:
|
||||
|
||||
1. Inspects the repository for :ref:`configuration files <config-files>`. These will be used to build
|
||||
the environment needed to run the repository.
|
||||
2. Builds a Docker image with an environment specified in these :ref:`configuration files <config-files>`.
|
||||
3. Runs a Jupyter server within the image that lets you explore the
|
||||
repository interactively (optional)
|
||||
3. Launches the image to let you explore the
|
||||
repository interactively via Jupyter notebooks, RStudio, or many other interfaces (optional)
|
||||
4. Pushes the image to a Docker registry so that it may be accessed remotely
|
||||
(optional)
|
||||
|
||||
|
@ -27,10 +28,15 @@ Calling repo2docker
|
|||
|
||||
repo2docker is called with this command::
|
||||
|
||||
jupyter-repo2docker <URL-or-path to repository>
|
||||
jupyter-repo2docker <source-repository>
|
||||
|
||||
where ``<URL-or-path to repository>`` is a URL or path to the source repository
|
||||
for which you'd like to build an image.
|
||||
where ``<source-repository>`` is:
|
||||
|
||||
* a URL of a Git repository (``https://github.com/binder-examples/requirements``),
|
||||
* a Zenodo DOI (``10.5281/zenodo.1211089``), or
|
||||
* a path to a local directory (``a/local/directory``)
|
||||
|
||||
of the source repository you want to build.
|
||||
|
||||
For example, the following command will build an image of Peter Norvig's
|
||||
Pytudes_ repository::
|
||||
|
|
|
@ -136,6 +136,7 @@ class Repo2Docker(Application):
|
|||
content_providers = List(
|
||||
[
|
||||
contentproviders.Local,
|
||||
contentproviders.Zenodo,
|
||||
contentproviders.Git,
|
||||
],
|
||||
config=True,
|
||||
|
|
|
@ -1,2 +1,3 @@
|
|||
from .git import Git
|
||||
from .base import Local
|
||||
from .zenodo import Zenodo
|
||||
|
|
|
@ -0,0 +1,117 @@
|
|||
import os
|
||||
import json
|
||||
import shutil
|
||||
|
||||
from os import makedirs
|
||||
from os import path
|
||||
from urllib.request import build_opener, urlopen, Request
|
||||
from zipfile import ZipFile, is_zipfile
|
||||
|
||||
from .base import ContentProvider
|
||||
from ..utils import copytree
|
||||
from .. import __version__
|
||||
|
||||
|
||||
class Zenodo(ContentProvider):
    """Provide contents of a Zenodo deposit.

    `detect()` maps a Zenodo DOI or record URL to a record ID and
    `fetch()` downloads the files listed for that record into a directory.
    """

    def _urlopen(self, req, headers=None):
        """A urlopen() helper that identifies repo2docker as the client.

        `req` may be a URL string or a `urllib.request.Request`. A
        ``User-Agent`` header is always added; entries of the optional
        `headers` mapping are added afterwards. Returns whatever
        `urllib.request.urlopen` returns.
        """
        # someone passed a string, not a request
        if not isinstance(req, Request):
            req = Request(req)

        req.add_header("User-Agent", "repo2docker {}".format(__version__))
        if headers is not None:
            for key, value in headers.items():
                req.add_header(key, value)

        return urlopen(req)

    def _resolved_url(self, url):
        """Open `url` and return the final URL reported by the response."""
        resp = self._urlopen(url)
        try:
            return resp.url
        finally:
            # only the final URL is needed, don't leave the connection open
            resp.close()

    def detect(self, doi, ref=None, extra_args=None):
        """Trigger this provider for things that resolve to a Zenodo record.

        Recognised inputs are a bare Zenodo DOI (``10.5281/...``), a
        ``doi.org`` URL for such a DOI, or a ``zenodo.org/record/<id>`` URL.
        DOIs are resolved by opening the doi.org URL and reading the URL of
        the response; record URLs are parsed without any network access.

        Returns ``{"record": <record id>}`` and stores the ID in
        ``self.record_id``, or returns ``None`` when `doi` is not
        recognised. `ref` and `extra_args` are accepted but not used.
        """
        # To support Zenodo instances not hosted at zenodo.org we need to
        # start maintaining a list of known DOI prefixes and their hostname.
        # We should also change to returning a complete `record_url` that
        # fetch() can use instead of constructing a URL there
        doi = doi.lower()

        # 10.5281 is the Zenodo DOI prefix
        if doi.startswith("10.5281/"):
            record_url = self._resolved_url("https://doi.org/{}".format(doi))
        elif doi.startswith(
            ("https://doi.org/10.5281/", "http://doi.org/10.5281/")
        ):
            record_url = self._resolved_url(doi)
        elif doi.startswith(
            ("https://zenodo.org/record/", "http://zenodo.org/record/")
        ):
            record_url = doi
        else:
            return None

        # tolerate a trailing slash, which would otherwise give an empty ID
        self.record_id = record_url.rstrip("/").rsplit("/", maxsplit=1)[1]
        return {"record": self.record_id}

    def fetch(self, spec, output_dir, yield_output=False):
        """Fetch and unpack a Zenodo record.

        `spec` is the dict returned by `detect()`; its ``"record"`` entry
        selects the record. Every file listed in the record is downloaded
        into `output_dir`. If the record's upload type is ``"software"`` and
        it has exactly one file which is a ZIP archive, the archive is
        extracted and removed, and a single top-level directory inside it is
        flattened into `output_dir`.

        This is a generator yielding progress messages. `yield_output` is
        accepted but not used.
        """
        record_id = spec["record"]

        yield "Fetching Zenodo record {}.\n".format(record_id)
        req = Request(
            "https://zenodo.org/api/records/{}".format(record_id),
            headers={"accept": "application/json"},
        )
        # close the API response as soon as the body has been read
        with self._urlopen(req) as resp:
            record = json.loads(resp.read().decode("utf-8"))

        def _fetch(file_ref, unzip=False):
            # the assumption is that `unzip=True` means that this is the only
            # file related to the zenodo record
            with self._urlopen(file_ref["links"]["download"]) as src:
                fname = file_ref["filename"]
                if path.dirname(fname):
                    sub_dir = path.join(output_dir, path.dirname(fname))
                    if not path.exists(sub_dir):
                        yield "Creating {}\n".format(sub_dir)
                        makedirs(sub_dir, exist_ok=True)

                dst_fname = path.join(output_dir, fname)
                with open(dst_fname, "wb") as dst:
                    yield "Fetching {}\n".format(fname)
                    shutil.copyfileobj(src, dst)
            # first close the newly written file, then continue
            # processing it
            if unzip and is_zipfile(dst_fname):
                yield "Extracting {}\n".format(fname)
                # the context manager closes the archive even if
                # extraction fails part way through
                with ZipFile(dst_fname) as zfile:
                    zfile.extractall(path=output_dir)

                # delete downloaded file ...
                os.remove(dst_fname)
                # ... and any directories we might have created,
                # in which case sub_dir will be defined
                if path.dirname(fname):
                    shutil.rmtree(sub_dir)

                new_subdirs = os.listdir(output_dir)
                # if there is only one new subdirectory move its contents
                # to the top level directory; a lone regular file is
                # already where it should be
                if len(new_subdirs) == 1:
                    only_entry = path.join(output_dir, new_subdirs[0])
                    if path.isdir(only_entry):
                        copytree(only_entry, output_dir)
                        shutil.rmtree(only_entry)

        is_software = record["metadata"]["upload_type"] == "software"
        only_one_file = len(record["files"]) == 1
        for file_ref in record["files"]:
            for line in _fetch(file_ref, unzip=is_software and only_one_file):
                yield line

    @property
    def content_id(self):
        """The Zenodo record ID as the content of a record is immutable.

        Reads ``self.record_id``, which is set by `detect()`.
        """
        return self.record_id
|
|
@ -4,6 +4,8 @@ import os
|
|||
import re
|
||||
import subprocess
|
||||
|
||||
from shutil import copystat, copy2
|
||||
|
||||
from traitlets import Integer, TraitError
|
||||
|
||||
|
||||
|
@ -287,3 +289,91 @@ def check_ref(ref, cwd=None):
|
|||
# We'll throw an error later if no refs resolve
|
||||
pass
|
||||
return hash
|
||||
|
||||
|
||||
class Error(OSError):
    """Raised by copytree() with the list of problems it collected."""
|
||||
|
||||
|
||||
# a copy of shutil.copytree() that is ok with the target directory
|
||||
# already existing
|
||||
def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2,
             ignore_dangling_symlinks=False):
    """Recursively copy a directory tree.

    Unlike the `shutil.copytree()` this is derived from, the destination
    directory may already exist: it is created with ``exist_ok=True`` and
    the contents of `src` are copied into it.

    If exception(s) occur, an Error is raised with a list of reasons.

    If the optional symlinks flag is true, symbolic links in the
    source tree result in symbolic links in the destination tree; if
    it is false, the contents of the files pointed to by symbolic
    links are copied. If the file pointed by the symlink doesn't
    exist, an exception will be added in the list of errors raised in
    an Error exception at the end of the copy process.

    You can set the optional ignore_dangling_symlinks flag to true if you
    want to silence this exception. The flag applies to the whole tree,
    including nested directories.

    The optional ignore argument is a callable. If given, it
    is called with the `src` parameter, which is the directory
    being visited by copytree(), and `names` which is the list of
    `src` contents, as returned by os.listdir():

        callable(src, names) -> ignored_names

    Since copytree() is called recursively, the callable will be
    called once for each directory that is copied. It returns a
    list of names relative to the `src` directory that should
    not be copied.

    The optional copy_function argument is a callable that will be used
    to copy each file. It will be called with the source path and the
    destination path as arguments. By default, copy2() is used, but any
    function that supports the same signature (like copy()) can be used.

    Returns `dst`.
    """
    names = os.listdir(src)
    if ignore is not None:
        ignored_names = ignore(src, names)
    else:
        ignored_names = set()

    os.makedirs(dst, exist_ok=True)
    errors = []
    for name in names:
        if name in ignored_names:
            continue
        srcname = os.path.join(src, name)
        dstname = os.path.join(dst, name)
        try:
            if os.path.islink(srcname):
                linkto = os.readlink(srcname)
                if symlinks:
                    # We can't just leave it to `copy_function` because legacy
                    # code with a custom `copy_function` may rely on copytree
                    # doing the right thing.
                    os.symlink(linkto, dstname)
                    copystat(srcname, dstname, follow_symlinks=not symlinks)
                else:
                    # ignore dangling symlink if the flag is on. Test the
                    # link itself: a relative `linkto` is relative to the
                    # link's directory, not to the current working directory
                    if not os.path.exists(srcname) and ignore_dangling_symlinks:
                        continue
                    # otherwise let the copy occur. copy2 will raise an error
                    if os.path.isdir(srcname):
                        copytree(srcname, dstname, symlinks, ignore,
                                 copy_function, ignore_dangling_symlinks)
                    else:
                        copy_function(srcname, dstname)
            elif os.path.isdir(srcname):
                # forward every option so nested directories are treated
                # the same way as the top level
                copytree(srcname, dstname, symlinks, ignore, copy_function,
                         ignore_dangling_symlinks)
            else:
                # Will raise a SpecialFileError for unsupported file types
                copy_function(srcname, dstname)
        # catch the Error from the recursive copytree so that we can
        # continue with other files
        except Error as err:
            errors.extend(err.args[0])
        except OSError as why:
            errors.append((srcname, dstname, str(why)))
    try:
        copystat(src, dst)
    except OSError as why:
        # Copying file access times may fail on Windows
        if getattr(why, 'winerror', None) is None:
            errors.append((src, dst, str(why)))
    if errors:
        raise Error(errors)
    return dst
|
||||
|
|
|
@ -213,11 +213,13 @@ class RemoteRepoList(pytest.File):
|
|||
with self.fspath.open() as f:
|
||||
repos = yaml.safe_load(f)
|
||||
for repo in repos:
|
||||
args = []
|
||||
if "ref" in repo:
|
||||
args += ['--ref', repo['ref']]
|
||||
args += [repo['url'],
|
||||
'--',
|
||||
] + shlex.split(repo['verify'])
|
||||
yield Repo2DockerTest(
|
||||
repo['name'], self,
|
||||
args=[
|
||||
'--ref', repo['ref'],
|
||||
repo['url'],
|
||||
'--',
|
||||
] + shlex.split(repo['verify']),
|
||||
args=args,
|
||||
)
|
||||
|
|
|
@ -30,3 +30,7 @@
|
|||
url: https://github.com/QuantStack/xeus-cling
|
||||
ref: 0.4.5
|
||||
verify: jupyter kernelspec list
|
||||
# Zenodo record of https://github.com/mbcxqcw2/EEModel/tree/v1.03
|
||||
- name: 10.5281/zenodo.1211089
|
||||
url: 10.5281/zenodo.1211089
|
||||
verify: python2 -c 'import matplotlib'
|
||||
|
|
|
@ -0,0 +1,171 @@
|
|||
import json
|
||||
import os
|
||||
|
||||
from contextlib import contextmanager
|
||||
from io import BytesIO
|
||||
from tempfile import TemporaryDirectory, NamedTemporaryFile
|
||||
from unittest.mock import patch
|
||||
from urllib.request import urlopen, Request
|
||||
from zipfile import ZipFile
|
||||
|
||||
from repo2docker.contentproviders import Zenodo
|
||||
|
||||
|
||||
def test_content_id():
    """content_id is the record ID found by detect()."""
    resolved = "https://zenodo.org/record/3232985"
    with patch.object(Zenodo, "_urlopen") as fake_urlopen:
        fake_urlopen.return_value.url = resolved
        provider = Zenodo()

        provider.detect("10.5281/zenodo.3232985")
        assert provider.content_id == "3232985"
|
||||
|
||||
|
||||
def test_detect():
    """detect() accepts Zenodo DOIs/URLs and returns None for anything else."""
    record_spec = {"record": "3232985"}
    zenodo_like = (
        "10.5281/zenodo.3232985",
        "https://doi.org/10.5281/zenodo.3232985",
        "https://zenodo.org/record/3232985",
    )
    with patch.object(Zenodo, "_urlopen") as fake_urlopen:
        fake_urlopen.return_value.url = "https://zenodo.org/record/3232985"
        # valid Zenodo DOIs trigger this content provider
        for candidate in zenodo_like:
            assert Zenodo().detect(candidate) == record_spec

        # only two of the three calls above have to resolve a DOI
        assert fake_urlopen.call_count == 2

    not_zenodo = (
        # Don't trigger the Zenodo content provider
        "/some/path/here",
        "https://example.com/path/here",
        # don't handle DOIs that aren't from Zenodo
        "https://doi.org/10.21105/joss.01277",
    )
    with patch.object(Zenodo, "_urlopen") as fake_urlopen:
        for candidate in not_zenodo:
            assert Zenodo().detect(candidate) is None

        # none of the examples are Zenodo like, so we should not attempt to
        # resolve a DOI either
        assert not fake_urlopen.called
|
||||
|
||||
|
||||
@contextmanager
def zenodo_archive(prefix="a_directory"):
    """Yield the path of a temporary ZIP file holding two files under `prefix`.

    The file is removed when the context exits.
    """
    members = (
        ("{}/some-file.txt".format(prefix), "some content"),
        ("{}/some-other-file.txt".format(prefix), "some more content"),
    )
    with NamedTemporaryFile(suffix=".zip") as tmp_zip:
        with ZipFile(tmp_zip.name, mode="w") as archive:
            for member_name, content in members:
                archive.writestr(member_name, content)

        yield tmp_zip.name
|
||||
|
||||
|
||||
def test_fetch_software_from_github_archive():
    """A lone software ZIP stored in a sub directory is unpacked and flattened."""
    # we "fetch" a local ZIP file to simulate a Zenodo record created from a
    # GitHub repository via the Zenodo-GitHub integration
    with zenodo_archive() as zen_path:
        record = {
            "files": [
                {
                    "filename": "some_dir/afake.zip",
                    "links": {"download": "file://{}".format(zen_path)},
                }
            ],
            "metadata": {"upload_type": "software"},
        }
        mock_response = BytesIO(json.dumps(record).encode("utf-8"))

        def mock_urlopen(self, req):
            # Request objects are API calls, plain strings are downloads
            return mock_response if isinstance(req, Request) else urlopen(req)

        with patch.object(Zenodo, "_urlopen", new=mock_urlopen):
            zen = Zenodo()

            with TemporaryDirectory() as out_dir:
                # drain the generator so the fetch actually runs
                output = list(zen.fetch({"record": "1234"}, out_dir))

                unpacked_files = set(os.listdir(out_dir))
                expected = {"some-other-file.txt", "some-file.txt"}
                assert expected == unpacked_files
|
||||
|
||||
|
||||
def test_fetch_software():
    """A lone top-level software ZIP is unpacked and flattened."""
    # we "fetch" a local ZIP file to simulate a Zenodo software record with a
    # ZIP file in it
    with zenodo_archive() as zen_path:
        record = {
            "files": [
                {
                    # this is the difference to the GitHub generated one,
                    # the ZIP file isn't in a directory
                    "filename": "afake.zip",
                    "links": {"download": "file://{}".format(zen_path)},
                }
            ],
            "metadata": {"upload_type": "software"},
        }
        mock_response = BytesIO(json.dumps(record).encode("utf-8"))

        def mock_urlopen(self, req):
            # Request objects are API calls, plain strings are downloads
            return mock_response if isinstance(req, Request) else urlopen(req)

        with patch.object(Zenodo, "_urlopen", new=mock_urlopen):
            with TemporaryDirectory() as out_dir:
                zen = Zenodo()

                # drain the generator so the fetch actually runs
                output = list(zen.fetch({"record": "1234"}, out_dir))

                unpacked_files = set(os.listdir(out_dir))
                expected = {"some-other-file.txt", "some-file.txt"}
                assert expected == unpacked_files
|
||||
|
||||
|
||||
def test_fetch_data():
    """Files of a non-software record are downloaded but not unpacked."""
    # we "fetch" a local ZIP file to simulate a Zenodo data record
    with zenodo_archive() as a_zen_path:
        with zenodo_archive() as b_zen_path:
            record = {
                "files": [
                    {
                        "filename": "afake.zip",
                        "links": {"download": "file://{}".format(a_zen_path)},
                    },
                    {
                        "filename": "bfake.zip",
                        "links": {"download": "file://{}".format(b_zen_path)},
                    },
                ],
                "metadata": {"upload_type": "data"},
            }
            mock_response = BytesIO(json.dumps(record).encode("utf-8"))

            def mock_urlopen(self, req):
                # Request objects are API calls, plain strings are downloads
                if isinstance(req, Request):
                    return mock_response
                return urlopen(req)

            with patch.object(Zenodo, "_urlopen", new=mock_urlopen):
                with TemporaryDirectory() as out_dir:
                    zen = Zenodo()

                    # drain the generator so the fetch actually runs
                    output = list(zen.fetch({"record": "1234"}, out_dir))

                    unpacked_files = set(os.listdir(out_dir))
                    # ZIP files shouldn't have been unpacked
                    expected = {"bfake.zip", "afake.zip"}
                    assert expected == unpacked_files
|
Ładowanie…
Reference in New Issue