kopia lustrzana https://github.com/jupyterhub/repo2docker
Merge 9c45888b5c
into 68a0331ea2
commit
827ee628e3
|
@ -5,7 +5,7 @@ jupyter-repo2docker
|
|||
images from source code repositories**.
|
||||
|
||||
``repo2docker`` fetches a repository
|
||||
(from GitHub, GitLab, Zenodo, Figshare, Dataverse installations, a Git repository or a local directory)
|
||||
(from GitHub, GitLab, Zenodo, Figshare, Dataverse installations, a Git repository, an IPFS CID or a local directory)
|
||||
and builds a container image in which the code can be executed.
|
||||
The image build process is based on the configuration files found in the repository.
|
||||
|
||||
|
|
|
@ -16,6 +16,7 @@ a `DOI <https://en.wikipedia.org/wiki/Digital_object_identifier>`_ from Zenodo
|
|||
a `Handle <https://en.wikipedia.org/wiki/Handle_System>`_ or DOI from a Dataverse installation,
|
||||
a `SWHID`_ of a directory of a revision archived in the
|
||||
`Software Heritage Archive <https://archive.softwareheritage.org>`_,
|
||||
a `CID`_ of a folder on `IPFS <https://ipfs.io>`_,
|
||||
or a path to a local directory.
|
||||
|
||||
It then performs these steps:
|
||||
|
@ -40,6 +41,7 @@ where ``<source-repository>`` is:
|
|||
* a URL of a Git repository (``https://github.com/binder-examples/requirements``),
|
||||
* a Zenodo DOI (``10.5281/zenodo.1211089``),
|
||||
* a SWHID_ (``swh:1:rev:999dd06c7f679a2714dfe5199bdca09522a29649``),
|
||||
* a CID_ (``QmPjPUTcXeiEdNUMEPusP4rnJNz2YPw1XrYQkp43C96DyS``), or
|
||||
* a path to a local directory (``a/local/directory``)
|
||||
|
||||
of the source repository you want to build.
|
||||
|
@ -136,3 +138,4 @@ Command line API
|
|||
|
||||
.. _Pytudes: https://github.com/norvig/pytudes
|
||||
.. _SWHID: https://docs.softwareheritage.org/devel/swh-model/persistent-identifiers.html
|
||||
.. _CID: https://docs.ipfs.io/concepts/content-addressing/
|
||||
|
|
|
@ -152,6 +152,7 @@ class Repo2Docker(Application):
|
|||
contentproviders.Dataverse,
|
||||
contentproviders.Hydroshare,
|
||||
contentproviders.Swhid,
|
||||
contentproviders.IPFS,
|
||||
contentproviders.Mercurial,
|
||||
contentproviders.Git,
|
||||
],
|
||||
|
|
|
@ -6,3 +6,4 @@ from .dataverse import Dataverse
|
|||
from .hydroshare import Hydroshare
|
||||
from .mercurial import Mercurial
|
||||
from .swhid import Swhid
|
||||
from .ipfs import IPFS
|
||||
|
|
|
@ -0,0 +1,90 @@
|
|||
import re
|
||||
from tarfile import TarFile
|
||||
from io import BytesIO
|
||||
|
||||
import requests
|
||||
|
||||
from .base import ContentProvider, ContentProviderException
|
||||
|
||||
# testing well-formedness of CID is not trivial, to do it
|
||||
# properly, one should use py-cid, which can decode all CIDS
|
||||
# that library however has a bunch of dependencies, so for now
|
||||
# we'll go with a regex-based approximation
|
||||
# this regex follows https://stackoverflow.com/a/67176726
|
||||
# Approximate pattern for the textual form of a CID: one alternative per
# leading prefix (``Qm``, ``b``, ``B``, ``z``, ``F``), each followed by a
# minimum number of characters from that prefix's alphabet.
RE_CID = re.compile(
    "Qm[1-9A-HJ-NP-Za-km-z]{44,}|"
    "b[A-Za-z2-7]{58,}|"
    "B[A-Z2-7]{58,}|"
    "z[1-9A-HJ-NP-Za-km-z]{48,}|"
    "F[0-9A-F]{50,}"
)


def is_cid(s):
    """Return True if *s* looks like an IPFS CID, False otherwise.

    The whole string has to match ``RE_CID``. A CID-shaped prefix followed
    by anything else (a path, a query string, whitespace, ...) is rejected,
    because the accepted value is later interpolated into a gateway URL.
    Non-string input is never a CID and yields False.
    """
    return isinstance(s, str) and RE_CID.fullmatch(s) is not None
|
||||
|
||||
|
||||
class IPFS(ContentProvider):
    """Provide contents of an IPFS CID.

    The directory tree behind a CID is downloaded as a tar archive from the
    first gateway in ``self.gateways`` that answers successfully, and is
    unpacked into the requested output directory.
    """

    def __init__(self):
        super().__init__()
        # Gateways are tried in this order; the first entry is a gateway
        # listening on localhost.
        self.gateways = [
            "http://127.0.0.1:8080",
            "https://ipfs.io",
            "https://dweb.link",
            "https://gateway.pinata.cloud",
            "https://cloudflare-ipfs.com",
            "https://ipfs.fleek.co",
        ]
        # Set by fetch(). Initialised here so that reading ``content_id``
        # before any fetch returns None instead of raising AttributeError.
        self._cid = None

    def detect(self, cid, ref=None, extra_args=None):
        """Return ``{"cid": cid}`` if *cid* looks like a CID, else None.

        ``ref`` and ``extra_args`` are accepted but not used.
        """
        if is_cid(cid):
            return {"cid": cid}
        return None

    @staticmethod
    def _strip_root(tar, root):
        """Return the members of *tar* below ``<root>/`` with that prefix removed.

        This trick is from https://stackoverflow.com/a/43094365 and gets rid
        of the root folder in the tar, which is named after the requested
        CID. Members outside that folder (including the folder entry itself)
        are dropped.

        Raises ContentProviderException if a member would be unpacked to an
        absolute path or to a path containing a ``..`` component. The whole
        archive is checked before the list is returned, so nothing has been
        written to disk when this raises.
        """
        prefix = "{}/".format(root)
        prefix_len = len(prefix)
        selected = []
        for member in tar.getmembers():
            if not member.path.startswith(prefix):
                continue
            relative = member.path[prefix_len:]
            # The archive comes from a remote gateway: never let it write
            # outside of the extraction directory.
            if relative.startswith("/") or ".." in relative.split("/"):
                raise ContentProviderException(
                    "refusing to unpack unsafe path {!r} from CID {}".format(
                        member.path, root
                    )
                )
            member.path = relative
            selected.append(member)
        # NOTE(review): targets of symlink/hardlink members are not
        # validated here.
        return selected

    def fetch(self, spec, output_dir, yield_output=False):
        """Fetch and unpack directory tree behind a CID

        Generator that yields progress messages while trying each gateway
        in turn. ``spec`` is the dict returned by ``detect``; the archive is
        unpacked into ``output_dir``. ``yield_output`` is accepted but not
        used.

        Raises ContentProviderException if no gateway delivers the content,
        or if the delivered archive contains an unsafe member path.
        """
        cid = spec["cid"]

        for gateway in self.gateways:
            yield "Fetching CID {} via {}.\n".format(cid, gateway)
            # the following url may change once ?format=tar
            # is implemented on the gateway
            # see also: https://github.com/ipfs/go-ipfs/issues/8234
            try:
                resp = requests.get(
                    "{}/api/v0/get?arg={}".format(gateway, cid),
                )
            except requests.ConnectionError:
                yield "could not connect to gateway {}\n".format(gateway)
                continue

            if not resp.ok:
                yield "could not get CID via {}: {}\n".format(gateway, resp.status_code)
                continue

            # Close the archive once unpacking has finished.
            with TarFile(fileobj=BytesIO(resp.content)) as tar:
                tar.extractall(output_dir, members=self._strip_root(tar, cid))
            break
        else:
            # Reached only when the loop ran out of gateways without a break.
            raise ContentProviderException("could not find any working IPFS gateway")

        self._cid = cid

    @property
    def content_id(self):
        """
        On IPFS, the content identifier (CID) is a hash
        of all of the referenced contents. Thus the CID
        is a good content_id :-)

        None until ``fetch`` has completed successfully.
        """
        return self._cid
|
|
@ -39,3 +39,6 @@
|
|||
url: https://github.com/binderhub-ci-repos/lfs
|
||||
ref: 9abf54a
|
||||
verify: grep "I am stored in git lfs" in-lfs.dat
|
||||
- name: Binder Examples - Requirements on IPFS
|
||||
url: QmPjPUTcXeiEdNUMEPusP4rnJNz2YPw1XrYQkp43C96DyS
|
||||
verify: python -c 'import matplotlib'
|
||||
|
|
|
@ -0,0 +1,26 @@
|
|||
import pytest
|
||||
|
||||
from repo2docker.contentproviders import IPFS
|
||||
|
||||
# Strings the IPFS provider is expected to recognise as CIDs.
valid_cids = [
    "QmYjtig7VJQ6XsnUjqqJvj7QaMcCAwtrgNdahSiFofrE7o",
    "bafkreidon73zkcrwdb5iafqtijxildoonbwnpv7dyd6ef3qdgads2jc4su",
    "bafybeiasb5vpmaounyilfuxbd3lryvosl4yefqrfahsb2esg46q6tu6y5q",
    "zdj7WWeQ43G6JJvLWQWZpyHuAMq6uYWRjkBXFad11vE2LHhQ7",
]

# Strings the IPFS provider is expected to leave to other providers.
not_cids = [
    "https://github.com/multiformats/cid",
    "noop",
    "https://doi.org/10.5281/zenodo.3232985",
]


@pytest.mark.parametrize("cid", valid_cids)
def test_detect_ipfs_on_valid_cid(cid):
    """detect() returns a spec dict carrying the CID it was given."""
    provider = IPFS()
    spec = provider.detect(cid)
    assert spec == {"cid": cid}


@pytest.mark.parametrize("no_cid", not_cids)
def test_dont_detect_ipfs_on_no_cid(no_cid):
    """detect() returns None for input that is not a CID."""
    provider = IPFS()
    spec = provider.detect(no_cid)
    assert spec is None
|
Ładowanie…
Reference in New Issue