pull/1098/merge
Tobias Kölling 2022-10-30 20:38:55 +01:00 zatwierdzone przez GitHub
commit 827ee628e3
Nie znaleziono w bazie danych klucza dla tego podpisu
ID klucza GPG: 4AEE18F83AFDEB23
7 zmienionych plików z 125 dodań i 1 usunięć

Wyświetl plik

@ -5,7 +5,7 @@ jupyter-repo2docker
images from source code repositories**.
``repo2docker`` fetches a repository
(from GitHub, GitLab, Zenodo, Figshare, Dataverse installations, a Git repository or a local directory)
(from GitHub, GitLab, Zenodo, Figshare, Dataverse installations, a Git repository, an IPFS CID or a local directory)
and builds a container image in which the code can be executed.
The image build process is based on the configuration files found in the repository.

Wyświetl plik

@ -16,6 +16,7 @@ a `DOI <https://en.wikipedia.org/wiki/Digital_object_identifier>`_ from Zenodo
a `Handle <https://en.wikipedia.org/wiki/Handle_System>`_ or DOI from a Dataverse installation,
a `SWHID`_ of a directory of a revision archived in the
`Software Heritage Archive <https://archive.softwareheritage.org>`_,
a `CID`_ from a folder on `IPFS <https://ipfs.io>`_
or a path to a local directory.
It then performs these steps:
@ -40,6 +41,7 @@ where ``<source-repository>`` is:
* a URL of a Git repository (``https://github.com/binder-examples/requirements``),
* a Zenodo DOI (``10.5281/zenodo.1211089``),
* a SWHID_ (``swh:1:rev:999dd06c7f679a2714dfe5199bdca09522a29649``),
* a CID_ (``QmPjPUTcXeiEdNUMEPusP4rnJNz2YPw1XrYQkp43C96DyS``), or
* a path to a local directory (``a/local/directory``)
of the source repository you want to build.
@ -136,3 +138,4 @@ Command line API
.. _Pytudes: https://github.com/norvig/pytudes
.. _SWHID: https://docs.softwareheritage.org/devel/swh-model/persistent-identifiers.html
.. _CID: https://docs.ipfs.io/concepts/content-addressing/

Wyświetl plik

@ -152,6 +152,7 @@ class Repo2Docker(Application):
contentproviders.Dataverse,
contentproviders.Hydroshare,
contentproviders.Swhid,
contentproviders.IPFS,
contentproviders.Mercurial,
contentproviders.Git,
],

Wyświetl plik

@ -6,3 +6,4 @@ from .dataverse import Dataverse
from .hydroshare import Hydroshare
from .mercurial import Mercurial
from .swhid import Swhid
from .ipfs import IPFS

Wyświetl plik

@ -0,0 +1,90 @@
import re
from tarfile import TarFile
from io import BytesIO
import requests
from .base import ContentProvider, ContentProviderException
# Testing the well-formedness of a CID is not trivial. To do it
# properly, one should use py-cid, which can decode all CIDs.
# That library, however, has a bunch of dependencies, so for now
# we'll go with a regex-based approximation.
# This regex follows https://stackoverflow.com/a/67176726
# One alternative per textual CID form accepted by this approximation:
# "Qm" + base58 characters, "b"/"B" + base32 characters,
# "z" + base58 characters, and "F" + upper-case hex characters.
RE_CID = re.compile(
    r"Qm[1-9A-HJ-NP-Za-km-z]{44,}|"
    r"b[A-Za-z2-7]{58,}|"
    r"B[A-Z2-7]{58,}|"
    r"z[1-9A-HJ-NP-Za-km-z]{48,}|"
    r"F[0-9A-F]{50,}"
)


def is_cid(s):
    """Return True if the whole string *s* looks like an IPFS CID.

    The check is a regex approximation, not a real CID decoder. The
    complete string has to match: a CID-like prefix followed by other
    characters (a path, a query string, whitespace, ...) is rejected, so
    that nothing but the identifier itself is ever handed to a gateway.
    """
    return RE_CID.fullmatch(s) is not None
class IPFS(ContentProvider):
    """Provide contents of an IPFS CID.

    The directory tree behind a CID is downloaded as a tar archive from
    the first gateway in ``self.gateways`` that answers successfully and
    is unpacked into the requested output directory.
    """

    # (connect, read) timeout in seconds for a single gateway request.
    # Without a timeout one stalled gateway would block the build forever
    # instead of falling through to the next gateway in the list.
    request_timeout = (10, 300)

    def __init__(self):
        super().__init__()
        # Set by fetch(); None until a CID has been fetched successfully,
        # so that content_id can be read safely before fetch() ran.
        self._cid = None
        # Gateways are tried in this order; the local address comes first.
        self.gateways = [
            "http://127.0.0.1:8080",
            "https://ipfs.io",
            "https://dweb.link",
            "https://gateway.pinata.cloud",
            "https://cloudflare-ipfs.com",
            "https://ipfs.fleek.co",
        ]

    def detect(self, cid, ref=None, extra_args=None):
        """Return ``{"cid": cid}`` if *cid* looks like a CID, else ``None``.

        ``ref`` and ``extra_args`` are not used by this provider.
        """
        if is_cid(cid):
            return {"cid": cid}
        return None

    def fetch(self, spec, output_dir, yield_output=False):
        """Fetch and unpack the directory tree behind a CID.

        This is a generator: it yields human-readable progress messages
        while trying the gateways one after another.

        Parameters:
            spec: mapping with the key ``"cid"``, as returned by detect().
            output_dir: directory the archive contents are unpacked into.
            yield_output: not used by this provider.

        Raises:
            ContentProviderException: if no gateway delivered the CID, or
                if the delivered archive contains a member that would be
                unpacked outside of ``output_dir``.
        """
        cid = spec["cid"]
        for gateway in self.gateways:
            yield "Fetching CID {} via {}.\n".format(cid, gateway)
            # the following url may change once ?format=tar
            # is implemented on the gateway
            # see also: https://github.com/ipfs/go-ipfs/issues/8234
            try:
                resp = requests.get(
                    "{}/api/v0/get".format(gateway),
                    # let requests do the query-string escaping
                    params={"arg": cid},
                    timeout=self.request_timeout,
                )
            except (requests.ConnectionError, requests.Timeout):
                yield "could not connect to gateway {}\n".format(gateway)
                continue
            if not resp.ok:
                yield "could not get CID via {}: {}\n".format(gateway, resp.status_code)
                continue
            # the context manager closes the archive even if unpacking fails
            with TarFile(fileobj=BytesIO(resp.content)) as tar:
                tar.extractall(output_dir, members=self._members_below_root(tar, cid))
            break
        else:
            raise ContentProviderException("could not find any working IPFS gateway")
        self._cid = cid

    @staticmethod
    def _members_below_root(tar, cid):
        """Yield the archive members below ``<cid>/`` with that prefix removed.

        This trick is from https://stackoverflow.com/a/43094365 and gets rid
        of the root folder in the tar, which is named after the requested
        CID. Members outside of that root folder (including the entry for
        the root folder itself) are skipped.

        Raises:
            ContentProviderException: if a member name is absolute or
                contains a ``..`` component after the prefix was removed.
                The archive comes from a remote gateway and must not be
                able to write outside of the extraction directory.
        """
        subfolder = "{}/".format(cid)
        subfolder_len = len(subfolder)
        for member in tar.getmembers():
            if not member.path.startswith(subfolder):
                continue
            relative_path = member.path[subfolder_len:]
            if relative_path.startswith("/") or ".." in relative_path.split("/"):
                raise ContentProviderException(
                    "refusing to unpack unsafe path {!r} from CID {}".format(
                        member.path, cid
                    )
                )
            member.path = relative_path
            yield member

    @property
    def content_id(self):
        """
        On IPFS, the content identifier (CID) is a hash
        of all of the referenced contents. Thus the CID
        is a good content_id :-)

        ``None`` until fetch() has completed successfully.
        """
        return self._cid

Wyświetl plik

@ -39,3 +39,6 @@
url: https://github.com/binderhub-ci-repos/lfs
ref: 9abf54a
verify: grep "I am stored in git lfs" in-lfs.dat
- name: Binder Examples - Requirements on IPFS
url: QmPjPUTcXeiEdNUMEPusP4rnJNz2YPw1XrYQkp43C96DyS
verify: python -c 'import matplotlib'

Wyświetl plik

@ -0,0 +1,26 @@
import pytest
from repo2docker.contentproviders import IPFS
# Inputs that IPFS.detect() is expected to recognise as CIDs.
valid_cids = [
    "QmYjtig7VJQ6XsnUjqqJvj7QaMcCAwtrgNdahSiFofrE7o",
    "bafkreidon73zkcrwdb5iafqtijxildoonbwnpv7dyd6ef3qdgads2jc4su",
    "bafybeiasb5vpmaounyilfuxbd3lryvosl4yefqrfahsb2esg46q6tu6y5q",
    "zdj7WWeQ43G6JJvLWQWZpyHuAMq6uYWRjkBXFad11vE2LHhQ7",
]

# Inputs that IPFS.detect() is expected to reject (URLs and a plain word).
not_cids = [
    "https://github.com/multiformats/cid",
    "noop",
    "https://doi.org/10.5281/zenodo.3232985",
]
@pytest.mark.parametrize("cid", valid_cids)
def test_detect_ipfs_on_valid_cid(cid):
    """detect() must hand back a spec carrying the CID unchanged."""
    expected_spec = {"cid": cid}
    detected_spec = IPFS().detect(cid)
    assert detected_spec == expected_spec
@pytest.mark.parametrize("no_cid", not_cids)
def test_dont_detect_ipfs_on_no_cid(no_cid):
    """detect() must return None for strings that are not CIDs."""
    detected_spec = IPFS().detect(no_cid)
    assert detected_spec is None