Reformatting and better tests

2019-06-20 13:22:17 -07:00 · 2019-06-20 13:22:17 -07:00 · 93147888c5
commit 93147888c5
--- a/repo2docker/contentproviders/zenodo.py
+++ b/repo2docker/contentproviders/zenodo.py
@ -1,15 +1,15 @@
 import os
 import json
 import shutil
-import copy

 from os import makedirs
 from os import path
 from urllib.request import build_opener, urlopen, Request
+from urllib.error import HTTPError
 from zipfile import ZipFile, is_zipfile

 from .base import ContentProvider
-from ..utils import copytree
+from ..utils import copytree, deep_get
 from ..utils import normalize_doi, is_doi
 from .. import __version__

@ -36,35 +36,21 @@ class Zenodo(ContentProvider):
        if is_doi(doi):
            doi = normalize_doi(doi)

-            resp = self._urlopen("https://doi.org/{}".format(doi))
+            try:
+                resp = self._urlopen("https://doi.org/{}".format(doi))
+            # If the DOI doesn't resolve, just return the normalized DOI
+            except HTTPError:
+                return doi
            return resp.url
        else:
+            # Just return what is actually just a URL
            return doi

-    def _getfromdict(self, datadict, dotpath):
-        # Use a dotpath (string separated by periods)
-        # to access vaules in a dictionary
-        # data.files.0 returns value at dataDict[data][files][0]
-        split = dotpath.split(".")
-        # We check if we have any digits and convert these to
-        # ints for list access
-        mapList = []
-        for s in split:
-            if s.isdigit():
-                mapList.append(int(s))
-            else:
-                mapList.append(s)
-        values = copy.deepcopy(datadict)
-        for k in mapList:
-            values = values[k]
-        return values
-
    def detect(self, doi, ref=None, extra_args=None):
        """Trigger this provider for things that resolve to a Zenodo/Invenio record"""
        # We need the hostname (url where records are), api url (for metadata),
        # filepath (path to files in metadata), filename (path to filename in
-        # metadata), type (path to type in metadata)
-
+        # metadata), download (path to file download URL), and type (path to item type in metadata)
        hosts = [
            {
                "hostname": ["https://zenodo.org/record/", "http://zenodo.org/record/"],
@ -111,8 +97,8 @@ class Zenodo(ContentProvider):
        def _fetch(file_ref, unzip=False):
            # the assumption is that `unzip=True` means that this is the only
            # file related to the zenodo record
-            with self._urlopen(self._getfromdict(file_ref, host["download"])) as src:
-                fname = self._getfromdict(file_ref, host["filename"])
+            with self._urlopen(deep_get(file_ref, host["download"])) as src:
+                fname = deep_get(file_ref, host["filename"])
                if path.dirname(fname):
                    sub_dir = path.join(output_dir, path.dirname(fname))
                    if not path.exists(sub_dir):
@ -146,11 +132,8 @@ class Zenodo(ContentProvider):
                        copytree(path.join(output_dir, d), output_dir)
                        shutil.rmtree(path.join(output_dir, d))

-        is_software = self._getfromdict(record, host["type"]).lower() == "software"
-        files = self._getfromdict(record, host["filepath"])
-
-        #
-
+        is_software = deep_get(record, host["type"]).lower() == "software"
+        files = deep_get(record, host["filepath"])
        only_one_file = len(files) == 1
        for file_ref in files:
            for line in _fetch(file_ref, unzip=is_software and only_one_file):
--- a/repo2docker/utils.py
+++ b/repo2docker/utils.py
@ -393,22 +393,43 @@ def copytree(
    return dst


-# Code segments below from idutils (https://github.com/inveniosoftware/idutils)
+def deep_get(dikt, path):
+    """Get a value located in `path` from a nested dictionary.
+
+    Use a string separated by periods as the path to access
+    values in a nested dictionary:
+
+    deep_get(data, "data.files.0") == data["data"]["files"][0]
+    """
+    value = dikt
+    for component in path.split("."):
+        if component.isdigit():
+            value = value[int(component)]
+        else:
+            value = value[component]
+    return value
+
+
+# doi_regexp, is_doi, and normalize_doi are from idutils (https://github.com/inveniosoftware/idutils)
 # Copyright (C) 2015-2018 CERN.
 # Copyright (C) 2018 Alan Rubin.
 # Licensed under BSD-3-Clause license
 doi_regexp = re.compile(
    "(doi:\s*|(?:https?://)?(?:dx\.)?doi\.org/)?(10\.\d+(.\d+)*/.+)$", flags=re.I
 )
-"""See http://en.wikipedia.org/wiki/Digital_object_identifier."""


 def is_doi(val):
-    """Test if argument is a DOI."""
+    """Returns None if val doesn't match pattern of a DOI.
+    http://en.wikipedia.org/wiki/Digital_object_identifier."""
+    print(type(val))
+    print(val)
    return doi_regexp.match(val)


 def normalize_doi(val):
-    """Normalize a DOI."""
+    """Return just the DOI (e.g. 10.1234/jshd123)
+    from a val that could include a url or doi 
+    (e.g. https://doi.org/10.1234/jshd123)"""
    m = doi_regexp.match(val)
    return m.group(2)
--- a/tests/unit/contentproviders/test_zenodo.py
+++ b/tests/unit/contentproviders/test_zenodo.py
@ -1,5 +1,6 @@
 import json
 import os
+import pytest

 from contextlib import contextmanager
 from io import BytesIO
@ -20,11 +21,14 @@ def test_content_id():
        assert zen.content_id == "3232985"


-def test_detect():
-    with patch.object(Zenodo, "_urlopen") as fake_urlopen:
-        fake_urlopen.return_value.url = "https://zenodo.org/record/3232985"
-        # valid Zenodo DOIs trigger this content provider
-        assert Zenodo().detect("10.5281/zenodo.3232985") == {
+test_hosts = [
+    (
+        [
+            "https://zenodo.org/record/3232985",
+            "10.5281/zenodo.3232985",
+            "https://doi.org/10.5281/zenodo.3232985",
+        ],
+        {
            "host": {
                "hostname": ["https://zenodo.org/record/", "http://zenodo.org/record/"],
                "api": "https://zenodo.org/api/records/",
@ -34,22 +38,15 @@ def test_detect():
                "type": "metadata.upload_type",
            },
            "record": "3232985",
-        }
-        assert (
-            Zenodo().detect("https://doi.org/10.5281/zenodo.3232985")["record"]
-            == "3232985"
-        )
-        assert (
-            Zenodo().detect("https://zenodo.org/record/3232985")["record"] == "3232985"
-        )
-
-        # only two of the three calls above have to resolve a DOI
-        assert fake_urlopen.call_count == 2
-
-    with patch.object(Zenodo, "_urlopen") as fake_urlopen:
-        fake_urlopen.return_value.url = "https://data.caltech.edu/records/1235"
-        # valid CaltechDATA DOIs trigger this content provider
-        assert Zenodo().detect("10.22002/d1.1235") == {
+        },
+    ),
+    (
+        [
+            "https://data.caltech.edu/records/1235",
+            "10.22002/d1.1235",
+            "https://doi.org/10.22002/d1.1235",
+        ],
+        {
            "host": {
                "hostname": [
                    "https://data.caltech.edu/records/",
@ -62,10 +59,19 @@ def test_detect():
                "type": "metadata.resourceType.resourceTypeGeneral",
            },
            "record": "1235",
-        }
-        assert Zenodo().detect("https://doi.org/10.22002/d1.1235")["record"] == "1235"
-        assert Zenodo().detect("https://data.caltech.edu/records/1235")["record"] == "1235"
+        },
+    ),
+]

+
+@pytest.mark.parametrize("test_input,expected", test_hosts)
+def test_detect_zenodo(test_input, expected):
+    with patch.object(Zenodo, "_urlopen") as fake_urlopen:
+        fake_urlopen.return_value.url = test_input[0]
+        # valid Zenodo DOIs trigger this content provider
+        assert Zenodo().detect(test_input[0]) == expected
+        assert Zenodo().detect(test_input[1]) == expected
+        assert Zenodo().detect(test_input[2]) == expected
        # only two of the three calls above have to resolve a DOI
        assert fake_urlopen.call_count == 2

@ -73,13 +79,12 @@ def test_detect():
        # Don't trigger the Zenodo content provider
        assert Zenodo().detect("/some/path/here") is None
        assert Zenodo().detect("https://example.com/path/here") is None
-        # donn't handle DOIs that aren't from Zenodo
+        # don't handle DOIs that aren't from Zenodo
+        fake_urlopen.return_value.url = (
+            "http://joss.theoj.org/papers/10.21105/joss.01277"
+        )
        assert Zenodo().detect("https://doi.org/10.21105/joss.01277") is None

-        # none of the examples are Zenodo like, so we should not attempt to
-        # resolve a DOI either
-        assert not fake_urlopen.called
-

@contextmanager
 def zenodo_archive(prefix="a_directory"):
--- a/tests/unit/test_utils.py
+++ b/tests/unit/test_utils.py
@ -82,3 +82,31 @@ def test_invalid_port_mapping(port_spec):
        utils.validate_and_generate_port_mapping([port_spec])

    assert 'Port specification "{}"'.format(port_spec) in str(e.value)
+
+
+def test_deep_get():
+    data = {"data": {"files": [1, 2, 3]}}
+    assert utils.deep_get(data, "data.files.0") == 1
+    assert utils.deep_get(data, "data.files.1") == 2
+    assert utils.deep_get(data, "data.files") == [1, 2, 3]
+    assert utils.deep_get(data, "data") == {"files": [1, 2, 3]}
+
+
+def test_is_doi():
+    assert utils.is_doi("10.1234/jshd123") != None
+    assert utils.is_doi("10.1234/JSHD.8192") != None
+    assert utils.is_doi("doi.org/10.1234/jshd123") != None
+    assert utils.is_doi("http://doi.org/10.1234/jshd123") != None
+    assert utils.is_doi("https://doi.org/10.1234/jshd123") != None
+    assert utils.is_doi("http://dx.doi.org/10.1234/jshd123") != None
+    assert utils.is_doi("101234/jshd123") == None
+    assert utils.is_doi("https://mybinder.org") == None
+
+
+def test_normalize_doi():
+    assert utils.normalize_doi("10.1234/jshd123") == "10.1234/jshd123"
+    assert utils.normalize_doi("10.1234/JSHD.8192") == "10.1234/JSHD.8192"
+    assert utils.normalize_doi("doi.org/10.1234/jshd123") == "10.1234/jshd123"
+    assert utils.normalize_doi("http://doi.org/10.1234/jshd123") == "10.1234/jshd123"
+    assert utils.normalize_doi("https://doi.org/10.1234/jshd123") == "10.1234/jshd123"
+    assert utils.normalize_doi("http://dx.doi.org/10.1234/jshd123") == "10.1234/jshd123"