Reformatting and better tests

pull/704/head
Tom Morrell 2019-06-20 13:22:17 -07:00
rodzic 6d785404fb
commit 93147888c5
4 zmienionych plików z 100 dodań i 63 usunięć

Wyświetl plik

@ -1,15 +1,15 @@
import os
import json
import shutil
import copy
from os import makedirs
from os import path
from urllib.request import build_opener, urlopen, Request
from urllib.error import HTTPError
from zipfile import ZipFile, is_zipfile
from .base import ContentProvider
from ..utils import copytree
from ..utils import copytree, deep_get
from ..utils import normalize_doi, is_doi
from .. import __version__
@ -36,35 +36,21 @@ class Zenodo(ContentProvider):
if is_doi(doi):
doi = normalize_doi(doi)
resp = self._urlopen("https://doi.org/{}".format(doi))
try:
resp = self._urlopen("https://doi.org/{}".format(doi))
# If the DOI doesn't resolve, just return URL
except HTTPError:
return doi
return resp.url
else:
# Just return what is actually just a URL
return doi
def _getfromdict(self, datadict, dotpath):
# Use a dotpath (string separated by periods)
# to access vaules in a dictionary
# data.files.0 returns value at dataDict[data][files][0]
split = dotpath.split(".")
# We check if we have any digits and convert these to
# ints for list access
mapList = []
for s in split:
if s.isdigit():
mapList.append(int(s))
else:
mapList.append(s)
values = copy.deepcopy(datadict)
for k in mapList:
values = values[k]
return values
def detect(self, doi, ref=None, extra_args=None):
"""Trigger this provider for things that resolve to a Zenodo/Invenio record"""
# We need the hostname (url where records are), api url (for metadata),
# filepath (path to files in metadata), filename (path to filename in
# metadata), type (path to type in metadata)
# metadata), download (path to file download URL), and type (path to item type in metadata)
hosts = [
{
"hostname": ["https://zenodo.org/record/", "http://zenodo.org/record/"],
@ -111,8 +97,8 @@ class Zenodo(ContentProvider):
def _fetch(file_ref, unzip=False):
# the assumption is that `unzip=True` means that this is the only
# file related to the zenodo record
with self._urlopen(self._getfromdict(file_ref, host["download"])) as src:
fname = self._getfromdict(file_ref, host["filename"])
with self._urlopen(deep_get(file_ref, host["download"])) as src:
fname = deep_get(file_ref, host["filename"])
if path.dirname(fname):
sub_dir = path.join(output_dir, path.dirname(fname))
if not path.exists(sub_dir):
@ -146,11 +132,8 @@ class Zenodo(ContentProvider):
copytree(path.join(output_dir, d), output_dir)
shutil.rmtree(path.join(output_dir, d))
is_software = self._getfromdict(record, host["type"]).lower() == "software"
files = self._getfromdict(record, host["filepath"])
#
is_software = deep_get(record, host["type"]).lower() == "software"
files = deep_get(record, host["filepath"])
only_one_file = len(files) == 1
for file_ref in files:
for line in _fetch(file_ref, unzip=is_software and only_one_file):

Wyświetl plik

@ -393,22 +393,43 @@ def copytree(
return dst
# Code segments below from idutils (https://github.com/inveniosoftware/idutils)
def deep_get(dikt, path):
    """Get a value located in `path` from a nested dictionary.

    Use a string separated by periods as the path to access
    values in a nested dictionary:

    deep_get(data, "data.files.0") == data["data"]["files"][0]

    Components made up only of digits are tried as integer keys first, so
    they can address list positions. If the container has no such integer
    key, the component is retried as a plain string key, so mappings keyed
    by "0" work as well.

    The value is returned as-is (not copied). Lookup errors from the
    underlying containers (KeyError, IndexError, TypeError) propagate.
    """
    value = dikt
    for component in path.split("."):
        if component.isdigit():
            try:
                value = value[int(component)]
            except KeyError:
                # Mapping keyed by the digit string rather than the int
                value = value[component]
        else:
            value = value[component]
    return value
# doi_regexp, is_doi, and normalize_doi are from idutils (https://github.com/inveniosoftware/idutils)
# Copyright (C) 2015-2018 CERN.
# Copyright (C) 2018 Alan Rubin.
# Licensed under BSD-3-Clause license
# Case-insensitive DOI matcher. Group 1 is the optional "doi:" or doi.org URL
# prefix; group 2 is the DOI itself, starting at "10.".
# See http://en.wikipedia.org/wiki/Digital_object_identifier.
# Written as a raw string so the regex escapes (\s, \., \d) are not parsed as
# invalid Python string escapes.
# NOTE(review): the "." in "(.\d+)*" is unescaped and so matches any
# character; kept unchanged here.
doi_regexp = re.compile(
    r"(doi:\s*|(?:https?://)?(?:dx\.)?doi\.org/)?(10\.\d+(.\d+)*/.+)$", flags=re.I
)
def is_doi(val):
    """Test if argument is a DOI.

    Returns the match object produced by `doi_regexp` when `val` matches
    the pattern of a DOI, and None if it doesn't.
    http://en.wikipedia.org/wiki/Digital_object_identifier.
    """
    return doi_regexp.match(val)
def normalize_doi(val):
    """Normalize a DOI.

    Return just the DOI (e.g. 10.1234/jshd123) from a val that could
    include a url or doi prefix (e.g. https://doi.org/10.1234/jshd123).

    `val` has to match `doi_regexp`; if it doesn't, the match is None and
    an AttributeError is raised.
    """
    return doi_regexp.match(val).group(2)

Wyświetl plik

@ -1,5 +1,6 @@
import json
import os
import pytest
from contextlib import contextmanager
from io import BytesIO
@ -20,11 +21,14 @@ def test_content_id():
assert zen.content_id == "3232985"
def test_detect():
with patch.object(Zenodo, "_urlopen") as fake_urlopen:
fake_urlopen.return_value.url = "https://zenodo.org/record/3232985"
# valid Zenodo DOIs trigger this content provider
assert Zenodo().detect("10.5281/zenodo.3232985") == {
test_hosts = [
(
[
"https://zenodo.org/record/3232985",
"10.5281/zenodo.3232985",
"https://doi.org/10.5281/zenodo.3232985",
],
{
"host": {
"hostname": ["https://zenodo.org/record/", "http://zenodo.org/record/"],
"api": "https://zenodo.org/api/records/",
@ -34,22 +38,15 @@ def test_detect():
"type": "metadata.upload_type",
},
"record": "3232985",
}
assert (
Zenodo().detect("https://doi.org/10.5281/zenodo.3232985")["record"]
== "3232985"
)
assert (
Zenodo().detect("https://zenodo.org/record/3232985")["record"] == "3232985"
)
# only two of the three calls above have to resolve a DOI
assert fake_urlopen.call_count == 2
with patch.object(Zenodo, "_urlopen") as fake_urlopen:
fake_urlopen.return_value.url = "https://data.caltech.edu/records/1235"
# valid CaltechDATA DOIs trigger this content provider
assert Zenodo().detect("10.22002/d1.1235") == {
},
),
(
[
"https://data.caltech.edu/records/1235",
"10.22002/d1.1235",
"https://doi.org/10.22002/d1.1235",
],
{
"host": {
"hostname": [
"https://data.caltech.edu/records/",
@ -62,10 +59,19 @@ def test_detect():
"type": "metadata.resourceType.resourceTypeGeneral",
},
"record": "1235",
}
assert Zenodo().detect("https://doi.org/10.22002/d1.1235")["record"] == "1235"
assert Zenodo().detect("https://data.caltech.edu/records/1235")["record"] == "1235"
},
),
]
@pytest.mark.parametrize("test_input,expected", test_hosts)
def test_detect_zenodo(test_input, expected):
    """All three spellings in `test_input` must be detected as `expected`."""
    with patch.object(Zenodo, "_urlopen") as fake_urlopen:
        record_url = test_input[0]
        # Every mocked DOI resolution lands on the record URL
        fake_urlopen.return_value.url = record_url
        # valid Zenodo DOIs trigger this content provider
        for reference in (record_url, test_input[1], test_input[2]):
            assert Zenodo().detect(reference) == expected
        # only two of the three calls above have to resolve a DOI
        assert fake_urlopen.call_count == 2
@ -73,13 +79,12 @@ def test_detect():
# Don't trigger the Zenodo content provider
assert Zenodo().detect("/some/path/here") is None
assert Zenodo().detect("https://example.com/path/here") is None
# donn't handle DOIs that aren't from Zenodo
# don't handle DOIs that aren't from Zenodo
fake_urlopen.return_value.url = (
"http://joss.theoj.org/papers/10.21105/joss.01277"
)
assert Zenodo().detect("https://doi.org/10.21105/joss.01277") is None
# none of the examples are Zenodo like, so we should not attempt to
# resolve a DOI either
assert not fake_urlopen.called
@contextmanager
def zenodo_archive(prefix="a_directory"):

Wyświetl plik

@ -82,3 +82,31 @@ def test_invalid_port_mapping(port_spec):
utils.validate_and_generate_port_mapping([port_spec])
assert 'Port specification "{}"'.format(port_spec) in str(e.value)
def test_deep_get():
    """deep_get follows dotted paths through nested dicts and lists."""
    data = {"data": {"files": [1, 2, 3]}}
    expectations = [
        ("data.files.0", 1),
        ("data.files.1", 2),
        ("data.files", [1, 2, 3]),
        ("data", {"files": [1, 2, 3]}),
    ]
    for dotted_path, wanted in expectations:
        assert utils.deep_get(data, dotted_path) == wanted
def test_is_doi():
    """is_doi returns a match for DOI-like strings and None otherwise."""
    # Identity comparison is the correct check against the None singleton
    assert utils.is_doi("10.1234/jshd123") is not None
    assert utils.is_doi("10.1234/JSHD.8192") is not None
    assert utils.is_doi("doi.org/10.1234/jshd123") is not None
    assert utils.is_doi("http://doi.org/10.1234/jshd123") is not None
    assert utils.is_doi("https://doi.org/10.1234/jshd123") is not None
    assert utils.is_doi("http://dx.doi.org/10.1234/jshd123") is not None
    assert utils.is_doi("101234/jshd123") is None
    assert utils.is_doi("https://mybinder.org") is None
def test_normalize_doi():
    """normalize_doi strips any doi.org URL prefix and keeps the DOI."""
    cases = [
        ("10.1234/jshd123", "10.1234/jshd123"),
        ("10.1234/JSHD.8192", "10.1234/JSHD.8192"),
        ("doi.org/10.1234/jshd123", "10.1234/jshd123"),
        ("http://doi.org/10.1234/jshd123", "10.1234/jshd123"),
        ("https://doi.org/10.1234/jshd123", "10.1234/jshd123"),
        ("http://dx.doi.org/10.1234/jshd123", "10.1234/jshd123"),
    ]
    for raw, wanted in cases:
        assert utils.normalize_doi(raw) == wanted