kopia lustrzana https://gitlab.com/marnanel/chapeau
find() creates Things marked as remote, rather than some weird
half-arsed replacement of its own. python-mimeparse added to the requirements because fetch() needs it.
2019-08-17
rodzic
962e85c0f8
commit
1698ccc357
|
@ -5,133 +5,22 @@ from django.conf import settings
|
||||||
import django.urls
|
import django.urls
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
from django.http.request import HttpRequest
|
from django.http.request import HttpRequest
|
||||||
|
from django_kepi.create import create
|
||||||
|
import datetime
|
||||||
import json
|
import json
|
||||||
|
import mimeparse
|
||||||
|
|
||||||
logger = logging.getLogger(name='django_kepi')
|
logger = logging.getLogger(name='django_kepi')
|
||||||
|
|
||||||
class RemoteItem(dict):
|
class Fetch(models.Model):
|
||||||
def __init__(self, body):
|
|
||||||
self.update(json.loads(str(body)))
|
|
||||||
|
|
||||||
class CachedRemoteText(models.Model):
|
url = models.URLField(
|
||||||
|
|
||||||
address = models.URLField(
|
|
||||||
primary_key = True,
|
primary_key = True,
|
||||||
)
|
)
|
||||||
|
|
||||||
content = models.TextField(
|
date = models.DateTimeField(
|
||||||
default = None,
|
default = datetime.datetime.now,
|
||||||
null = True,
|
|
||||||
)
|
)
|
||||||
# XXX We should probably also have a cache timeout
|
|
||||||
|
|
||||||
def is_gone(self):
|
|
||||||
return self.content is None
|
|
||||||
|
|
||||||
def __str__(self):
|
|
||||||
if self.content is not None:
|
|
||||||
return self.content
|
|
||||||
else:
|
|
||||||
return ''
|
|
||||||
|
|
||||||
def __repr__(self):
|
|
||||||
if self.content is not None:
|
|
||||||
return '(%s: "%s")' % (self.address, self.content[:20])
|
|
||||||
else:
|
|
||||||
return '(%s is GONE)' % (self.address)
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def fetch(cls,
|
|
||||||
fetch_url,
|
|
||||||
post_data):
|
|
||||||
"""
|
|
||||||
Fetch a file over HTTPS (and other protocols).
|
|
||||||
This function blocks; don't call it while
|
|
||||||
serving a request.
|
|
||||||
|
|
||||||
fetch_url: the URL of the file you want.
|
|
||||||
FIXME: What happens if fetch_url is local?
|
|
||||||
|
|
||||||
post_data: If this is a dict, then the request
|
|
||||||
will be a POST, with the contents of
|
|
||||||
that dict as parameters to the remote server.
|
|
||||||
If this is None, then the request will
|
|
||||||
be a GET.
|
|
||||||
|
|
||||||
Returns: None, if post_data was a dict.
|
|
||||||
If post_data was None, returns a CachedRemoteText.
|
|
||||||
If fetch_url existed in the cache, this will be the cached
|
|
||||||
record; otherwise it will be a new record, which
|
|
||||||
has already been saved.
|
|
||||||
|
|
||||||
If the request was not successful, the is_gone()
|
|
||||||
method of the returned CachedRemoteText will return True.
|
|
||||||
All error codes, including notably 404, 410, and 500,
|
|
||||||
are handled alike. (Is there any reason not to do this?)
|
|
||||||
|
|
||||||
FIXME: What does it do if the request returned a redirect?
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
if post_data is None:
|
|
||||||
|
|
||||||
# This is a GET, so the answer might be cached.
|
|
||||||
# (FIXME: honour HTTP caching headers etc)
|
|
||||||
|
|
||||||
try:
|
|
||||||
existing = cls.objects.get(address=fetch_url)
|
|
||||||
except cls.DoesNotExist:
|
|
||||||
existing = None
|
|
||||||
|
|
||||||
if existing is not None:
|
|
||||||
logger.info('fetch %s: in cache', fetch_url)
|
|
||||||
|
|
||||||
if existing is not None:
|
|
||||||
return RemoteItem(existing)
|
|
||||||
else:
|
|
||||||
return None
|
|
||||||
|
|
||||||
logger.info('fetch %s: GET', fetch_url)
|
|
||||||
fetch = requests.get(fetch_url)
|
|
||||||
|
|
||||||
else:
|
|
||||||
logger.info('fetch %s: POST', fetch_url)
|
|
||||||
logger.debug('fetch %s: with data: %s',
|
|
||||||
fetch_url, post_data)
|
|
||||||
|
|
||||||
fetch = requests.post(fetch_url,
|
|
||||||
data=post_data)
|
|
||||||
|
|
||||||
logger.info('fetch %s: response code was %d',
|
|
||||||
fetch_url, fetch.status_code)
|
|
||||||
logger.debug('fetch %s: body was %s',
|
|
||||||
fetch_url, fetch.text)
|
|
||||||
|
|
||||||
if post_data is not None:
|
|
||||||
return None
|
|
||||||
|
|
||||||
# This was a GET, so cache it
|
|
||||||
# (FIXME: honour HTTP caching headers etc)
|
|
||||||
# XXX: race condition: catch duplicate entry exception and ignore
|
|
||||||
|
|
||||||
if fetch.status_code==200:
|
|
||||||
content = fetch.text
|
|
||||||
else:
|
|
||||||
content = ''
|
|
||||||
|
|
||||||
result = cls(
|
|
||||||
address = fetch_url,
|
|
||||||
content = content,
|
|
||||||
)
|
|
||||||
result.save()
|
|
||||||
|
|
||||||
if content!='':
|
|
||||||
return RemoteItem(content)
|
|
||||||
else:
|
|
||||||
return None
|
|
||||||
|
|
||||||
def _obviously_belongs_to(self, actor):
|
|
||||||
return self.address.startswith(actor+'#')
|
|
||||||
|
|
||||||
class ThingRequest(HttpRequest):
|
class ThingRequest(HttpRequest):
|
||||||
|
|
||||||
|
@ -174,11 +63,89 @@ def find_local(path):
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def find_remote(url):
|
def find_remote(url):
|
||||||
|
|
||||||
logger.debug('%s: find remote', url)
|
logger.debug('%s: find remote', url)
|
||||||
|
|
||||||
result = CachedRemoteText.fetch(
|
try:
|
||||||
fetch_url=url,
|
fetch = Fetch.objects.get(
|
||||||
post_data=None,
|
url=url,
|
||||||
|
)
|
||||||
|
|
||||||
|
# TODO: cache timeouts.
|
||||||
|
# FIXME: honour cache headers etc
|
||||||
|
|
||||||
|
# We fetched it in the past.
|
||||||
|
|
||||||
|
try:
|
||||||
|
result = Thing.objects.get(
|
||||||
|
remote_url = url,
|
||||||
|
)
|
||||||
|
logger.debug('%s: already fetched, and it\'s %s',
|
||||||
|
url, result)
|
||||||
|
|
||||||
|
return result
|
||||||
|
except Thing.DoesNotExist:
|
||||||
|
logger.debug('%s: already fetched, and it wasn\'t there',
|
||||||
|
url, result)
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
except Fetch.DoesNotExist:
|
||||||
|
# We haven't fetched it before.
|
||||||
|
# So we need to fetch it now.
|
||||||
|
pass
|
||||||
|
|
||||||
|
logger.info('%s: performing the GET', url)
|
||||||
|
response = requests.get(url,
|
||||||
|
headers={'Accept': 'application/activity+json'},
|
||||||
|
)
|
||||||
|
|
||||||
|
fetch_record = Fetch(url=url)
|
||||||
|
fetch_record.save()
|
||||||
|
|
||||||
|
if response.status_code!=200:
|
||||||
|
logger.warn('%s: remote server responded %s %s' % (
|
||||||
|
response.status_code, response.reason))
|
||||||
|
return None
|
||||||
|
|
||||||
|
mime_type = mimeparse.parse_mime_type(
|
||||||
|
response.headers['Content-Type'])
|
||||||
|
mime_type = '/'.join(mime_type[0:2])
|
||||||
|
|
||||||
|
if mime_type not in [
|
||||||
|
'application/activity+json',
|
||||||
|
'application/json',
|
||||||
|
'text/json',
|
||||||
|
'text/plain',
|
||||||
|
]:
|
||||||
|
logger.warn('%s: response had the wrong Content-Type, %s' % (
|
||||||
|
url, response.headers['Content-Type'],
|
||||||
|
))
|
||||||
|
return None
|
||||||
|
|
||||||
|
try:
|
||||||
|
content = json.loads(response.text)
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
logger.warn('%s: response was not JSON' % (
|
||||||
|
url,
|
||||||
|
))
|
||||||
|
return None
|
||||||
|
|
||||||
|
if not isinstance(content, dict):
|
||||||
|
logger.warn('%s: response was not a JSON dict' % (
|
||||||
|
url,
|
||||||
|
))
|
||||||
|
return None
|
||||||
|
|
||||||
|
content_with_f = dict([
|
||||||
|
('f_'+f, v)
|
||||||
|
for f, v in content.items()
|
||||||
|
if not f.startswith('@')
|
||||||
|
])
|
||||||
|
|
||||||
|
result = create(
|
||||||
|
is_local = False,
|
||||||
|
**content_with_f,
|
||||||
)
|
)
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
@ -188,7 +155,8 @@ def is_local(url):
|
||||||
return parsed_url.hostname in settings.ALLOWED_HOSTS
|
return parsed_url.hostname in settings.ALLOWED_HOSTS
|
||||||
|
|
||||||
def find(url,
|
def find(url,
|
||||||
local_only=False):
|
local_only=False,
|
||||||
|
lightweight_for=None):
|
||||||
"""
|
"""
|
||||||
Finds an object.
|
Finds an object.
|
||||||
|
|
||||||
|
@ -221,4 +189,5 @@ def find(url,
|
||||||
if local_only:
|
if local_only:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
return find_remote(url)
|
return find_remote(
|
||||||
|
url=url)
|
||||||
|
|
|
@ -9,3 +9,4 @@ httpretty
|
||||||
httpsig
|
httpsig
|
||||||
django-celery-results
|
django-celery-results
|
||||||
django-polymorphic
|
django-polymorphic
|
||||||
|
python-mimeparse
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
from django.test import TestCase
|
from django.test import TestCase
|
||||||
from django_kepi.find import find
|
from django_kepi.find import find
|
||||||
from django_kepi.models import Thing, create
|
from django_kepi.models import Thing
|
||||||
|
from django_kepi.create import create
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from . import *
|
from . import *
|
||||||
import httpretty
|
import httpretty
|
||||||
|
@ -11,23 +12,44 @@ logger = logging.getLogger(name='django_kepi')
|
||||||
|
|
||||||
REMOTE_URL = 'https://remote.example.net/fnord'
|
REMOTE_URL = 'https://remote.example.net/fnord'
|
||||||
|
|
||||||
STUFF = {'a': 1, 'b': 2}
|
STUFF = {
|
||||||
|
"@context": "https://www.w3.org/ns/activitystreams",
|
||||||
|
"id": REMOTE_URL,
|
||||||
|
"type": "Note",
|
||||||
|
"to": ["https://altair.example.com/someone"],
|
||||||
|
"attributedTo": "https://europa.example.org/someone-else",
|
||||||
|
"content": "I've got a lovely bunch of coconuts.",
|
||||||
|
}
|
||||||
|
|
||||||
class TestFind(TestCase):
|
class TestFind(TestCase):
|
||||||
|
|
||||||
@httpretty.activate
|
def _mock_remote_stuff(self):
|
||||||
def test_find_remote(self):
|
|
||||||
|
|
||||||
mock_remote_object(
|
mock_remote_object(
|
||||||
REMOTE_URL,
|
REMOTE_URL,
|
||||||
content = json.dumps(STUFF),
|
content = json.dumps(STUFF),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@httpretty.activate
|
||||||
|
def test_find_remote(self):
|
||||||
|
|
||||||
|
self._mock_remote_stuff()
|
||||||
|
|
||||||
found = find(REMOTE_URL)
|
found = find(REMOTE_URL)
|
||||||
|
|
||||||
|
self.assertEqual(
|
||||||
|
found.url,
|
||||||
|
REMOTE_URL)
|
||||||
|
|
||||||
|
self.assertFalse(
|
||||||
|
found.is_local,
|
||||||
|
)
|
||||||
|
|
||||||
self.assertDictEqual(
|
self.assertDictEqual(
|
||||||
found,
|
found.activity_form,
|
||||||
STUFF,
|
{'attributedTo': 'https://europa.example.org/someone-else',
|
||||||
|
'id': 'https://remote.example.net/fnord',
|
||||||
|
'to': ['https://altair.example.com/someone'],
|
||||||
|
'type': '"Note"'}
|
||||||
)
|
)
|
||||||
|
|
||||||
@httpretty.activate
|
@httpretty.activate
|
||||||
|
@ -45,7 +67,7 @@ class TestFind(TestCase):
|
||||||
def test_find_local(self):
|
def test_find_local(self):
|
||||||
|
|
||||||
a = create(
|
a = create(
|
||||||
actor = 'https://example.net/users/fred',
|
f_actor = 'https://example.net/users/fred',
|
||||||
f_object = 'https://example.net/articles/i-like-jam',
|
f_object = 'https://example.net/articles/i-like-jam',
|
||||||
f_type = 'Like',
|
f_type = 'Like',
|
||||||
)
|
)
|
||||||
|
|
Ładowanie…
Reference in New Issue