change Protocol.fetch() to return an Object instead of populating one

pull/459/head
Ryan Barrett 2023-03-22 20:49:28 -07:00
rodzic 20f86c7de3
commit 28e8baff6c
Nie znaleziono w bazie danych klucza dla tego podpisu
ID klucza GPG: 6BE31FDF4776E9D4
6 zmienionych plików z 78 dodań i 60 usunięć

Wyświetl plik

@ -59,8 +59,8 @@ class ActivityPub(Protocol):
# TODO: return bool or otherwise unify return value with others # TODO: return bool or otherwise unify return value with others
@classmethod @classmethod
def fetch(cls, id, obj): def fetch(cls, id):
"""Tries to fetch an AS2 object and populate it into an :class:`Object`. """Tries to fetch an AS2 object.
Uses HTTP content negotiation via the Content-Type header. If the url is Uses HTTP content negotiation via the Content-Type header. If the url is
HTML and it has a rel-alternate link with an AS2 content type, fetches and HTML and it has a rel-alternate link with an AS2 content type, fetches and
@ -79,7 +79,9 @@ class ActivityPub(Protocol):
Args: Args:
id: str, object's URL id id: str, object's URL id
obj: :class:`Object` to populate the fetched object into
Returns:
obj: :class:`Object` with the fetched object
Raises: Raises:
:class:`requests.HTTPError`, :class:`werkzeug.exceptions.HTTPException` :class:`requests.HTTPError`, :class:`werkzeug.exceptions.HTTPException`
@ -87,7 +89,9 @@ class ActivityPub(Protocol):
If we raise a werkzeug HTTPException, it will have an additional If we raise a werkzeug HTTPException, it will have an additional
requests_response attribute with the last requests.Response we received. requests_response attribute with the last requests.Response we received.
""" """
def _error(resp, extra_msg=None): resp = None
def _error(extra_msg=None):
msg = f"Couldn't fetch {id} as ActivityStreams 2" msg = f"Couldn't fetch {id} as ActivityStreams 2"
if extra_msg: if extra_msg:
msg += ': ' + extra_msg msg += ': ' + extra_msg
@ -98,34 +102,37 @@ class ActivityPub(Protocol):
def _get(url, headers): def _get(url, headers):
"""Returns None if we fetched and populated, resp otherwise.""" """Returns None if we fetched and populated, resp otherwise."""
nonlocal resp
resp = signed_get(url, headers=headers, gateway=True) resp = signed_get(url, headers=headers, gateway=True)
if not resp.content: if not resp.content:
_error(resp, 'empty response') _error('empty response')
elif common.content_type(resp) == as2.CONTENT_TYPE: elif common.content_type(resp) == as2.CONTENT_TYPE:
try: try:
obj.as2 = resp.json() as2_json = resp.json()
return
except requests.JSONDecodeError: except requests.JSONDecodeError:
_error(resp, "Couldn't decode as JSON") _error("Couldn't decode as JSON")
obj = Object.get_or_insert(id)
obj.as2 = as2_json
return obj
return resp obj = _get(id, CONNEG_HEADERS_AS2_HTML)
if obj:
resp = _get(id, CONNEG_HEADERS_AS2_HTML) return obj
if resp is None:
return
# look in HTML to find AS2 link # look in HTML to find AS2 link
if common.content_type(resp) != 'text/html': if common.content_type(resp) != 'text/html':
_error(resp, 'no AS2 available') _error('no AS2 available')
parsed = util.parse_html(resp) parsed = util.parse_html(resp)
link = parsed.find('link', rel=('alternate', 'self'), type=( link = parsed.find('link', rel=('alternate', 'self'), type=(
as2.CONTENT_TYPE, as2.CONTENT_TYPE_LD)) as2.CONTENT_TYPE, as2.CONTENT_TYPE_LD))
if not (link and link['href']): if not (link and link['href']):
_error(resp, 'no AS2 available') _error('no AS2 available')
resp = _get(link['href'], as2.CONNEG_HEADERS) obj = _get(link['href'], as2.CONNEG_HEADERS)
if resp is not None: if obj:
_error(resp) return obj
_error()
@classmethod @classmethod
def verify_signature(cls, activity): def verify_signature(cls, activity):

Wyświetl plik

@ -78,14 +78,18 @@ class Protocol:
raise NotImplementedError() raise NotImplementedError()
@classmethod @classmethod
def fetch(cls, id, obj): def fetch(cls, id):
"""Fetches a protocol-specific object and populates it into an :class:`Object`. """Fetches a protocol-specific object and returns it in an :class:`Object`.
To be implemented by subclasses. To be implemented by subclasses. The returned :class:`Object` is loaded
from the datastore, if it exists there, then updated in memory but not
yet written back to the datastore.
Args: Args:
id: str, object's URL id id: str, object's URL id
obj: :class:`Object` to populate the fetched object into
Returns:
obj: :class:`Object` with the fetched object
Raises: Raises:
:class:`werkzeug.HTTPException` if the fetch fails :class:`werkzeug.HTTPException` if the fetch fails
@ -187,8 +191,8 @@ class Protocol:
# https://github.com/snarfed/bridgy-fed/issues/63 # https://github.com/snarfed/bridgy-fed/issues/63
logger.info(f'Deactivating Followers with src or dest = {inner_obj_id}') logger.info(f'Deactivating Followers with src or dest = {inner_obj_id}')
followers = models.Follower.query(OR(models.Follower.src == inner_obj_id, followers = models.Follower.query(OR(models.Follower.src == inner_obj_id,
models.Follower.dest == inner_obj_id) models.Follower.dest == inner_obj_id)
).fetch() ).fetch()
for f in followers: for f in followers:
f.status = 'inactive' f.status = 'inactive'
obj.status = 'complete' obj.status = 'complete'
@ -243,7 +247,7 @@ class Protocol:
followee_id = followee.get('id') followee_id = followee.get('id')
follower = as1.get_object(obj.as1, 'actor') follower = as1.get_object(obj.as1, 'actor')
if not followee or not followee_id or not follower: if not followee or not followee_id or not follower:
error(f'Follow activity requires object and actor. Got: {follow}') error(f'Follow activity requires object and actor. Got: {obj.as1}')
inbox = follower.get('inbox') inbox = follower.get('inbox')
follower_id = follower.get('id') follower_id = follower.get('id')
@ -385,16 +389,11 @@ class Protocol:
id = util.fragmentless(id) id = util.fragmentless(id)
logger.info(f'Loading Object {id}') logger.info(f'Loading Object {id}')
obj = models.Object.get_by_id(id) obj = models.Object.get_by_id(id)
if obj: if obj and obj.as1:
if obj.as1:
logger.info(' got from datastore') logger.info(' got from datastore')
return obj return obj
else:
obj = models.Object(id=id)
logger.info(f'Object not in datastore or has no data: {id}') logger.info(f'Object not in datastore or has no data: {id}')
obj.clear() obj = cls.fetch(id)
cls.fetch(id, obj)
obj.source_protocol = cls.LABEL obj.source_protocol = cls.LABEL
obj.put()
return obj return obj

Wyświetl plik

@ -1363,10 +1363,7 @@ class ActivityPubUtilsTest(testutil.TestCase):
@patch('requests.get') @patch('requests.get')
def test_fetch_direct(self, mock_get): def test_fetch_direct(self, mock_get):
mock_get.return_value = AS2 mock_get.return_value = AS2
obj = Object() self.assertEqual(AS2_OBJ, ActivityPub.fetch('http://orig').as2)
ActivityPub.fetch('http://orig', obj)
self.assertEqual(AS2_OBJ, obj.as2)
mock_get.assert_has_calls(( mock_get.assert_has_calls((
self.as2_req('http://orig'), self.as2_req('http://orig'),
)) ))
@ -1374,10 +1371,7 @@ class ActivityPubUtilsTest(testutil.TestCase):
@patch('requests.get') @patch('requests.get')
def test_fetch_via_html(self, mock_get): def test_fetch_via_html(self, mock_get):
mock_get.side_effect = [HTML_WITH_AS2, AS2] mock_get.side_effect = [HTML_WITH_AS2, AS2]
obj = Object() self.assertEqual(AS2_OBJ, ActivityPub.fetch('http://orig').as2)
ActivityPub.fetch('http://orig', obj)
self.assertEqual(AS2_OBJ, obj.as2)
mock_get.assert_has_calls(( mock_get.assert_has_calls((
self.as2_req('http://orig'), self.as2_req('http://orig'),
self.as2_req('http://as2', headers=common.as2.CONNEG_HEADERS), self.as2_req('http://as2', headers=common.as2.CONNEG_HEADERS),
@ -1387,34 +1381,34 @@ class ActivityPubUtilsTest(testutil.TestCase):
def test_fetch_only_html(self, mock_get): def test_fetch_only_html(self, mock_get):
mock_get.return_value = HTML mock_get.return_value = HTML
with self.assertRaises(BadGateway): with self.assertRaises(BadGateway):
ActivityPub.fetch('http://orig', Object()) ActivityPub.fetch('http://orig')
@patch('requests.get') @patch('requests.get')
def test_fetch_not_acceptable(self, mock_get): def test_fetch_not_acceptable(self, mock_get):
mock_get.return_value=NOT_ACCEPTABLE mock_get.return_value=NOT_ACCEPTABLE
with self.assertRaises(BadGateway): with self.assertRaises(BadGateway):
ActivityPub.fetch('http://orig', Object()) ActivityPub.fetch('http://orig')
@patch('requests.get') @patch('requests.get')
def test_fetch_ssl_error(self, mock_get): def test_fetch_ssl_error(self, mock_get):
mock_get.side_effect = requests.exceptions.SSLError mock_get.side_effect = requests.exceptions.SSLError
with self.assertRaises(BadGateway): with self.assertRaises(BadGateway):
ActivityPub.fetch('http://orig', Object()) ActivityPub.fetch('http://orig')
@patch('requests.get') @patch('requests.get')
def test_fetch_datastore_no_content(self, mock_get): def test_fetch_no_content(self, mock_get):
mock_get.return_value = self.as2_resp('') mock_get.return_value = self.as2_resp('')
with self.assertRaises(BadGateway): with self.assertRaises(BadGateway):
got = ActivityPub.fetch('http://the/id', Object()) got = ActivityPub.fetch('http://the/id')
mock_get.assert_has_calls([self.as2_req('http://the/id')]) mock_get.assert_has_calls([self.as2_req('http://the/id')])
@patch('requests.get') @patch('requests.get')
def test_fetch_datastore_not_json(self, mock_get): def test_fetch_not_json(self, mock_get):
mock_get.return_value = self.as2_resp('XYZ not JSON') mock_get.return_value = self.as2_resp('XYZ not JSON')
with self.assertRaises(BadGateway): with self.assertRaises(BadGateway):
got = ActivityPub.fetch('http://the/id', Object()) got = ActivityPub.fetch('http://the/id')
mock_get.assert_has_calls([self.as2_req('http://the/id')]) mock_get.assert_has_calls([self.as2_req('http://the/id')])

Wyświetl plik

@ -357,20 +357,28 @@ class WebmentionTest(testutil.TestCase):
return super().assert_object(id, delivered_protocol='activitypub', **props) return super().assert_object(id, delivered_protocol='activitypub', **props)
def test_fetch(self, mock_get, mock_post): def test_fetch(self, mock_get, mock_post):
obj = Object()
mock_get.return_value = self.reply mock_get.return_value = self.reply
Webmention.fetch('https://user.com/post', obj) obj = Webmention.fetch('https://user.com/post')
self.assert_equals(self.reply_as1, obj.as1) self.assert_equals(self.reply_as1, obj.as1)
def test_fetch_redirect(self, mock_get, mock_post):
mock_get.return_value = requests_response(
REPOST_HTML, url='https://orig/url', redirected_url='http://new/url')
obj = Webmention.fetch('https://orig/url')
self.assert_equals(self.repost_mf2, obj.mf2)
self.assert_equals(self.repost_as1, obj.as1)
self.assertIsNone(Object.get_by_id('http://orig/url'))
@skip @skip
def test_fetch_bad_source_url(self, mock_get, mock_post): def test_fetch_bad_source_url(self, mock_get, mock_post):
with self.assertRaises(ValueError): with self.assertRaises(ValueError):
Webmention.fetch('bad', Object()) Webmention.fetch('bad')
def test_fetch_error(self, mock_get, mock_post): def test_fetch_error(self, mock_get, mock_post):
mock_get.return_value = requests_response(self.reply_html, status=405) mock_get.return_value = requests_response(self.reply_html, status=405)
with self.assertRaises(BadGateway) as e: with self.assertRaises(BadGateway) as e:
Webmention.fetch('https://foo', Object()) Webmention.fetch('https://foo')
def test_send(self, mock_get, mock_post): def test_send(self, mock_get, mock_post):
mock_get.return_value = WEBMENTION_REL_LINK mock_get.return_value = WEBMENTION_REL_LINK

Wyświetl plik

@ -34,7 +34,7 @@ class FakeProtocol(protocol.Protocol):
raise NotImplementedError() raise NotImplementedError()
@classmethod @classmethod
def fetch(cls, id, obj): def fetch(cls, id):
raise NotImplementedError() raise NotImplementedError()

Wyświetl plik

@ -36,7 +36,10 @@ class Webmention(View):
@classmethod @classmethod
def send(cls, obj, url): def send(cls, obj, url):
"""Sends a webmention to a given target URL.""" """Sends a webmention to a given target URL.
See :meth:`Protocol.send` for details.
"""
source_url = obj.proxy_url() source_url = obj.proxy_url()
logger.info(f'Sending webmention from {source_url} to {url}') logger.info(f'Sending webmention from {source_url} to {url}')
@ -50,13 +53,20 @@ class Webmention(View):
return False return False
@classmethod @classmethod
def fetch(cls, id, obj): def fetch(cls, url):
"""Fetches a URL over HTTP.""" """Fetches a URL over HTTP and extracts its microformats2.
parsed = util.fetch_mf2(id, gateway=True)
obj.mf2 = mf2util.find_first_entry(parsed, ['h-entry']) See :meth:`Protocol.fetch` for details.
if not obj.mf2: """
error(f'No microformats2 found in {id}') parsed = util.fetch_mf2(url, gateway=True)
logger.info(f'Parsed HTML entry: {json_dumps(obj.mf2, indent=2)}') entry = mf2util.find_first_entry(parsed, ['h-entry'])
if not entry:
error(f'No microformats2 found in {url}')
logger.info(f'Extracted entry: {json_dumps(entry, indent=2)}')
obj = Object.get_or_insert(parsed['url'])
obj.mf2 = entry
return obj
class WebmentionView(View): class WebmentionView(View):