From e194ccb3ad8f7fdf08bb1b16b4de0b7150af2c6e Mon Sep 17 00:00:00 2001 From: Ryan Barrett Date: Fri, 14 Jul 2023 12:45:47 -0700 Subject: [PATCH] Protocol.load/fetch returns True/False (or None) to mean "not this protocol" ie, nothing failed, but the given id eitehr doesn't below to the given protocol, or fetched successfully but its data belongs to a different protocol. vs raising exceptions for fetches that fail or error, eg network connection problems, Web got HTML with microformats2 but no h-entry, ActivityPub got non-AS2, etc. used in Protocol.for_id: when a given Protocol's fetch doesn't work, we move on and try the next protocol _if_ it returned False (ie wasn't fetchable with that protocol, but we fail fast if it raises an exception (ie fetch failed). --- activitypub.py | 56 ++++++++++++++++++++++++++++----------- common.py | 6 ----- convert.py | 4 ++- follow.py | 6 +++++ protocol.py | 43 ++++++++++++++++++++---------- tests/test_activitypub.py | 27 ++++++++++--------- tests/test_protocol.py | 16 ++++++----- tests/test_web.py | 20 ++++++++++++++ tests/testutil.py | 4 +-- web.py | 35 ++++++++++++++---------- 10 files changed, 146 insertions(+), 71 deletions(-) diff --git a/activitypub.py b/activitypub.py index 5fc0ce0..d523818 100644 --- a/activitypub.py +++ b/activitypub.py @@ -25,7 +25,6 @@ from common import ( error, host_url, is_blocklisted, - NoMicroformats, redirect_unwrap, redirect_wrap, TLD_BLOCKLIST, @@ -205,24 +204,37 @@ class ActivityPub(User, Protocol): Signs the request with the current user's key. If not provided, defaults to using @snarfed.org@snarfed.org's key. + See :meth:`Protocol.fetch` for more details. + Args: obj: :class:`Object` with the id to fetch. Fills data into the as2 property. kwargs: ignored + Returns: + True if the object was fetched and populated successfully, + False otherwise + Raises: :class:`requests.HTTPError`, :class:`werkzeug.exceptions.HTTPException` If we raise a werkzeug HTTPException, it will have an additional requests_response attribute with the last requests.Response we received. """ + url = obj.key.id() + if not util.is_web(url): + logger.info(f'{url} is not a URL') + return False + resp = None def _error(extra_msg=None): - msg = f"Couldn't fetch {obj.key.id()} as ActivityStreams 2" + msg = f"Couldn't fetch {url} as ActivityStreams 2" if extra_msg: msg += ': ' + extra_msg logger.warning(msg) + # protocol.for_id depends on us raising this when an AP network + # fetch fails. if we change that, update for_id too! err = BadGateway(msg) err.requests_response = resp raise err @@ -230,7 +242,15 @@ class ActivityPub(User, Protocol): def _get(url, headers): """Returns None if we fetched and populated, resp otherwise.""" nonlocal resp - resp = signed_get(url, headers=headers, gateway=True) + + try: + resp = signed_get(url, headers=headers, gateway=True) + except BadGateway as e: + # ugh, this is ugly, should be something structured + if '406 Client Error' in str(e): + return + raise + if not resp.content: _error('empty response') elif common.content_type(resp) in as2.CONTENT_TYPES: @@ -239,24 +259,30 @@ class ActivityPub(User, Protocol): except requests.JSONDecodeError: _error("Couldn't decode as JSON") - obj.as2 = _get(obj.key.id(), CONNEG_HEADERS_AS2_HTML) + obj.as2 = _get(url, CONNEG_HEADERS_AS2_HTML) + if obj.as2: - return obj + return True + elif not resp: + return False # look in HTML to find AS2 link if common.content_type(resp) != 'text/html': - _error('no AS2 available') + logger.info('no AS2 available') + return False + parsed = util.parse_html(resp) link = parsed.find('link', rel=('alternate', 'self'), type=( as2.CONTENT_TYPE, as2.CONTENT_TYPE_LD)) if not (link and link['href']): - _error('no AS2 available') + logger.info('no AS2 available') + return False obj.as2 = _get(link['href'], as2.CONNEG_HEADERS) if obj.as2: - return obj + return True - _error() + return False @classmethod def serve(cls, obj): @@ -322,8 +348,10 @@ class ActivityPub(User, Protocol): else: raise - if key_actor.deleted: + if key_actor and key_actor.deleted: abort(202, f'Ignoring, signer {keyId} is already deleted') + elif not key_actor: + error(f"Couldn't load {keyId} to verify signature", status=401) key = key_actor.as2.get("publicKey", {}).get('publicKeyPem') logger.info(f'Verifying signature for {request.path} with key {key}') @@ -641,10 +669,7 @@ def actor(protocol, domain): cls = PROTOCOLS[protocol] g.user = cls.get_or_create(domain) if not g.user.obj or not g.user.obj.as1: - try: - g.user.obj = cls.load(f'https://{domain}/', gateway=True) - except NoMicroformats as e: - pass + g.user.obj = cls.load(f'https://{domain}/', gateway=True) # TODO: unify with common.actor() actor = g.user.as2() or { @@ -702,7 +727,8 @@ def inbox(protocol=None, domain=None): # load user if protocol and domain: - g.user = PROTOCOLS[protocol].get_or_create(domain, direct=False) # receiving user + # receiving user + g.user = PROTOCOLS[protocol].get_or_create(domain, direct=False) if not g.user.direct and actor_id: # this is a deliberate interaction with an indirect receiving user; # create a local AP User for the sending user diff --git a/common.py b/common.py index 6ccbc1a..faaa665 100644 --- a/common.py +++ b/common.py @@ -13,7 +13,6 @@ from Crypto.Util import number from flask import abort, g, make_response, request from oauth_dropins.webutil import util, webmention from oauth_dropins.webutil.appengine_info import DEBUG -from werkzeug.exceptions import BadRequest logger = logging.getLogger(__name__) @@ -62,11 +61,6 @@ DOMAIN_BLOCKLIST = frozenset(( CACHE_TIME = timedelta(seconds=60) -class NoMicroformats(BadRequest): - """Raised by :meth:`Web.fetch` when a page has no microformats2.""" - pass - - def base64_to_long(x): """Converts x from URL safe base64 encoding to a long integer. diff --git a/convert.py b/convert.py index 8b11498..2309c94 100644 --- a/convert.py +++ b/convert.py @@ -68,7 +68,9 @@ def convert(dest, _): # load, and maybe fetch. if it's a post/update, redirect to inner object. obj = src_cls.load(url) - if not obj.as1: + if not obj: + error(f"Couldn't load {url}", status=404) + elif not obj.as1: error(f'Stored object for {id} has no data', status=404) type = as1.object_type(obj.as1) diff --git a/follow.py b/follow.py index 319473a..a25a938 100644 --- a/follow.py +++ b/follow.py @@ -116,6 +116,10 @@ class FollowCallback(indieauth.Callback): # TODO(#512): follower will always be Web here, but we should generalize # followee support in UI and here across protocols followee = ActivityPub.load(as2_url) + if not followee: + flash(f"Couldn't load {as2_url} as AS2") + return redirect(g.user.user_page_path('following')) + followee_id = followee.as1.get('id') inbox = followee.as2.get('inbox') if not followee_id or not inbox: @@ -198,6 +202,8 @@ class UnfollowCallback(indieauth.Callback): if not followee.obj or not followee.obj.as1: # fetch to get full followee so we can find its target to deliver to followee.obj = ActivityPub.load(followee_id) + if not followee.obj: + error("Couldn't load {followee_id} as AS2") followee.put() # TODO(#529): generalize diff --git a/protocol.py b/protocol.py index aab933c..b4afa6b 100644 --- a/protocol.py +++ b/protocol.py @@ -210,9 +210,13 @@ class Protocol: for protocol in candidates: logger.info(f'Trying {protocol.__name__}') try: - protocol.load(id, local=False, remote=True) - logger.info(f' {protocol.__name__} owns {id}') - return protocol + if protocol.load(id, local=False, remote=True): + logger.info(f' {protocol.__name__} owns {id}') + return protocol + except werkzeug.exceptions.BadGateway: + # we tried and failed fetching the id over the network. + # this depends on ActivityPub.fetch raising this! + return None except werkzeug.exceptions.HTTPException as e: # internal error we generated ourselves; try next protocol pass @@ -272,6 +276,10 @@ class Protocol: def fetch(cls, obj, **kwargs): """Fetches a protocol-specific object and populates it in an :class:`Object`. + Errors are raised as exceptions. If this method returns False, the fetch + didn't fail but didn't succeed either, eg the id isn't valid for this + protocol, or the fetch didn't return valid data for this protocol. + To be implemented by subclasses. Args: @@ -281,6 +289,10 @@ class Protocol: Raises: :class:`werkzeug.HTTPException` if the fetch fails + + Returns: + True if the object was fetched and populated successfully, + False otherwise """ raise NotImplementedError() @@ -485,7 +497,7 @@ class Protocol: if actor and actor.keys() == set(['id']): logger.info('Fetching actor so we have name, profile photo, etc') actor_obj = cls.load(actor['id']) - if actor_obj.as1: + if actor_obj and actor_obj.as1: obj.our_as1 = {**obj.as1, 'actor': actor_obj.as1} # fetch object if necessary so we can render it in feeds @@ -525,6 +537,8 @@ class Protocol: from_cls = cls from_obj = from_cls.load(from_id) + if not from_obj: + error(f"Couldn't load {from_id}") if not from_obj.as1: from_obj.our_as1 = from_as1 @@ -555,7 +569,7 @@ class Protocol: continue to_obj = to_cls.load(to_id) - if not to_obj.as1: + if to_obj and not to_obj.as1: to_obj.our_as1 = to_as1 to_obj.put() @@ -610,15 +624,12 @@ class Protocol: obj: :class:`Object`, the same one if the input obj is an activity, otherwise a new one """ - obj_actor = as1.get_owner(obj.as1) - if not obj_actor and g.user: - obj_actor = g.user.key.id() - - now = util.now().isoformat() - if obj.type not in ('note', 'article', 'comment'): return obj + obj_actor = as1.get_owner(obj.as1) + now = util.now().isoformat() + # this is a raw post; wrap it in a create or update activity if obj.changed: logger.info(f'Content has changed from last time at {obj.updated}! Redelivering to all inboxes') @@ -877,8 +888,9 @@ class Protocol: datastore after a successful remote fetch. kwargs: passed through to :meth:`fetch()` - Returns: :class:`Object`, or None if it isn't in the datastore and remote - is False + Returns: :class:`Object`, or None if: + * it isn't fetchable, eg a non-URL string for Web + * remote is False and it isn't in the cache or datastore Raises: :class:`requests.HTTPError`, anything else that :meth:`fetch` raises @@ -927,7 +939,10 @@ class Protocol: obj.new = True obj.changed = False - cls.fetch(obj, **kwargs) + fetched = cls.fetch(obj, **kwargs) + if not fetched: + return None + if obj.new is False: obj.changed = obj.activity_changed(orig_as1) diff --git a/tests/test_activitypub.py b/tests/test_activitypub.py index 0ead302..8c48286 100644 --- a/tests/test_activitypub.py +++ b/tests/test_activitypub.py @@ -408,8 +408,6 @@ class ActivityPubTest(TestCase): self.assertEqual(400, resp.status_code) def test_inbox_reply_object(self, mock_head, mock_get, mock_post): - mock_get.side_effect = [self.as2_resp(LIKE_ACTOR)] - self._test_inbox_reply(REPLY_OBJECT, mock_head, mock_get, mock_post) self.assert_object('http://mas.to/reply/id', @@ -419,12 +417,11 @@ class ActivityPubTest(TestCase): # auto-generated post activity self.assert_object( 'http://mas.to/reply/id#bridgy-fed-create', - users=[ndb.Key(ActivityPub, 'user.com'), self.user.key], + users=[self.user.key], source_protocol='activitypub', our_as1={ **as2.to_as1(REPLY), 'id': 'http://mas.to/reply/id#bridgy-fed-create', - 'actor': as2.to_as1(LIKE_ACTOR), 'published': '2022-01-02T03:04:05+00:00', }, status='complete', @@ -434,8 +431,6 @@ class ActivityPubTest(TestCase): ) def test_inbox_reply_object_wrapped(self, mock_head, mock_get, mock_post): - mock_get.side_effect = [self.as2_resp(LIKE_ACTOR)] - self._test_inbox_reply(REPLY_OBJECT_WRAPPED, mock_head, mock_get, mock_post) self.assert_object('http://mas.to/reply/id', @@ -445,12 +440,11 @@ class ActivityPubTest(TestCase): # auto-generated post activity self.assert_object( 'http://mas.to/reply/id#bridgy-fed-create', - users=[ndb.Key(ActivityPub, 'user.com'), self.user.key], + users=[self.user.key], source_protocol='activitypub', our_as1={ **as2.to_as1(REPLY), 'id': 'http://mas.to/reply/id#bridgy-fed-create', - 'actor': as2.to_as1(LIKE_ACTOR), 'published': '2022-01-02T03:04:05+00:00', }, status='complete', @@ -1742,14 +1736,18 @@ class ActivityPubUtilsTest(TestCase): @patch('requests.get') def test_fetch_only_html(self, mock_get): mock_get.return_value = HTML - with self.assertRaises(BadGateway): - ActivityPub.fetch(Object(id='http://orig')) + + obj = Object(id='http://orig') + self.assertFalse(ActivityPub.fetch(obj)) + self.assertIsNone(obj.as1) @patch('requests.get') def test_fetch_not_acceptable(self, mock_get): mock_get.return_value = NOT_ACCEPTABLE - with self.assertRaises(BadGateway): - ActivityPub.fetch(Object(id='http://orig')) + + obj = Object(id='http://orig') + self.assertFalse(ActivityPub.fetch(obj)) + self.assertIsNone(obj.as1) @patch('requests.get') def test_fetch_ssl_error(self, mock_get): @@ -1775,6 +1773,11 @@ class ActivityPubUtilsTest(TestCase): mock_get.assert_has_calls([self.as2_req('http://the/id')]) + def test_fetch_non_url(self): + obj = Object(id='x y z') + self.assertFalse(ActivityPub.fetch(obj)) + self.assertIsNone(obj.as1) + @skip def test_serve(self): obj = Object(id='http://orig', as2=LIKE) diff --git a/tests/test_protocol.py b/tests/test_protocol.py index eeb6659..0e534df 100644 --- a/tests/test_protocol.py +++ b/tests/test_protocol.py @@ -113,6 +113,13 @@ class ProtocolTest(TestCase): self.assertEqual(ActivityPub, Protocol.for_id('http://ap/actor')) self.assertIn(self.as2_req('http://ap/actor'), mock_get.mock_calls) + @patch('requests.get') + def test_for_id_activitypub_fetch_fails(self, mock_get): + mock_get.return_value = requests_response('', status=403) + self.assertIsNone(Protocol.for_id('http://ap/actor')) + self.assertIn(self.as2_req('http://ap/actor'), mock_get.mock_calls) + mock_get.assert_called_once() + @patch('requests.get') def test_for_id_web_fetch(self, mock_get): mock_get.return_value = requests_response(ACTOR_HTML) @@ -231,10 +238,7 @@ class ProtocolTest(TestCase): self.assert_entities_equal(obj, Protocol.load('foo', remote=False)) def test_load_local_false_missing(self): - with self.assertRaises(requests.HTTPError) as e: - Fake.load('foo', local=False) - self.assertEqual(410, e.response.status_code) - + self.assertIsNone(Fake.load('foo', local=False)) self.assertEqual(['foo'], Fake.fetched) def test_load_local_false_existing(self): @@ -412,13 +416,12 @@ class ProtocolReceiveTest(TestCase): 'objectType': 'activity', 'verb': 'update', 'id': update_id, - 'actor': 'fake:user', 'object': post_as1, }, delivered=['shared:target'], type='update', labels=['user', 'activity'], - users=[g.user.key], + users=[], ) self.assertEqual([(obj, 'shared:target')], Fake.sent) @@ -1061,7 +1064,6 @@ class ProtocolReceiveTest(TestCase): 'object': ['other:dan', 'fake:alice'], } - # with self.assertRaises(NoContent) as e: self.assertEqual('OK', OtherFake.receive(follow_as1)) self.assertEqual(2, len(Fake.sent)) diff --git a/tests/test_web.py b/tests/test_web.py index 7a75c7c..c9b9c81 100644 --- a/tests/test_web.py +++ b/tests/test_web.py @@ -1935,6 +1935,26 @@ class WebUtilTest(TestCase): Web.fetch(obj) self.assertEqual(500, e.status_code) + def test_fetch_not_html(self, mock_get, __): + mock_get.return_value = self.as2_resp({}) + + obj = Object(id='https://user.com/post') + self.assertFalse(Web.fetch(obj)) + self.assertIsNone(obj.as1) + + def test_fetch_non_url(self, mock_get, __): + obj = Object(id='x y z') + self.assertFalse(Web.fetch(obj)) + self.assertIsNone(obj.as1) + + def test_fetch_no_mf2(self, mock_get, __): + mock_get.return_value = requests_response( + '\nfoo\n') + + obj = Object(id='https://user.com/post') + self.assertFalse(Web.fetch(obj)) + self.assertIsNone(obj.as1) + def test_send(self, mock_get, mock_post): mock_get.return_value = WEBMENTION_REL_LINK mock_post.return_value = requests_response() diff --git a/tests/testutil.py b/tests/testutil.py index 6d9eaab..26bb105 100644 --- a/tests/testutil.py +++ b/tests/testutil.py @@ -76,9 +76,9 @@ class Fake(User, protocol.Protocol): if id in cls.fetchable: obj.our_as1 = cls.fetchable[id] - return obj + return True - raise requests.HTTPError(response=util.Struct(status_code='410')) + return False @classmethod def serve(cls, obj): diff --git a/web.py b/web.py index edc7254..a99a6b7 100644 --- a/web.py +++ b/web.py @@ -17,7 +17,7 @@ from oauth_dropins.webutil.appengine_info import APP_ID from oauth_dropins.webutil.flask_util import error, flash from oauth_dropins.webutil.util import json_dumps, json_loads from oauth_dropins.webutil import webmention -from requests import HTTPError, RequestException, URLRequired +from requests import HTTPError, RequestException from werkzeug.exceptions import BadGateway, BadRequest, HTTPException, NotFound import common @@ -192,12 +192,14 @@ class Web(User, Protocol): pass # check home page + self.obj = None + self.has_hcard = False try: self.obj = Web.load(self.web_url(), remote=True, gateway=True) - self.has_hcard = True - except (BadRequest, NotFound, common.NoMicroformats): - self.obj = None - self.has_hcard = False + if self.obj: + self.has_hcard = True + except (BadRequest, NotFound): + pass self.put() return self @@ -306,6 +308,10 @@ class Web(User, Protocol): kwargs: ignored """ url = obj.key.id() + if not util.is_web(url): + logger.info(f'{url} is not a URL') + return False + is_homepage = urlparse(url).path.strip('/') == '' require_backlink = (common.host_url().rstrip('/') @@ -315,25 +321,26 @@ class Web(User, Protocol): try: parsed = util.fetch_mf2(url, gateway=gateway, require_backlink=require_backlink) - except (ValueError, URLRequired) as e: + except ValueError as e: error(str(e)) if parsed is None: error(f'id {urlparse(url).fragment} not found in {url}') + elif not parsed.get('items'): + logger.info(f'No microformats2 found in {url}') + return False # find mf2 item if is_homepage: logger.info(f"{url} is user's web url") entry = mf2util.representative_hcard(parsed, parsed['url']) - logger.info(f'Representative h-card: {json_dumps(entry, indent=2)}') if not entry: - msg = f"Couldn't find a representative h-card (http://microformats.org/wiki/representative-hcard-parsing) on {parsed['url']}" - logging.info(msg) - raise common.NoMicroformats(msg) + error(f"Couldn't find a representative h-card (http://microformats.org/wiki/representative-hcard-parsing) on {parsed['url']}") + logger.info(f'Representative h-card: {json_dumps(entry, indent=2)}') else: entry = mf2util.find_first_entry(parsed, ['h-entry']) if not entry: - error(f'No microformats2 found in {url}') + error(f'No microformats2 h-entry found in {url}') # store final URL in mf2 object, and also default url property to it, # since that's the fallback for AS1/AS2 id @@ -372,7 +379,7 @@ class Web(User, Protocol): }]) obj.mf2 = entry - return obj + return True @classmethod def serve(cls, obj): @@ -530,8 +537,8 @@ def webmention_task(): 'object': source, }) - if not obj.mf2 and obj.type != 'delete': - error(f'No microformats2 found in {source}', status=304) + if not obj or (not obj.mf2 and obj.type != 'delete'): + error(f"Couldn't load {source} as microformats2 HTML", status=304) elif obj.mf2: # default actor to user props = obj.mf2['properties']