diff --git a/activitypub.py b/activitypub.py index d9365de..5b0cd5e 100644 --- a/activitypub.py +++ b/activitypub.py @@ -59,8 +59,8 @@ class ActivityPub(Protocol): # TODO: return bool or otherwise unify return value with others @classmethod - def fetch(cls, id, obj): - """Tries to fetch an AS2 object and populate it into an :class:`Object`. + def fetch(cls, id): + """Tries to fetch an AS2 object. Uses HTTP content negotiation via the Content-Type header. If the url is HTML and it has a rel-alternate link with an AS2 content type, fetches and @@ -79,7 +79,9 @@ class ActivityPub(Protocol): Args: id: str, object's URL id - obj: :class:`Object` to populate the fetched object into + + Returns: + obj: :class:`Object` with the fetched object Raises: :class:`requests.HTTPError`, :class:`werkzeug.exceptions.HTTPException` @@ -87,7 +89,9 @@ class ActivityPub(Protocol): If we raise a werkzeug HTTPException, it will have an additional requests_response attribute with the last requests.Response we received. """ - def _error(resp, extra_msg=None): + resp = None + + def _error(extra_msg=None): msg = f"Couldn't fetch {id} as ActivityStreams 2" if extra_msg: msg += ': ' + extra_msg @@ -98,34 +102,37 @@ class ActivityPub(Protocol): def _get(url, headers): """Returns None if we fetched and populated, resp otherwise.""" + nonlocal resp resp = signed_get(url, headers=headers, gateway=True) if not resp.content: - _error(resp, 'empty response') + _error('empty response') elif common.content_type(resp) == as2.CONTENT_TYPE: try: - obj.as2 = resp.json() - return + as2_json = resp.json() except requests.JSONDecodeError: - _error(resp, "Couldn't decode as JSON") + _error("Couldn't decode as JSON") + obj = Object.get_or_insert(id) + obj.as2 = as2_json + return obj - return resp - - resp = _get(id, CONNEG_HEADERS_AS2_HTML) - if resp is None: - return + obj = _get(id, CONNEG_HEADERS_AS2_HTML) + if obj: + return obj # look in HTML to find AS2 link if common.content_type(resp) != 'text/html': - _error(resp, 'no AS2 available') + _error('no AS2 available') parsed = util.parse_html(resp) link = parsed.find('link', rel=('alternate', 'self'), type=( as2.CONTENT_TYPE, as2.CONTENT_TYPE_LD)) if not (link and link['href']): - _error(resp, 'no AS2 available') + _error('no AS2 available') - resp = _get(link['href'], as2.CONNEG_HEADERS) - if resp is not None: - _error(resp) + obj = _get(link['href'], as2.CONNEG_HEADERS) + if obj: + return obj + + _error() @classmethod def verify_signature(cls, activity): diff --git a/protocol.py b/protocol.py index bf5cb2d..46060dd 100644 --- a/protocol.py +++ b/protocol.py @@ -78,14 +78,18 @@ class Protocol: raise NotImplementedError() @classmethod - def fetch(cls, id, obj): - """Fetches a protocol-specific object and populates it into an :class:`Object`. + def fetch(cls, id): + """Fetches a protocol-specific object and returns it in an :class:`Object`. - To be implemented by subclasses. + To be implemented by subclasses. The returned :class:`Object` is loaded + from the datastore, if it exists there, then updated in memory but not + yet written back to the datastore. Args: id: str, object's URL id - obj: :class:`Object` to populate the fetched object into + + Returns: + obj: :class:`Object` with the fetched object Raises: :class:`werkzeug.HTTPException` if the fetch fails @@ -187,8 +191,8 @@ class Protocol: # https://github.com/snarfed/bridgy-fed/issues/63 logger.info(f'Deactivating Followers with src or dest = {inner_obj_id}') followers = models.Follower.query(OR(models.Follower.src == inner_obj_id, - models.Follower.dest == inner_obj_id) - ).fetch() + models.Follower.dest == inner_obj_id) + ).fetch() for f in followers: f.status = 'inactive' obj.status = 'complete' @@ -243,7 +247,7 @@ class Protocol: followee_id = followee.get('id') follower = as1.get_object(obj.as1, 'actor') if not followee or not followee_id or not follower: - error(f'Follow activity requires object and actor. Got: {follow}') + error(f'Follow activity requires object and actor. Got: {obj.as1}') inbox = follower.get('inbox') follower_id = follower.get('id') @@ -385,16 +389,11 @@ class Protocol: id = util.fragmentless(id) logger.info(f'Loading Object {id}') obj = models.Object.get_by_id(id) - if obj: - if obj.as1: + if obj and obj.as1: logger.info(' got from datastore') return obj - else: - obj = models.Object(id=id) logger.info(f'Object not in datastore or has no data: {id}') - obj.clear() - cls.fetch(id, obj) + obj = cls.fetch(id) obj.source_protocol = cls.LABEL - obj.put() return obj diff --git a/tests/test_activitypub.py b/tests/test_activitypub.py index 9c0ab96..0d89029 100644 --- a/tests/test_activitypub.py +++ b/tests/test_activitypub.py @@ -1363,10 +1363,7 @@ class ActivityPubUtilsTest(testutil.TestCase): @patch('requests.get') def test_fetch_direct(self, mock_get): mock_get.return_value = AS2 - obj = Object() - - ActivityPub.fetch('http://orig', obj) - self.assertEqual(AS2_OBJ, obj.as2) + self.assertEqual(AS2_OBJ, ActivityPub.fetch('http://orig').as2) mock_get.assert_has_calls(( self.as2_req('http://orig'), )) @@ -1374,10 +1371,7 @@ class ActivityPubUtilsTest(testutil.TestCase): @patch('requests.get') def test_fetch_via_html(self, mock_get): mock_get.side_effect = [HTML_WITH_AS2, AS2] - obj = Object() - - ActivityPub.fetch('http://orig', obj) - self.assertEqual(AS2_OBJ, obj.as2) + self.assertEqual(AS2_OBJ, ActivityPub.fetch('http://orig').as2) mock_get.assert_has_calls(( self.as2_req('http://orig'), self.as2_req('http://as2', headers=common.as2.CONNEG_HEADERS), @@ -1387,34 +1381,34 @@ class ActivityPubUtilsTest(testutil.TestCase): def test_fetch_only_html(self, mock_get): mock_get.return_value = HTML with self.assertRaises(BadGateway): - ActivityPub.fetch('http://orig', Object()) + ActivityPub.fetch('http://orig') @patch('requests.get') def test_fetch_not_acceptable(self, mock_get): mock_get.return_value=NOT_ACCEPTABLE with self.assertRaises(BadGateway): - ActivityPub.fetch('http://orig', Object()) + ActivityPub.fetch('http://orig') @patch('requests.get') def test_fetch_ssl_error(self, mock_get): mock_get.side_effect = requests.exceptions.SSLError with self.assertRaises(BadGateway): - ActivityPub.fetch('http://orig', Object()) + ActivityPub.fetch('http://orig') @patch('requests.get') - def test_fetch_datastore_no_content(self, mock_get): + def test_fetch_no_content(self, mock_get): mock_get.return_value = self.as2_resp('') with self.assertRaises(BadGateway): - got = ActivityPub.fetch('http://the/id', Object()) + got = ActivityPub.fetch('http://the/id') mock_get.assert_has_calls([self.as2_req('http://the/id')]) @patch('requests.get') - def test_fetch_datastore_not_json(self, mock_get): + def test_fetch_not_json(self, mock_get): mock_get.return_value = self.as2_resp('XYZ not JSON') with self.assertRaises(BadGateway): - got = ActivityPub.fetch('http://the/id', Object()) + got = ActivityPub.fetch('http://the/id') mock_get.assert_has_calls([self.as2_req('http://the/id')]) diff --git a/tests/test_webmention.py b/tests/test_webmention.py index 41fb9d7..2ac5267 100644 --- a/tests/test_webmention.py +++ b/tests/test_webmention.py @@ -357,20 +357,28 @@ class WebmentionTest(testutil.TestCase): return super().assert_object(id, delivered_protocol='activitypub', **props) def test_fetch(self, mock_get, mock_post): - obj = Object() mock_get.return_value = self.reply - Webmention.fetch('https://user.com/post', obj) + obj = Webmention.fetch('https://user.com/post') self.assert_equals(self.reply_as1, obj.as1) + def test_fetch_redirect(self, mock_get, mock_post): + mock_get.return_value = requests_response( + REPOST_HTML, url='https://orig/url', redirected_url='http://new/url') + obj = Webmention.fetch('https://orig/url') + + self.assert_equals(self.repost_mf2, obj.mf2) + self.assert_equals(self.repost_as1, obj.as1) + self.assertIsNone(Object.get_by_id('http://orig/url')) + @skip def test_fetch_bad_source_url(self, mock_get, mock_post): with self.assertRaises(ValueError): - Webmention.fetch('bad', Object()) + Webmention.fetch('bad') def test_fetch_error(self, mock_get, mock_post): mock_get.return_value = requests_response(self.reply_html, status=405) with self.assertRaises(BadGateway) as e: - Webmention.fetch('https://foo', Object()) + Webmention.fetch('https://foo') def test_send(self, mock_get, mock_post): mock_get.return_value = WEBMENTION_REL_LINK diff --git a/tests/testutil.py b/tests/testutil.py index 4a34a27..1a69b88 100644 --- a/tests/testutil.py +++ b/tests/testutil.py @@ -34,7 +34,7 @@ class FakeProtocol(protocol.Protocol): raise NotImplementedError() @classmethod - def fetch(cls, id, obj): + def fetch(cls, id): raise NotImplementedError() diff --git a/webmention.py b/webmention.py index 968cc54..f22b5d9 100644 --- a/webmention.py +++ b/webmention.py @@ -36,7 +36,10 @@ class Webmention(View): @classmethod def send(cls, obj, url): - """Sends a webmention to a given target URL.""" + """Sends a webmention to a given target URL. + + See :meth:`Protocol.send` for details. + """ source_url = obj.proxy_url() logger.info(f'Sending webmention from {source_url} to {url}') @@ -50,13 +53,20 @@ class Webmention(View): return False @classmethod - def fetch(cls, id, obj): - """Fetches a URL over HTTP.""" - parsed = util.fetch_mf2(id, gateway=True) - obj.mf2 = mf2util.find_first_entry(parsed, ['h-entry']) - if not obj.mf2: - error(f'No microformats2 found in {id}') - logger.info(f'Parsed HTML entry: {json_dumps(obj.mf2, indent=2)}') + def fetch(cls, url): + """Fetches a URL over HTTP and extracts its microformats2. + + See :meth:`Protocol.fetch` for details. + """ + parsed = util.fetch_mf2(url, gateway=True) + entry = mf2util.find_first_entry(parsed, ['h-entry']) + if not entry: + error(f'No microformats2 found in {url}') + + logger.info(f'Extracted entry: {json_dumps(entry, indent=2)}') + obj = Object.get_or_insert(parsed['url']) + obj.mf2 = entry + return obj class WebmentionView(View):