diff --git a/activitypub.py b/activitypub.py index edc8944..546bfec 100644 --- a/activitypub.py +++ b/activitypub.py @@ -63,7 +63,7 @@ class ActivityPub(Protocol): # TODO: return bool or otherwise unify return value with others @classmethod - def fetch(cls, id): + def fetch(cls, obj): """Tries to fetch an AS2 object. Uses HTTP content negotiation via the Content-Type header. If the url is @@ -82,10 +82,8 @@ class ActivityPub(Protocol): using @snarfed.org@snarfed.org's key. Args: - id: str, object's URL id - - Returns: - obj: :class:`Object` with the fetched object + obj: :class:`Object` with the id to fetch. Fills data into the as2 + property. Raises: :class:`requests.HTTPError`, :class:`werkzeug.exceptions.HTTPException` @@ -96,7 +94,7 @@ class ActivityPub(Protocol): resp = None def _error(extra_msg=None): - msg = f"Couldn't fetch {id} as ActivityStreams 2" + msg = f"Couldn't fetch {obj.key.id()} as ActivityStreams 2" if extra_msg: msg += ': ' + extra_msg logger.warning(msg) @@ -112,23 +110,12 @@ class ActivityPub(Protocol): _error('empty response') elif common.content_type(resp) == as2.CONTENT_TYPE: try: - as2_json = resp.json() + return resp.json() except requests.JSONDecodeError: _error("Couldn't decode as JSON") - obj = Object.get_by_id(id) - if obj: - orig_as1 = obj.as1 - obj.clear() - obj.as2 = as2_json - obj.changed = as1.activity_changed(orig_as1, obj.as1) - obj.new = False - else: - obj = Object(id=id, as2=as2_json) - obj.new = True - return obj - obj = _get(id, CONNEG_HEADERS_AS2_HTML) - if obj: + obj.as2 = _get(obj.key.id(), CONNEG_HEADERS_AS2_HTML) + if obj.as2: return obj # look in HTML to find AS2 link @@ -140,8 +127,8 @@ class ActivityPub(Protocol): if not (link and link['href']): _error('no AS2 available') - obj = _get(link['href'], as2.CONNEG_HEADERS) - if obj: + obj.as2 = _get(link['href'], as2.CONNEG_HEADERS) + if obj.as2: return obj _error() @@ -160,7 +147,7 @@ class ActivityPub(Protocol): if not sig: error('No HTTP Signature', status=401) - logging.info('Verifying HTTP Signature') + logger.info('Verifying HTTP Signature') logger.info(f'Headers: {json_dumps(dict(request.headers), indent=2)}') # parse_signature_header lower-cases all keys @@ -182,7 +169,7 @@ class ActivityPub(Protocol): obj_id = as1.get_object(activity).get('id') if (activity.get('type') == 'Delete' and obj_id and keyId == fragmentless(obj_id)): - logging.info('Object/actor being deleted is also keyId') + logger.info('Object/actor being deleted is also keyId') key_actor = Object(id=keyId, source_protocol='activitypub', deleted=True) key_actor.put() else: diff --git a/follow.py b/follow.py index 282af0c..0f76a12 100644 --- a/follow.py +++ b/follow.py @@ -130,7 +130,7 @@ class FollowCallback(indieauth.Callback): return me = auth_entity.key.id() - logging.info(f'Storing indieauthed-me: {me} in session cookie') + logger.info(f'Storing indieauthed-me: {me} in session cookie') session['indieauthed-me'] = me domain = util.domain_from_link(me) diff --git a/models.py b/models.py index 18714b0..5adca13 100644 --- a/models.py +++ b/models.py @@ -308,7 +308,7 @@ class Object(StringIdModel): # assert (self.as2 is not None) ^ (self.bsky is not None) ^ (self.mf2 is not None), \ # f'{self.as2} {self.bsky} {self.mf2}' if bool(self.as2) + bool(self.bsky) + bool(self.mf2) > 1: - logging.warning(f'{self.key} has multiple! {bool(self.as2)} {bool(self.bsky)} {bool(self.mf2)}') + logger.warning(f'{self.key} has multiple! {bool(self.as2)} {bool(self.bsky)} {bool(self.mf2)}') if self.our_as1 is not None: return common.redirect_unwrap(self.our_as1) @@ -359,7 +359,7 @@ class Object(StringIdModel): for prop in 'as2', 'bsky', 'mf2': val = getattr(self, prop, None) if val: - logging.warning(f'Wiping out {prop}: {json_dumps(val, indent=2)}') + logger.warning(f'Wiping out {prop}: {json_dumps(val, indent=2)}') setattr(self, prop, None) def proxy_url(self): diff --git a/protocol.py b/protocol.py index 0c41927..caaea5d 100644 --- a/protocol.py +++ b/protocol.py @@ -82,7 +82,7 @@ class Protocol: raise NotImplementedError() @classmethod - def fetch(cls, id): + def fetch(cls, obj): """Fetches a protocol-specific object and returns it in an :class:`Object`. To be implemented by subclasses. The returned :class:`Object` is loaded @@ -90,10 +90,8 @@ class Protocol: yet written back to the datastore. Args: - id: str, object's URL id - - Returns: - obj: :class:`Object` with the fetched object + obj: :class:`Object` with the id to fetch. Data is filled into one of + the protocol-specific properties, eg as2, mf2, bsky. Raises: :class:`werkzeug.HTTPException` if the fetch fails @@ -131,7 +129,7 @@ class Protocol: obj.populate(source_protocol=cls.LABEL, **props) obj.put() - logging.info(f'Got AS1: {json_dumps(obj.as1, indent=2)}') + logger.info(f'Got AS1: {json_dumps(obj.as1, indent=2)}') if obj.type not in SUPPORTED_TYPES: error(f'Sorry, {obj.type} activities are not supported yet.', status=501) @@ -161,7 +159,7 @@ class Protocol: follower = models.Follower.get_by_id( models.Follower._id(dest=followee_domain, src=actor_id)) if follower: - logging.info(f'Marking {follower} inactive') + logger.info(f'Marking {follower} inactive') follower.status = 'inactive' follower.put() else: @@ -362,7 +360,7 @@ class Protocol: error(msg, status=int(errors[0][0] or 502)) @classmethod - def load(cls, id): + def load(cls, id, refresh=False): """Loads and returns an Object from memory cache, datastore, or HTTP fetch. Assumes id is a URL. Any fragment at the end is stripped before loading. @@ -379,29 +377,48 @@ class Protocol: Args: id: str + refresh: boolean, whether to fetch the object remotely even if we have + it stored Returns: :class:`Object` Raises: :class:`requests.HTTPError`, anything else that :meth:`fetch` raises """ - id = util.fragmentless(id) - - with objects_cache_lock: - cached = objects_cache.get(id) - if cached: - return cached + if not refresh: + with objects_cache_lock: + cached = objects_cache.get(id) + if cached: + return cached logger.info(f'Loading Object {id}') + orig_as1 = None obj = models.Object.get_by_id(id) if obj and (obj.as1 or obj.deleted): logger.info(' got from datastore') - with objects_cache_lock: - objects_cache[id] = obj - return obj + obj.new = False + orig_as1 = obj.as1 + if not refresh: + with objects_cache_lock: + objects_cache[id] = obj + return obj + + if refresh: + logger.info(' forced refresh requested') + + if obj: + obj.clear() + else: + logger.info(f' not in datastore') + obj = models.Object(id=id) + obj.new = True + obj.changed = False + + cls.fetch(obj) + if orig_as1: + obj.new = False + obj.changed = as1.activity_changed(orig_as1, obj.as1) - logger.info(f'Object not in datastore or has no data: {id}') - obj = cls.fetch(id) obj.source_protocol = cls.LABEL obj.put() diff --git a/tests/test_activitypub.py b/tests/test_activitypub.py index c14701c..155c549 100644 --- a/tests/test_activitypub.py +++ b/tests/test_activitypub.py @@ -1297,12 +1297,12 @@ class ActivityPubUtilsTest(testutil.TestCase): mock_get.assert_not_called() @patch('requests.get') - def test_load_strips_fragment(self, mock_get): - stored = Object(id='http://the/id', as2=AS2_OBJ) + def test_load_preserves_fragment(self, mock_get): + stored = Object(id='http://the/id#frag', as2=AS2_OBJ) stored.put() protocol.objects_cache.clear() - got = ActivityPub.load('http://the/id#ignore') + got = ActivityPub.load('http://the/id#frag') self.assert_entities_equal(stored, got) mock_get.assert_not_called() @@ -1358,11 +1358,10 @@ class ActivityPubUtilsTest(testutil.TestCase): @patch('requests.get') def test_fetch_direct(self, mock_get): mock_get.return_value = AS2 - got = ActivityPub.fetch('http://orig') - self.assertEqual(AS2_OBJ, got.as2) + obj = Object(id='http://orig') + ActivityPub.fetch(obj) + self.assertEqual(AS2_OBJ, obj.as2) - self.assertTrue(got.new) - self.assertFalse(got.changed) mock_get.assert_has_calls(( self.as2_req('http://orig'), )) @@ -1370,11 +1369,10 @@ class ActivityPubUtilsTest(testutil.TestCase): @patch('requests.get') def test_fetch_via_html(self, mock_get): mock_get.side_effect = [HTML_WITH_AS2, AS2] - got = ActivityPub.fetch('http://orig') - self.assertEqual(AS2_OBJ, got.as2) + obj = Object(id='http://orig') + ActivityPub.fetch(obj) + self.assertEqual(AS2_OBJ, obj.as2) - self.assertTrue(got.new) - self.assertFalse(got.changed) mock_get.assert_has_calls(( self.as2_req('http://orig'), self.as2_req('http://as2', headers=common.as2.CONNEG_HEADERS), @@ -1384,26 +1382,26 @@ class ActivityPubUtilsTest(testutil.TestCase): def test_fetch_only_html(self, mock_get): mock_get.return_value = HTML with self.assertRaises(BadGateway): - ActivityPub.fetch('http://orig') + ActivityPub.fetch(Object(id='http://orig')) @patch('requests.get') def test_fetch_not_acceptable(self, mock_get): mock_get.return_value=NOT_ACCEPTABLE with self.assertRaises(BadGateway): - ActivityPub.fetch('http://orig') + ActivityPub.fetch(Object(id='http://orig')) @patch('requests.get') def test_fetch_ssl_error(self, mock_get): mock_get.side_effect = requests.exceptions.SSLError with self.assertRaises(BadGateway): - ActivityPub.fetch('http://orig') + ActivityPub.fetch(Object(id='http://orig')) @patch('requests.get') def test_fetch_no_content(self, mock_get): mock_get.return_value = self.as2_resp('') with self.assertRaises(BadGateway): - got = ActivityPub.fetch('http://the/id') + ActivityPub.fetch(Object(id='http://the/id')) mock_get.assert_has_calls([self.as2_req('http://the/id')]) @@ -1412,39 +1410,10 @@ class ActivityPubUtilsTest(testutil.TestCase): mock_get.return_value = self.as2_resp('XYZ not JSON') with self.assertRaises(BadGateway): - got = ActivityPub.fetch('http://the/id') + ActivityPub.fetch(Object(id='http://the/id')) mock_get.assert_has_calls([self.as2_req('http://the/id')]) - @patch('requests.get') - def test_fetch_content_changed(self, mock_get): - Object(id='http://orig', as2={ - **NOTE_OBJECT, - 'content': 'something else', - }).put() - mock_get.return_value = self.as2_resp(NOTE_OBJECT) - - obj = ActivityPub.fetch('http://orig') - self.assert_equals(NOTE_OBJECT, obj.as2) - self.assertFalse(obj.new) - self.assertTrue(obj.changed) - mock_get.assert_has_calls(( - self.as2_req('http://orig'), - )) - - @patch('requests.get') - def test_fetch_content_unchanged(self, mock_get): - Object(id='http://orig', as2=NOTE_OBJECT).put() - mock_get.return_value = self.as2_resp(NOTE_OBJECT) - - obj = ActivityPub.fetch('http://orig') - self.assert_equals(NOTE_OBJECT, obj.as2) - self.assertFalse(obj.new) - self.assertFalse(obj.changed) - mock_get.assert_has_calls(( - self.as2_req('http://orig'), - )) - def test_postprocess_as2_idempotent(self): g.user = self.make_user('foo.com') diff --git a/tests/test_protocol.py b/tests/test_protocol.py index 594dc5f..c1aff36 100644 --- a/tests/test_protocol.py +++ b/tests/test_protocol.py @@ -60,16 +60,25 @@ class ProtocolTest(testutil.TestCase): ) def test_load(self): - obj = Object(id='foo', our_as1={'x': 'y'}) - FakeProtocol.objects = {'foo': obj} - self.assert_entities_equal(obj, FakeProtocol.load('foo')) + FakeProtocol.objects['foo'] = {'x': 'y'} + + loaded = FakeProtocol.load('foo') + self.assert_equals({'x': 'y'}, loaded.our_as1) + self.assertFalse(loaded.changed) + self.assertTrue(loaded.new) + self.assertIsNotNone(Object.get_by_id('foo')) self.assertEqual(['foo'], FakeProtocol.fetched) def test_load_already_stored(self): stored = Object(id='foo', our_as1={'x': 'y'}) stored.put() - self.assert_entities_equal(stored, FakeProtocol.load('foo')) + + loaded = FakeProtocol.load('foo') + self.assert_equals({'x': 'y'}, loaded.our_as1) + self.assertFalse(loaded.changed) + self.assertFalse(loaded.new) + self.assertEqual([], FakeProtocol.fetched) @patch('requests.get') @@ -77,5 +86,32 @@ class ProtocolTest(testutil.TestCase): stored = Object(id='foo', deleted=True) stored.put() - self.assert_entities_equal(stored, FakeProtocol.load('foo')) - mock_get.assert_not_called() + loaded = FakeProtocol.load('foo') + self.assert_entities_equal(stored, loaded) + self.assertFalse(loaded.changed) + self.assertFalse(loaded.new) + + self.assertEqual([], FakeProtocol.fetched) + + @patch('requests.get') + def test_load_refresh_unchanged(self, mock_get): + obj = Object(id='foo', our_as1={'x': 'stored'}) + obj.put() + FakeProtocol.objects['foo'] = {'x': 'stored'} + + loaded = FakeProtocol.load('foo', refresh=True) + self.assert_entities_equal(obj, loaded) + self.assertFalse(obj.changed) + self.assertFalse(obj.new) + self.assertEqual(['foo'], FakeProtocol.fetched) + + @patch('requests.get') + def test_load_refresh_changed(self, mock_get): + Object(id='foo', our_as1={'content': 'stored'}).put() + FakeProtocol.objects['foo'] = {'content': 'new'} + + loaded = FakeProtocol.load('foo', refresh=True) + self.assert_equals({'content': 'new'}, loaded.our_as1) + self.assertTrue(loaded.changed) + self.assertFalse(loaded.new) + self.assertEqual(['foo'], FakeProtocol.fetched) diff --git a/tests/test_webmention.py b/tests/test_webmention.py index 907cfbf..b5ae5cb 100644 --- a/tests/test_webmention.py +++ b/tests/test_webmention.py @@ -277,7 +277,7 @@ class WebmentionTest(testutil.TestCase): """ self.follow_fragment = requests_response( - self.follow_fragment_html, url='https://user.com/follow#2', + self.follow_fragment_html, url='https://user.com/follow', content_type=CONTENT_TYPE_HTML) self.follow_fragment_mf2 = \ util.parse_mf2(self.follow_fragment_html, id='2')['items'][0] @@ -361,36 +361,34 @@ class WebmentionTest(testutil.TestCase): def test_fetch(self, mock_get, mock_post): mock_get.return_value = self.reply + obj = Object(id='https://user.com/post') with app.test_request_context('/'): - obj = Webmention.fetch('https://user.com/post') + Webmention.fetch(obj) self.assert_equals(self.reply_as1, obj.as1) - self.assertFalse(obj.changed) - self.assertTrue(obj.new) def test_fetch_redirect(self, mock_get, mock_post): mock_get.return_value = requests_response( REPOST_HTML, url='https://orig/url', redirected_url='http://new/url') + obj = Object(id='https://orig/url') with app.test_request_context('/'): - obj = Webmention.fetch('https://orig/url') + Webmention.fetch(obj) self.assert_equals({**self.repost_mf2, 'url': 'http://new/url'}, obj.mf2) self.assert_equals(self.repost_as1, obj.as1) - self.assertFalse(obj.changed) - self.assertTrue(obj.new) - self.assertIsNone(Object.get_by_id('http://orig/url')) + self.assertIsNone(Object.get_by_id('http://new/url')) # TODO @skip def test_fetch_bad_source_url(self, mock_get, mock_post): with app.test_request_context('/'), self.assertRaises(ValueError): - Webmention.fetch('bad') + Webmention.fetch(Object(id='bad')) def test_fetch_error(self, mock_get, mock_post): mock_get.return_value = requests_response(self.reply_html, status=405) with app.test_request_context('/'), self.assertRaises(BadGateway) as e: - Webmention.fetch('https://foo') + Webmention.fetch(Object(id='https://foo')) def test_fetch_run_authorship(self, mock_get, mock_post): mock_get.side_effect = [ @@ -405,34 +403,11 @@ class WebmentionTest(testutil.TestCase): ] return_value = self.reply + obj = Object(id='https://user.com/reply') with app.test_request_context('/'): - obj = Webmention.fetch('https://user.com/reply') + Webmention.fetch(obj) self.assert_equals(self.reply_as1, obj.as1) - def test_fetch_content_changed(self, mock_get, mock_post): - orig_mf2 = copy.deepcopy(self.note_mf2) - orig_mf2['properties']['content'] = ['something else'] - Object(id='https://user.com/post', mf2=orig_mf2).put() - - mock_get.return_value = self.note - with app.test_request_context('/'): - obj = Webmention.fetch('https://user.com/post') - - self.assert_equals({**self.note_mf2, 'url': 'https://user.com/post'}, obj.mf2) - self.assert_equals(self.note_as1, obj.as1) - self.assertTrue(obj.changed) - self.assertFalse(obj.new) - - def test_fetch_content_unchanged(self, mock_get, mock_post): - Object(id='https://user.com/post', mf2=self.note_mf2).put() - - mock_get.return_value = self.note - with app.test_request_context('/'): - obj = Webmention.fetch('https://user.com/post') - - self.assertFalse(obj.changed) - self.assertFalse(obj.new) - def test_send(self, mock_get, mock_post): mock_get.return_value = WEBMENTION_REL_LINK mock_post.return_value = requests_response() @@ -1087,7 +1062,7 @@ class WebmentionTest(testutil.TestCase): self.assert_equals(200, got.status_code) mock_get.assert_has_calls(( - self.req('https://user.com/follow#2'), + self.req('https://user.com/follow'), self.as2_req('https://mas.to/mrs-foo'), )) @@ -1120,7 +1095,7 @@ class WebmentionTest(testutil.TestCase): def test_error_fragment_missing(self, mock_get, mock_post): mock_get.return_value = requests_response( - self.follow_fragment_html, url='https://user.com/follow#a', + self.follow_fragment_html, url='https://user.com/follow', content_type=CONTENT_TYPE_HTML) got = self.client.post('/webmention', data={ @@ -1128,6 +1103,9 @@ class WebmentionTest(testutil.TestCase): 'target': 'https://fed.brid.gy/', }) self.assert_equals(400, got.status_code) + mock_get.assert_has_calls(( + self.req('https://user.com/follow'), + )) def test_error(self, mock_get, mock_post): mock_get.side_effect = [self.follow, self.actor] diff --git a/tests/testutil.py b/tests/testutil.py index 933ee61..704672c 100644 --- a/tests/testutil.py +++ b/tests/testutil.py @@ -48,16 +48,18 @@ class FakeProtocol(protocol.Protocol): @classmethod def send(cls, obj, url, log_data=True): logger.info(f'FakeProtocol.send {url}') - sent.append((obj, url)) + cls.sent.append((obj, url)) cls.objects[obj.key.id()] = obj @classmethod - def fetch(cls, id): - logger.info(f'FakeProtocol.send {id}') + def fetch(cls, obj): + id = obj.key.id() + logger.info(f'FakeProtocol.load {id}') cls.fetched.append(id) if id in cls.objects: - return cls.objects[id] + obj.our_as1 = cls.objects[id] + return obj raise requests.HTTPError(response=util.Struct(status_code='410')) diff --git a/webmention.py b/webmention.py index 39f1d08..8e8e898 100644 --- a/webmention.py +++ b/webmention.py @@ -23,6 +23,7 @@ import activitypub from app import app import common from models import Follower, Object, Target, User +from protocol import Protocol logger = logging.getLogger(__name__) @@ -30,7 +31,7 @@ logger = logging.getLogger(__name__) TASKS_LOCATION = 'us-central1' -class Webmention(View): +class Webmention(Protocol): """Webmention protocol implementation.""" LABEL = 'webmention' @@ -49,16 +50,22 @@ class Webmention(View): return True @classmethod - def fetch(cls, url): + def fetch(cls, obj): """Fetches a URL over HTTP and extracts its microformats2. - See :meth:`Protocol.fetch` for details. + Follows redirects, but doesn't change the original URL in obj's id! The + :class:`Model` class doesn't allow that anyway, but more importantly, we + want to preserve that original URL becase other objects may refer to it + instead of the final redirect destination URL. + + See :meth:`Protocol.fetch` for other background. """ + url = obj.key.id() try: parsed = util.fetch_mf2(url, gateway=True, require_backlink=common.host_url().rstrip('/')) except ValueError as e: - logging.info(str(e)) + logger.info(str(e)) error(str(e)) if parsed is None: @@ -80,10 +87,10 @@ class Webmention(View): # duplicated in microformats2.json_to_object author = util.get_first(props, 'author') if not isinstance(author, dict): - logging.info(f'Fetching full authorship for author {author}') + logger.info(f'Fetching full authorship for author {author}') author = mf2util.find_author({'items': [entry]}, hentry=entry, fetch_mf2_func=util.fetch_mf2) - logging.info(f'Got: {author}') + logger.info(f'Got: {author}') if author: props['author'] = [{ "type": ["h-card"], @@ -93,18 +100,7 @@ class Webmention(View): }, }] - obj = Object.get_by_id(parsed['url']) - if obj: - orig_as1 = obj.as1 - obj.clear() - obj.mf2 = entry - obj.changed = as1.activity_changed(orig_as1, obj.as1) - obj.new = False - else: - obj = Object(id=parsed['url'], mf2=entry) - obj.new = True - obj.changed = False - + obj.mf2 = entry return obj @@ -145,7 +141,7 @@ class WebmentionView(View): # fetch source page try: - obj = Webmention.fetch(source) + obj = Webmention.load(source, refresh=True) except ValueError as e: error(f'Bad source URL: {source}: {e}') @@ -153,7 +149,7 @@ class WebmentionView(View): props = obj.mf2['properties'] author_urls = microformats2.get_string_urls(props.get('author', [])) if author_urls and not g.user.is_homepage(author_urls[0]): - logging.info(f'Overriding author {author_urls[0]} with {g.user.actor_id()}') + logger.info(f'Overriding author {author_urls[0]} with {g.user.actor_id()}') props['author'] = [g.user.actor_id()] logger.info(f'Converted to AS1: {obj.type}: {json_dumps(obj.as1, indent=2)}') @@ -187,7 +183,7 @@ class WebmentionView(View): }, ) msg = f'Enqueued task {task.name} to deliver to followers...' - logging.info(msg) + logger.info(msg) return msg, 202 inboxes_to_targets = self._activitypub_targets(obj)