From fcef6c21ab7832e2364282e6e23cb1ff935b534c Mon Sep 17 00:00:00 2001 From: Ryan Barrett Date: Fri, 26 Jan 2024 11:37:34 -0800 Subject: [PATCH] Revert "heuristic: assume no AP actor id is the root path on its host" This reverts commit b7e890b4bb79db64ca05877bc72f27aa516d08c0. it was a nice idea, but we're now hitting AP actor ids that are the root path on their host. this is evidently how microblog.pub does AP; 0x3b0b's site https://bw3.dev/ is an example. general info on https://docs.microblog.pub/user_guide.html#activitypub but not this specifically. fixes https://console.cloud.google.com/errors/detail/CLSnttKfy4v90wE;time=P30D?project=bridgy-federated --- activitypub.py | 15 +++++++++------ tests/test_activitypub.py | 14 +++++++------- tests/test_convert.py | 20 +++++++++++++++++--- tests/test_follow.py | 5 +---- tests/test_protocol.py | 14 ++++++++++---- tests/test_redirect.py | 5 +++-- tests/test_web.py | 4 +++- tests/test_webfinger.py | 5 ++++- tests/testutil.py | 2 +- 9 files changed, 55 insertions(+), 29 deletions(-) diff --git a/activitypub.py b/activitypub.py index 792f0c4..4bdefbf 100644 --- a/activitypub.py +++ b/activitypub.py @@ -108,13 +108,16 @@ class ActivityPub(User, Protocol): All AP ids are http(s) URLs, but not all http(s) URLs are AP ids. https://www.w3.org/TR/activitypub/#obj-id + + I used to include a heuristic here that no actor is the root path on its + host, which was nice because it let us assume that home pages are Web + users without making any network requests...but then I inevitably ran + into AP actors that _are_ the root path, eg microblog.pub sites like + https://bw3.dev/ . + + https://docs.microblog.pub/user_guide.html#activitypub """ - if (util.is_web(id) - and not cls.is_blocklisted(id) - # heuristic: assume no actor is the root path on its host. this - # lets us assume home pages are Web users without making any - # network requests. - and urlparse(id).path not in ('', '/')): + if util.is_web(id) and not cls.is_blocklisted(id): return None return False diff --git a/tests/test_activitypub.py b/tests/test_activitypub.py index b0e7ec3..d90f08c 100644 --- a/tests/test_activitypub.py +++ b/tests/test_activitypub.py @@ -978,7 +978,6 @@ class ActivityPubTest(TestCase): mock_head.return_value = requests_response(url='https://user.com/') mock_get.side_effect = [ self.as2_resp(ACTOR), # source actor - test_web.ACTOR_HTML_RESP, WEBMENTION_DISCOVERY, ] if not mock_post.return_value and not mock_post.side_effect: @@ -1775,12 +1774,13 @@ class ActivityPubUtilsTest(TestCase): ActivityPub(id=bad).put() def test_owns_id(self): - self.assertIsNone(ActivityPub.owns_id('http://foo/bar')) - self.assertEqual(False, ActivityPub.owns_id('at://did:plc:foo/bar/123')) - self.assertEqual(False, ActivityPub.owns_id('e45fab982')) - self.assertEqual(False, ActivityPub.owns_id('https://example.com/')) - self.assertEqual(False, ActivityPub.owns_id('https://twitter.com/foo')) - self.assertEqual(False, ActivityPub.owns_id('https://fed.brid.gy/foo')) + self.assertIsNone(ActivityPub.owns_id('http://foo')) + self.assertIsNone(ActivityPub.owns_id('https://bar/baz')) + self.assertFalse(ActivityPub.owns_id('at://did:plc:foo/bar/123')) + self.assertFalse(ActivityPub.owns_id('e45fab982')) + + self.assertFalse(ActivityPub.owns_id('https://twitter.com/foo')) + self.assertFalse(ActivityPub.owns_id('https://fed.brid.gy/foo')) def test_owns_handle(self): for handle in ('@user@instance', 'user@instance.com', 'user.com@instance.com', diff --git a/tests/test_convert.py b/tests/test_convert.py index 574029b..9f1fde9 100644 --- a/tests/test_convert.py +++ b/tests/test_convert.py @@ -296,13 +296,27 @@ A ☕ reply @patch('requests.get') def test_web_to_activitypub_no_user(self, mock_get): - mock_get.return_value = requests_response(HTML) # protocol inference + hcard = requests_response(""" + + + + + +""", url='https://nope.com/') + mock_get.side_effect = [ + # post protocol inference + requests_response(''), + hcard, + hcard, + hcard, + ] - resp = self.client.get(f'/convert/ap/http://nope.com/post', + resp = self.client.get(f'/convert/ap/https://nope.com/post', base_url='https://web.brid.gy/') self.assertEqual(200, resp.status_code) self.assert_equals({ - **COMMENT_AS2, + 'type': 'Note', + 'id': 'https://web.brid.gy/r/https://nope.com/post', 'attributedTo': 'https://web.brid.gy/nope.com', }, resp.json, ignore=['to']) diff --git a/tests/test_follow.py b/tests/test_follow.py index 91b3764..e5aeac9 100644 --- a/tests/test_follow.py +++ b/tests/test_follow.py @@ -619,10 +619,7 @@ class UnfollowTest(TestCase): follow=Object(id=FOLLOW_ADDRESS['id'], as2=FOLLOW_ADDRESS).put(), status='active') - mock_get.side_effect = ( - requests_response(''), - self.as2_resp(FOLLOWEE), - ) + mock_get.return_value = requests_response('') mock_post.side_effect = ( requests_response('me=https://alice.com'), requests_response('OK'), # AP Undo Follow to inbox diff --git a/tests/test_protocol.py b/tests/test_protocol.py index fac0d86..312f9d4 100644 --- a/tests/test_protocol.py +++ b/tests/test_protocol.py @@ -125,10 +125,16 @@ class ProtocolTest(TestCase): mock_get.assert_called_once() @patch('requests.get') - def test_for_id_web_no_fetch(self, mock_get): - self.assertEqual(Web, Protocol.for_id('http://web/')) - # we don't allow ActivityPub actors on root paths, so Web must be it - mock_get.assert_not_called() + def test_for_id_web_fetch(self, mock_get): + mock_get.return_value = ACTOR_HTML_RESP + self.assertEqual(Web, Protocol.for_id('http://web.site/')) + self.assertIn(self.req('http://web.site/'), mock_get.mock_calls) + + @patch('requests.get') + def test_for_id_web_fetch_no_mf2(self, mock_get): + mock_get.return_value = requests_response('') + self.assertIsNone(Protocol.for_id('http://web.site/')) + self.assertIn(self.req('http://web.site/'), mock_get.mock_calls) def test_for_handle_deterministic(self): for handle, expected in [ diff --git a/tests/test_redirect.py b/tests/test_redirect.py index 19f456a..1b80af3 100644 --- a/tests/test_redirect.py +++ b/tests/test_redirect.py @@ -11,6 +11,7 @@ from . import testutil from flask_app import app, cache from models import Object +import protocol from web import Web from .test_activitypub import ACTOR_BASE_FULL @@ -98,7 +99,6 @@ class RedirectTest(testutil.TestCase): mock_get.side_effect = [ requests_response(REPOST_HTML), TOOT_AS2, - TOOT_AS2, ] resp = self.client.get('/r/https://user.com/repost', @@ -113,7 +113,6 @@ class RedirectTest(testutil.TestCase): requests_response( REPOST_HTML.replace('', '')), TOOT_AS2, - TOOT_AS2, ] resp = self.client.get('/r/https://user.com/repost', @@ -126,6 +125,8 @@ class RedirectTest(testutil.TestCase): def test_as2_no_user_fetch_homepage(self, mock_get): mock_get.return_value = requests_response(ACTOR_HTML) self.user.key.delete() + self.user.obj_key.delete() + protocol.objects_cache.clear() resp = self.client.get('/r/https://user.com/', headers={'Accept': as2.CONTENT_TYPE}) diff --git a/tests/test_web.py b/tests/test_web.py index 0f509a8..b68b969 100644 --- a/tests/test_web.py +++ b/tests/test_web.py @@ -2814,7 +2814,9 @@ class WebUtilTest(TestCase): 'target': 'https://user.com/post', }, kwargs['data']) - def test_convert(self, _, __): + def test_convert(self, mock_get, __): + mock_get.return_value = ACTOR_HTML_RESP + obj = Object(id='http://orig', mf2=ACTOR_MF2) self.assert_multiline_equals("""\ diff --git a/tests/test_webfinger.py b/tests/test_webfinger.py index d966072..adb196b 100644 --- a/tests/test_webfinger.py +++ b/tests/test_webfinger.py @@ -8,6 +8,7 @@ from oauth_dropins.webutil.testutil import requests_response # import first so that Fake is defined before URL routes are registered from .testutil import Fake, TestCase +import protocol from web import Web from webfinger import fetch, fetch_actor_url @@ -318,8 +319,10 @@ class WebfingerTest(TestCase): @patch('requests.get') def test_create_user(self, mock_get): self.user.key.delete() - mock_get.return_value = requests_response(test_web.ACTOR_HTML) + self.user.obj_key.delete() + protocol.objects_cache.clear() + mock_get.return_value = requests_response(test_web.ACTOR_HTML) expected = copy.deepcopy(WEBFINGER_NO_HCARD) expected['subject'] = 'acct:user.com@web.brid.gy' diff --git a/tests/testutil.py b/tests/testutil.py index 3214d32..a901503 100644 --- a/tests/testutil.py +++ b/tests/testutil.py @@ -291,7 +291,7 @@ class TestCase(unittest.TestCase, testutil.Asserts): if not obj_id: obj_id = ((obj_as2 or {}).get('id') or util.get_url((obj_mf2 or {}), 'properties') - or (f'http://{id}/' if cls == Web else id)) + or (f'https://{id}/' if cls == Web else id)) # unused right now # or f'fake:{str(self.last_make_user_id)}') self.last_make_user_id += 1