From 6a63820cd84a6b492e525d5934e5b66d56e0fdde Mon Sep 17 00:00:00 2001 From: Ryan Barrett Date: Thu, 8 Feb 2024 14:40:59 -0800 Subject: [PATCH] tighten Object.owns_id to reject blocklisted domains fixes https://console.cloud.google.com/errors/detail/CLSnttKfy4v90wE;time=P30D?project=bridgy-federated --- protocol.py | 2 ++ tests/test_follow.py | 57 +++++++++++++++++++----------- tests/test_ids.py | 12 +++---- tests/test_models.py | 7 ++++ tests/test_protocol.py | 20 +++++------ tests/test_web.py | 80 ++++++++++++++++++++++++++---------------- web.py | 20 +++++++---- 7 files changed, 125 insertions(+), 73 deletions(-) diff --git a/protocol.py b/protocol.py index cd489cf..49dddb5 100644 --- a/protocol.py +++ b/protocol.py @@ -603,6 +603,8 @@ class Protocol: if authed_as: assert isinstance(authed_as, str) + # TODO: handle domain vs URL for web users, eg + # "actor https://tiffwhite.me/ isn't authed user tiffwhite.me" if actor != authed_as: logger.warning(f"actor {actor} isn't authed user {authed_as}") diff --git a/tests/test_follow.py b/tests/test_follow.py index e5aeac9..d98f425 100644 --- a/tests/test_follow.py +++ b/tests/test_follow.py @@ -170,7 +170,8 @@ class FollowTest(TestCase): ) state = util.encode_oauth_state(self.state) - resp = self.client.get(f'/follow/callback?code=my_code&state={state}') + resp = self.client.get(f'/follow/callback?code=my_code&state={state}', + base_url='https://fed.brid.gy/') self.check('@foo@ba.r', resp, FOLLOW_ADDRESS, mock_get, mock_post) mock_get.assert_has_calls(( self.req('https://ba.r/.well-known/webfinger?resource=acct:foo@ba.r'), @@ -189,7 +190,8 @@ class FollowTest(TestCase): self.state['state'] = 'https://ba.r/actor' state = util.encode_oauth_state(self.state) - resp = self.client.get(f'/follow/callback?code=my_code&state={state}') + resp = self.client.get(f'/follow/callback?code=my_code&state={state}', + base_url='https://fed.brid.gy/') self.check('https://ba.r/actor', resp, FOLLOW_URL, mock_get, mock_post) def test_callback_stored_followee_with_our_as1(self, mock_get, mock_post): @@ -206,7 +208,8 @@ class FollowTest(TestCase): self.state['state'] = 'https://ba.r/id' state = util.encode_oauth_state(self.state) - resp = self.client.get(f'/follow/callback?code=my_code&state={state}') + resp = self.client.get(f'/follow/callback?code=my_code&state={state}', + base_url='https://fed.brid.gy/') follow_with_profile_link = { **FOLLOW_URL, @@ -236,7 +239,8 @@ class FollowTest(TestCase): self.state['state'] = 'https://ba.r/actor' state = util.encode_oauth_state(self.state) - resp = self.client.get(f'/follow/callback?code=my_code&state={state}') + resp = self.client.get(f'/follow/callback?code=my_code&state={state}', + base_url='https://fed.brid.gy/') self.check('https://ba.r/actor', resp, FOLLOW_URL, mock_get, mock_post, expected_follow_as1={ @@ -272,7 +276,8 @@ class FollowTest(TestCase): self.state['state'] = 'https://ba.r/actor' state = util.encode_oauth_state(self.state) - resp = self.client.get(f'/follow/callback?code=my_code&state={state}') + resp = self.client.get(f'/follow/callback?code=my_code&state={state}', + base_url='https://fed.brid.gy/') self.check('https://ba.r/actor', resp, FOLLOW_URL, mock_get, mock_post) @@ -350,7 +355,7 @@ class FollowTest(TestCase): sig_template.startswith('keyId="http://localhost/alice.com#key"'), sig_template) - follow_id = f'http://localhost/web/alice.com/following#2022-01-02T03:04:05-{input}' + follow_id = f'https://fed.brid.gy/web/alice.com/following#2022-01-02T03:04:05-{input}' followers = Follower.query().fetch() followee = ActivityPub(id='https://ba.r/id').key @@ -400,7 +405,8 @@ class FollowTest(TestCase): self.state['state'] = 'https://ba.r/actor' state = util.encode_oauth_state(self.state) - resp = self.client.get(f'/follow/callback?code=my_code&state={state}') + resp = self.client.get(f'/follow/callback?code=my_code&state={state}', + base_url='https://fed.brid.gy/') self.assertEqual(302, resp.status_code) self.assertEqual('/web/www.alice.com/following', resp.headers['Location']) @@ -413,7 +419,7 @@ class FollowTest(TestCase): del expected_follow_as1['to'] followee = ActivityPub(id='https://ba.r/id').key follow_obj = self.assert_object( - f'http://localhost/web/{id}', + f'https://fed.brid.gy/web/{id}', users=[user.key], notify=[followee], status='complete', @@ -450,7 +456,8 @@ class FollowTest(TestCase): self.state['state'] = 'https://ba.r/actor' state = util.encode_oauth_state(self.state) - resp = self.client.get(f'/follow/callback?code=my_code&state={state}') + resp = self.client.get(f'/follow/callback?code=my_code&state={state}', + base_url='https://fed.brid.gy/') self.check('https://ba.r/actor', resp, FOLLOW_URL, mock_get, mock_post) self.assertEqual( @@ -466,13 +473,14 @@ class FollowTest(TestCase): requests_response('OK'), # AP Follow to inbox ) - with self.client.session_transaction() as ctx_session: + with self.client.session_transaction(base_url='https://fed.brid.gy/') \ + as ctx_session: ctx_session['indieauthed-me'] = 'https://alice.com' resp = self.client.post('/follow/start', data={ 'me': 'https://alice.com', 'address': 'https://ba.r/actor', - }) + }, base_url='https://fed.brid.gy/') self.check('https://ba.r/actor', resp, FOLLOW_URL, mock_get, mock_post) def test_indieauthed_session_wrong_me(self, mock_get, mock_post): @@ -480,7 +488,8 @@ class FollowTest(TestCase): requests_response(''), # IndieAuth endpoint discovery ) - with self.client.session_transaction() as ctx_session: + with self.client.session_transaction(base_url='https://fed.brid.gy/') \ + as ctx_session: ctx_session['indieauthed-me'] = 'https://eve.com' resp = self.client.post('/follow/start', data={ @@ -544,7 +553,8 @@ class UnfollowTest(TestCase): requests_response('OK'), # AP Undo Follow to inbox ) - resp = self.client.get(f'/unfollow/callback?code=my_code&state={self.state}') + resp = self.client.get(f'/unfollow/callback?code=my_code&state={self.state}', + base_url='https://fed.brid.gy/') self.check(resp, UNDO_FOLLOW, mock_get, mock_post) def test_callback_last_follow_object_str(self, mock_get, mock_post): @@ -570,7 +580,8 @@ class UnfollowTest(TestCase): undo = copy.deepcopy(UNDO_FOLLOW) undo['object']['object'] = FOLLOWEE['id'] - resp = self.client.get(f'/unfollow/callback?code=my_code&state={self.state}') + resp = self.client.get(f'/unfollow/callback?code=my_code&state={self.state}', + base_url='https://fed.brid.gy/') self.check(resp, undo, mock_get, mock_post) def check(self, resp, expected_undo, mock_get, mock_post): @@ -596,7 +607,7 @@ class UnfollowTest(TestCase): self.assertEqual('inactive', follower.status) self.assert_object( - 'http://localhost/web/alice.com/following#undo-2022-01-02T03:04:05-https://ba.r/id', + 'https://fed.brid.gy/web/alice.com/following#undo-2022-01-02T03:04:05-https://ba.r/id', users=[self.user.key], notify=[ActivityPub(id='https://ba.r/id').key], status='complete', @@ -630,7 +641,8 @@ class UnfollowTest(TestCase): 'me': 'https://alice.com', 'state': self.follower.key.id(), }) - resp = self.client.get(f'/unfollow/callback?code=my_code&state={state}') + resp = self.client.get(f'/unfollow/callback?code=my_code&state={state}', + base_url='https://fed.brid.gy/') self.assertEqual(302, resp.status_code) self.assertEqual('/web/www.alice.com/following', resp.headers['Location']) @@ -658,7 +670,7 @@ class UnfollowTest(TestCase): self.assertEqual('inactive', follower.status) self.assert_object( - 'http://localhost/web/www.alice.com/following#undo-2022-01-02T03:04:05-https://ba.r/id', + 'https://fed.brid.gy/web/www.alice.com/following#undo-2022-01-02T03:04:05-https://ba.r/id', users=[user.key], notify=[ActivityPub(id='https://ba.r/id').key], status='complete', @@ -686,7 +698,8 @@ class UnfollowTest(TestCase): requests_response('OK'), # AP Undo Follow to inbox ) - resp = self.client.get(f'/unfollow/callback?code=my_code&state={self.state}') + resp = self.client.get(f'/unfollow/callback?code=my_code&state={self.state}', + base_url='https://fed.brid.gy/') self.assertEqual([f'Unfollowed ba.r/url.'], get_flashed_messages()) self.check(resp, UNDO_FOLLOW, mock_get, mock_post) @@ -698,13 +711,14 @@ class UnfollowTest(TestCase): requests_response('OK'), # AP Undo Follow to inbox ) - with self.client.session_transaction() as ctx_session: + with self.client.session_transaction(base_url='https://fed.brid.gy/') \ + as ctx_session: ctx_session['indieauthed-me'] = 'https://alice.com' resp = self.client.post('/unfollow/start', data={ 'me': 'https://alice.com', 'key': self.follower.key.id(), - }) + }, base_url='https://fed.brid.gy/') self.check(resp, UNDO_FOLLOW, mock_get, mock_post) def test_indieauthed_session_wrong_me(self, mock_get, mock_post): @@ -712,7 +726,8 @@ class UnfollowTest(TestCase): requests_response(''), # IndieAuth endpoint discovery ) - with self.client.session_transaction() as ctx_session: + with self.client.session_transaction(base_url='https://fed.brid.gy/') \ + as ctx_session: ctx_session['indieauthed-me'] = 'https://eve.com' resp = self.client.post('/unfollow/start', data={ diff --git a/tests/test_ids.py b/tests/test_ids.py index 11a9d92..85f432c 100644 --- a/tests/test_ids.py +++ b/tests/test_ids.py @@ -125,7 +125,7 @@ class IdsTest(TestCase): handle=handle, from_proto=from_, to_proto=to, enhanced=True)) def test_translate_object_id(self): - self.store_object(id='http://post', + self.store_object(id='http://po.st', copies=[Target(uri='at://did/web/post', protocol='atproto')]) self.store_object(id='https://inst/post', copies=[Target(uri='at://did/ap/post', protocol='atproto')]) @@ -140,7 +140,7 @@ class IdsTest(TestCase): Web, 'https://ap.brid.gy/convert/web/https://inst/post'), (ATProto, 'at://did/atp/post', ATProto, 'at://did/atp/post'), # copies - (ATProto, 'at://did/web/post', Web, 'http://post'), + (ATProto, 'at://did/web/post', Web, 'http://po.st'), (ATProto, 'at://did/ap/post', ActivityPub, 'https://inst/post'), (ATProto, 'at://did/fa/post', Fake, 'fake:post'), # no copies @@ -152,10 +152,10 @@ class IdsTest(TestCase): (Fake, 'fake:post', ATProto, 'at://did/fa/post'), (Fake, 'fake:post', Fake, 'fake:post'), (Fake, 'fake:post', Web, 'https://fa.brid.gy/convert/web/fake:post'), - (Web, 'http://post', ActivityPub, 'http://localhost/r/http://post'), - (Web, 'http://post', ATProto, 'at://did/web/post'), - (Web, 'http://post', Fake, 'fake:o:web:http://post'), - (Web, 'http://post', Web, 'http://post'), + (Web, 'http://po.st', ActivityPub, 'http://localhost/r/http://po.st'), + (Web, 'http://po.st', ATProto, 'at://did/web/post'), + (Web, 'http://po.st', Fake, 'fake:o:web:http://po.st'), + (Web, 'http://po.st', Web, 'http://po.st'), ]: with self.subTest(from_=from_.LABEL, to=to.LABEL): self.assertEqual(expected, translate_object_id( diff --git a/tests/test_models.py b/tests/test_models.py index 4151c17..2d451ce 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -683,6 +683,13 @@ class ObjectTest(TestCase): with self.assertRaises(AssertionError): Object(id='not a fake', source_protocol='fake').put() + def test_put_blocklisted_id(self): + Object(id='asdf foo').put() # ok, no source protocol + Object(id='fake:foo', source_protocol='fake').put() # ok, valid id + + with self.assertRaises(AssertionError): + Object(id='not a fake', source_protocol='fake').put() + def test_resolve_ids_empty(self): obj = Object() obj.resolve_ids() diff --git a/tests/test_protocol.py b/tests/test_protocol.py index 74c02df..1f3eeb7 100644 --- a/tests/test_protocol.py +++ b/tests/test_protocol.py @@ -105,24 +105,24 @@ class ProtocolTest(TestCase): self.assertEqual(Greedy, Protocol.for_id('https://bar/baz')) def test_for_id_object(self): - self.store_object(id='http://ui/obj', source_protocol='ui') - self.assertEqual(UIProtocol, Protocol.for_id('http://ui/obj')) + self.store_object(id='http://u.i/obj', source_protocol='ui') + self.assertEqual(UIProtocol, Protocol.for_id('http://u.i/obj')) def test_for_id_object_missing_source_protocol(self): - self.store_object(id='http://bad/obj') - self.assertIsNone(Protocol.for_id('http://bad/obj')) + self.store_object(id='http://ba.d/obj') + self.assertIsNone(Protocol.for_id('http://ba.d/obj')) @patch('requests.get') def test_for_id_activitypub_fetch(self, mock_get): mock_get.return_value = self.as2_resp(ACTOR) - self.assertEqual(ActivityPub, Protocol.for_id('http://ap/actor')) - self.assertIn(self.as2_req('http://ap/actor'), mock_get.mock_calls) + self.assertEqual(ActivityPub, Protocol.for_id('http://a.p/actor')) + self.assertIn(self.as2_req('http://a.p/actor'), mock_get.mock_calls) @patch('requests.get') def test_for_id_activitypub_fetch_fails(self, mock_get): mock_get.return_value = requests_response('', status=403) - self.assertIsNone(Protocol.for_id('http://ap/actor')) - self.assertIn(self.as2_req('http://ap/actor'), mock_get.mock_calls) + self.assertIsNone(Protocol.for_id('http://a.p/actor')) + self.assertIn(self.as2_req('http://a.p/actor'), mock_get.mock_calls) mock_get.assert_called_once() @patch('requests.get') @@ -272,7 +272,7 @@ class ProtocolTest(TestCase): @patch('requests.get', return_value=ACTOR_HTML_RESP) def test_load_remote_true_clear_our_as1(self, _): - self.store_object(id='https://foo', our_as1={'should': 'disappear'}, + self.store_object(id='https://fo.o', our_as1={'should': 'disappear'}, source_protocol='web') expected_mf2 = { @@ -280,7 +280,7 @@ class ProtocolTest(TestCase): 'url': 'https://user.com/', } - loaded = Web.load('https://foo', remote=True) + loaded = Web.load('https://fo.o', remote=True) self.assertEqual(expected_mf2, loaded.mf2) self.assertIsNone(loaded.our_as1) self.assertEqual(ACTOR_AS1_UNWRAPPED_URLS, loaded.as1) diff --git a/tests/test_web.py b/tests/test_web.py index 14610e8..85fcb1e 100644 --- a/tests/test_web.py +++ b/tests/test_web.py @@ -195,7 +195,7 @@ REPLY_HTML = """\

- + foo ☕ bar

@@ -252,12 +252,12 @@ AS2_CREATE = { 'url': 'http://localhost/r/https://user.com/reply', 'name': 'foo ☕ bar', 'content': """\ - + foo ☕ bar """, 'contentMap': { 'en': """\ - + foo ☕ bar """, }, @@ -400,7 +400,7 @@ NOT_FEDIVERSE = requests_response("""\
foo
-""", url='http://not/fediverse') +""", url='http://no.t/fediverse') ACTIVITYPUB_GETS = [ REPLY, NOT_FEDIVERSE, # AP @@ -652,7 +652,7 @@ class WebTest(TestCase): mock_get.side_effect = ( requests_response( REPLY_HTML.replace('https://mas.to/toot', 'bad:nope')\ - .replace('http://not/fediverse', ''), + .replace('http://no.t/fediverse', ''), content_type=CONTENT_TYPE_HTML, url='https://user.com/reply'), ValueError('foo bar'), # AS2 fetch ValueError('foo bar'), # HTML fetch @@ -683,9 +683,9 @@ class WebTest(TestCase): requests_response( REPLY_HTML.replace('https://mas.to/toot', 'bad:nope'), url='https://user.com/post'), - # http://not/fediverse AP protocol discovery + # http://no.t/fediverse AP protocol discovery requests.Timeout('foo bar'), - # http://not/fediverse web protocol discovery + # http://no.t/fediverse web protocol discovery requests.Timeout('foo bar'), ] @@ -694,7 +694,7 @@ class WebTest(TestCase): self.assertEqual(204, got.status_code) def test_target_fetch_has_no_content_type(self, mock_get, mock_post): - Object(id='http://not/fediverse', mf2=NOTE_MF2, source_protocol='web').put() + Object(id='http://no.t/fediverse', mf2=NOTE_MF2, source_protocol='web').put() no_content_type = requests_response(REPLY_HTML, content_type='') @@ -704,7 +704,7 @@ class WebTest(TestCase): no_content_type, # https://mas.to/toot AP protocol discovery no_content_type, # https://mas.to/toot Web protocol discovery no_content_type, # https://user.com/ webmention discovery - no_content_type, # http://not/fediverse webmention discovery + no_content_type, # http://no.t/fediverse webmention discovery ) got = self.post('/queue/webmention', data={'source': 'https://user.com/reply'}) self.assertEqual(204, got.status_code) @@ -750,8 +750,8 @@ class WebTest(TestCase): mock_get.assert_has_calls(( self.req('https://user.com/reply'), - self.as2_req('http://not/fediverse'), - self.req('http://not/fediverse'), + self.as2_req('http://no.t/fediverse'), + self.req('http://no.t/fediverse'), self.as2_req('https://mas.to/toot'), self.as2_req('https://mas.to/author'), )) @@ -874,8 +874,8 @@ class WebTest(TestCase): mock_get.assert_has_calls(( self.req('https://user.com/reply'), - self.as2_req('http://not/fediverse'), - self.req('http://not/fediverse'), + self.as2_req('http://no.t/fediverse'), + self.req('http://no.t/fediverse'), self.as2_req('https://mas.to/toot'), self.as2_req('https://mas.to/author'), )) @@ -962,8 +962,8 @@ class WebTest(TestCase): mock_get.assert_has_calls(( self.req('https://user.com/reply'), - self.as2_req('http://not/fediverse'), - self.req('http://not/fediverse'), + self.as2_req('http://no.t/fediverse'), + self.req('http://no.t/fediverse'), self.as2_req('https://mas.to/toot'), self.as2_req('https://mas.to/toot/id', headers=as2.CONNEG_HEADERS), self.as2_req('https://mas.to/author'), @@ -1849,17 +1849,17 @@ class WebTest(TestCase): - http://post/a + http://po.st/a I hereby ☕ post a Tue, 08 Dec 2012 00:00:00 +0000 - http://post/b + http://po.st/b I hereby ☕ post b Tue, 05 Dec 2012 00:00:00 +0000 - http://post/c + http://po.st/c I hereby ☕ post c Tue, 04 Dec 2012 00:00:00 +0000 @@ -1874,13 +1874,13 @@ class WebTest(TestCase): user = self.user.key.get() self.assertEqual(NOW, user.last_polled_feed) - self.assertEqual('http://post/a', user.feed_last_item) + self.assertEqual('http://po.st/a', user.feed_last_item) mock_get.assert_has_calls(( self.req('https://foo/rss'), )) for i, (id, day) in enumerate([('a', 8), ('b', 5), ('c', 4)]): - url = f'http://post/{id}' + url = f'http://po.st/{id}' obj = self.assert_object( url, users=[self.user.key], @@ -2040,6 +2040,29 @@ class WebTest(TestCase): feed = """\ + + + I hereby ☕ post + +""" + mock_get.return_value = requests_response( + feed, headers={'Content-Type': atom.CONTENT_TYPE}) + got = self.post('/queue/poll-feed', data={'domain': 'user.com'}) + self.assertEqual(200, got.status_code) + + self.assertEqual(1, Object.query().count()) # only user profile + mock_create_task.assert_called_once() # only the next poll-feed task + + @patch('oauth_dropins.webutil.appengine_config.tasks_client.create_task') + def test_poll_feed_blocklisted_entry_url(self, mock_create_task, mock_get, _): + common.RUN_TASKS_INLINE = False + self.user.obj.mf2 = ACTOR_MF2_REL_FEED_URL + self.user.obj.put() + self.user.feed_last_item = 'https://user.com/post' + self.user.put() + + feed = """\ + I hereby ☕ post @@ -2054,11 +2077,6 @@ class WebTest(TestCase): self.assertEqual(NOW, user.last_polled_feed) self.assertEqual('https://user.com/post', user.feed_last_item) - mock_create_task.assert_called_once() - expected_eta = NOW_SECONDS + web.MIN_FEED_POLL_PERIOD.total_seconds() - self.assert_task(mock_create_task, 'poll-feed', '/queue/poll-feed', - domain='user.com', eta_seconds=expected_eta) - @patch('oauth_dropins.webutil.appengine_config.tasks_client.create_task') def test_poll_feed_last_webmention_in_noop(self, mock_create_task, mock_get, _): common.RUN_TASKS_INLINE = False @@ -2456,7 +2474,6 @@ class WebUtilTest(TestCase): self.assertIsNone(Web.owns_id('http://foo.com')) self.assertIsNone(Web.owns_id('https://bar.com/')) self.assertIsNone(Web.owns_id('https://bar.com/baz')) - self.assertIsNone(Web.owns_id('https://bar/')) self.assertEqual(False, Web.owns_id('at://did:plc:foo/bar/123')) self.assertEqual(False, Web.owns_id('e45fab982')) @@ -2467,16 +2484,19 @@ class WebUtilTest(TestCase): self.user.key.delete() self.assertIsNone(Web.owns_id('user.com')) - # TODO: these should be False since they're blocklisted? - self.assertIsNone(Web.owns_id('https://twitter.com/foo')) - self.assertIsNone(Web.owns_id('https://fed.brid.gy/foo')) - def test_owns_id_returns_None(self, *_): self.user.manual_opt_out = True self.user.put() self.assertIsNone(Web.owns_id('https://user.com/')) self.assertIsNone(Web.owns_id('user.com')) + def test_owns_id_blocklisted(self, *_): + self.assertIs(False, Web.owns_id('localhost')) + self.assertIs(False, Web.owns_id('http://localhost/foo')) + self.assertIs(False, Web.owns_id('http://localhost:8080/foo')) + self.assertIs(False, Web.owns_id('https://twitter.com/')) + self.assertIs(False, Web.owns_id('https://ap.brid.gy/foo')) + def test_owns_handle(self, *_): self.assertIsNone(Web.owns_handle('foo.com')) self.assertIsNone(Web.owns_handle('foo.bar.com')) diff --git a/web.py b/web.py index 1904210..570dcda 100644 --- a/web.py +++ b/web.py @@ -326,12 +326,15 @@ class Web(User, Protocol): if not id: return False - key = cls.key_for(id) - if key: + if key := cls.key_for(id): user = key.get() return True if user and user.has_redirects else None + elif is_valid_domain(id): + return None + elif util.is_web(id) and is_valid_domain(util.domain_from_link(id)): + return None - return None if util.is_web(id) or is_valid_domain(id) else False + return False @classmethod def owns_handle(cls, handle): @@ -697,12 +700,17 @@ def poll_feed_task(): if not id: logger.warning('No id or URL!') continue + + if i == 0: + logger.info(f'Setting feed_last_item to {id}') + user.feed_last_item = id elif id == user.feed_last_item: logger.info(f'Already seen {id}, skipping rest of feed') break - elif i == 0: - logger.info(f'Setting feed_last_item to {id}') - user.feed_last_item = id + + if Web.owns_id(id) is False: + logger.warning(f'Skipping bad id {id}') + continue activity['feed_index'] = i obj = Object.get_or_create(id=id, our_as1=activity, status='new',