From 111df31736449c0e7e72e96f5c8fe0617d09e2ed Mon Sep 17 00:00:00 2001 From: Ryan Barrett Date: Sun, 2 Jul 2023 14:55:05 -0700 Subject: [PATCH] extend Web/ActivityPub.owns_id() to return False if blocklisted --- activitypub.py | 6 +++++- common.py | 11 +++++------ protocol.py | 7 +++++-- tests/test_activitypub.py | 3 +++ tests/test_web.py | 3 +++ 5 files changed, 21 insertions(+), 9 deletions(-) diff --git a/activitypub.py b/activitypub.py index aef539f..ec9a94e 100644 --- a/activitypub.py +++ b/activitypub.py @@ -24,6 +24,7 @@ from common import ( CONTENT_TYPE_HTML, error, host_url, + is_blocklisted, NoMicroformats, redirect_unwrap, redirect_wrap, @@ -114,7 +115,10 @@ class ActivityPub(User, Protocol): https://www.w3.org/TR/activitypub/#obj-id """ - return None if util.is_web(id) else False + if util.is_web(id) and not is_blocklisted(id): + return None + + return False @classmethod def target_for(cls, obj, shared=False): diff --git a/common.py b/common.py index 10f1e1c..6ccbc1a 100644 --- a/common.py +++ b/common.py @@ -130,16 +130,15 @@ def content_type(resp): return type.split(';')[0] -def remove_blocklisted(urls): - """Returns the subset of input URLs that aren't in our domain blocklist. +def is_blocklisted(url): + """Returns True if the given URL is in our domain blocklist, False otherwise. Args: - urls: sequence of str + url: str - Returns: list of str + Returns: boolean """ - return [u for u in urls if not util.domain_or_parent_in( - util.domain_from_link(u), DOMAIN_BLOCKLIST)] + return util.domain_or_parent_in(util.domain_from_link(url), DOMAIN_BLOCKLIST) def redirect_wrap(url): diff --git a/protocol.py b/protocol.py index 2e44379..09137d1 100644 --- a/protocol.py +++ b/protocol.py @@ -11,7 +11,7 @@ from granary import as1 import werkzeug.exceptions import common -from common import add, error +from common import add, error, is_blocklisted from models import Follower, Object, PROTOCOLS, Target, User from oauth_dropins.webutil import util from oauth_dropins.webutil.util import json_dumps, json_loads @@ -134,6 +134,8 @@ class Protocol: external HTTP fetches to fetch the id itself or otherwise perform discovery. + Returns False if the id's domain is in :attr:`common.DOMAIN_BLOCKLIST`. + Args: id: str @@ -727,7 +729,8 @@ class Protocol: error(f'{verb} missing target URL') logger.info(f'original object ids from object: {orig_ids}') - orig_ids = sorted(common.remove_blocklisted(util.dedupe_urls(orig_ids))) + orig_ids = sorted(id for id in util.dedupe_urls(orig_ids) + if not is_blocklisted(id)) orig_obj = None targets = {} for id in orig_ids: diff --git a/tests/test_activitypub.py b/tests/test_activitypub.py index 3bf1b1d..971e106 100644 --- a/tests/test_activitypub.py +++ b/tests/test_activitypub.py @@ -1452,6 +1452,9 @@ class ActivityPubUtilsTest(TestCase): self.assertFalse(ActivityPub.owns_id('at://did:plc:foo/bar/123')) self.assertFalse(ActivityPub.owns_id('e45fab982')) + self.assertFalse(ActivityPub.owns_id('https://twitter.com/foo')) + self.assertFalse(ActivityPub.owns_id('https://fed.brid.gy/foo')) + def test_postprocess_as2_multiple_in_reply_tos(self): self.assert_equals({ 'id': 'http://localhost/r/xyz', diff --git a/tests/test_web.py b/tests/test_web.py index 1132bd4..6771f0c 100644 --- a/tests/test_web.py +++ b/tests/test_web.py @@ -1803,6 +1803,9 @@ class WebProtocolTest(TestCase): g.user.key.delete() self.assertIsNone(Web.owns_id('user.com')) + self.assertFalse(Web.owns_id('https://twitter.com/foo')) + self.assertFalse(Web.owns_id('https://fed.brid.gy/foo')) + def test_fetch(self, mock_get, __): mock_get.return_value = REPOST