add domain blocklist, right now primarily for Twitter

pull/287/head
Ryan Barrett 2022-11-14 07:07:33 -08:00
rodzic 881307fa0b
commit fd5c88c713
Nie znaleziono w bazie danych klucza dla tego podpisu
ID klucza GPG: 6BE31FDF4776E9D4
4 zmienionych plików z 108 dodań i 57 usunięć

Wyświetl plik

@ -62,6 +62,13 @@ OTHER_DOMAINS = (
'localhost',
)
DOMAINS = (PRIMARY_DOMAIN,) + OTHER_DOMAINS
# Domains that remove_blocklisted() filters out of URL lists: a handful of
# hard-coded sites plus every one of our own domains listed in DOMAINS.
# TODO: unify with Bridgy's
DOMAIN_BLOCKLIST = frozenset({
    'facebook.com',
    'fb.com',
    't.co',
    'twitter.com',
    *DOMAINS,
})
def requests_get(url, **kwargs):
@ -144,6 +151,18 @@ def content_type(resp):
return type.split(';')[0]
def remove_blocklisted(urls):
    """Filters out URLs whose domain is on our blocklist.

    A URL is dropped when util.domain_or_parent_in() reports a match between
    the domain that util.domain_from_link() extracts from it and
    DOMAIN_BLOCKLIST. The remaining URLs keep their input order.

    Args:
      urls: sequence of str

    Returns: list of str
    """
    allowed = []
    for url in urls:
        domain = util.domain_from_link(url)
        if util.domain_or_parent_in(domain, DOMAIN_BLOCKLIST):
            continue  # blocklisted; leave it out of the result
        allowed.append(url)
    return allowed
def send_webmentions(activity_wrapped, proxy=None, **activity_props):
"""Sends webmentions for an incoming Salmon slap or ActivityPub inbox delivery.
Args:
@ -183,9 +202,11 @@ def send_webmentions(activity_wrapped, proxy=None, **activity_props):
if verb in ('follow', 'like', 'share'):
targets.append(obj_url)
targets = util.dedupe_urls(util.get_url(t) for t in targets)
targets = remove_blocklisted(util.dedupe_urls(
util.get_url(t).lower() for t in targets))
if not targets:
error("Couldn't find any target URLs in inReplyTo, object, or mention tags")
error("Couldn't find any fediverse target URLs in inReplyTo, object, or mention tags")
logger.info(f'targets: {targets}')
# send webmentions and store Activitys
errors = [] # stores (code, body) tuples

Wyświetl plik

@ -24,7 +24,7 @@ REPLY_OBJECT = {
'id': 'http://this/reply/id',
'url': 'http://this/reply',
'inReplyTo': 'http://orig/post',
'to': ['https://www.w3.org/ns/activitystreams#Public'],
'to': [common.AS2_PUBLIC_AUDIENCE],
}
REPLY_OBJECT_WRAPPED = copy.deepcopy(REPLY_OBJECT)
REPLY_OBJECT_WRAPPED['inReplyTo'] = 'http://localhost/r/orig/post'
@ -40,7 +40,7 @@ NOTE_OBJECT = {
'content': '☕ just a normal post',
'id': 'http://this/mention/id',
'url': 'http://this/mention',
'to': ['https://www.w3.org/ns/activitystreams#Public'],
'to': [common.AS2_PUBLIC_AUDIENCE],
'cc': [
'https://this/author/followers',
'https://masto.foo/@other',
@ -251,17 +251,25 @@ class ActivityPubTest(testutil.TestCase):
self.assertEqual('complete', activity.status)
self.assertEqual(expected_as2, json_loads(activity.source_as2))
def test_inbox_reply_drop_self_domain_target(self, mock_head, mock_get, mock_post):
def test_inbox_reply_to_self_domain(self, mock_head, mock_get, mock_post):
self._test_inbox_ignore_reply_to('http://localhost/this', 200,
mock_head, mock_get, mock_post)
self.assert_req(mock_head, 'http://this', allow_redirects=True)
def test_inbox_reply_to_in_blocklist(self, *mocks):
    # twitter.com is in the domain blocklist, so the shared helper is told to
    # expect a 400 response for a reply to it.
    blocklisted_target = 'https://twitter.com/foo'
    expected_status = 400
    self._test_inbox_ignore_reply_to(blocklisted_target, expected_status, *mocks)
def _test_inbox_ignore_reply_to(self, reply_to, status, mock_head, mock_get,
mock_post):
reply = copy.deepcopy(REPLY_OBJECT)
# same domain as source; should drop
reply['inReplyTo'] = 'http://localhost/this',
reply['inReplyTo'] = reply_to
mock_head.return_value = requests_response(url='http://this/')
got = self.client.post('/foo.com/inbox', json=reply)
self.assertEqual(200, got.status_code, got.get_data(as_text=True))
self.assertEqual(status, got.status_code, got.get_data(as_text=True))
self.assert_req(mock_head, 'http://this', allow_redirects=True)
mock_get.assert_not_called()
mock_post.assert_not_called()
self.assertEqual(0, Activity.query().count())

Wyświetl plik

@ -802,6 +802,18 @@ class WebmentionTest(testutil.TestCase):
self.assertEqual('error', activity.status)
self.assertEqual(self.follow_mf2, json_loads(activity.source_mf2))
def test_activitypub_repost_blocklisted_error(self, mock_get, mock_post):
    """Reposts of non-fediverse (ie blocklisted) sites aren't yet supported."""
    # Serve a repost whose target has been swapped for a twitter.com URL.
    blocklisted_html = REPOST_HTML.replace('http://orig/post',
                                           'https://twitter.com/foo')
    mock_get.side_effect = [
        requests_response(blocklisted_html, content_type=CONTENT_TYPE_HTML),
    ]

    form = {
        'source': 'http://a/repost',
        'target': 'https://fed.brid.gy/',
    }
    resp = self.client.post('/webmention', data=form)

    self.assertEqual(400, resp.status_code)
def test_salmon_reply(self, mock_get, mock_post):
mock_get.side_effect = [self.reply, self.not_fediverse,
self.orig_html_atom, self.orig_atom]

Wyświetl plik

@ -153,10 +153,13 @@ class Webmention(View):
"""
targets = util.get_urls(self.source_obj, 'inReplyTo')
if targets:
logger.info(f'targets from inReplyTo: {targets}')
return targets
if self.source_obj.get('verb') in as1.VERBS_WITH_OBJECT:
return util.get_urls(self.source_obj, 'object')
targets = util.get_urls(self.source_obj, 'object')
logger.info(f'targets from object: {targets}')
return targets
def _activitypub_targets(self):
"""
@ -178,64 +181,71 @@ class Webmention(View):
inboxes.add(actor.get('endpoints', {}).get('sharedInbox') or
actor.get('publicInbox')or
actor.get('inbox'))
return [(Activity.get_or_create(
source=self.source_url, target=inbox, domain=self.source_domain,
direction='out', protocol='activitypub',
source_mf2=json_dumps(self.source_mf2)),
inbox)
for inbox in sorted(inboxes) if inbox]
inboxes = [(Activity.get_or_create(
source=self.source_url, target=inbox, domain=self.source_domain,
direction='out', protocol='activitypub',
source_mf2=json_dumps(self.source_mf2)),
inbox)
for inbox in sorted(inboxes) if inbox]
logger.info(f"Delivering to followers' inboxes: {[i for _, i in inboxes]}")
return inboxes
targets = common.remove_blocklisted(targets)
if not targets:
error(f"Silo responses are not yet supported.")
activities_and_inbox_urls = []
for target in targets:
# fetch target page as AS2 object
try:
self.target_resp = common.get_as2(target)
except (requests.HTTPError, BadGateway) as e:
self.target_resp = getattr(e, 'requests_response', None)
if self.target_resp and self.target_resp.status_code // 100 == 2:
content_type = common.content_type(self.target_resp) or ''
if content_type.startswith('text/html'):
# TODO: pass e.requests_response to try_salmon's target_resp
continue # give up
raise
target_url = self.target_resp.url or target
# fetch target page as AS2 object
try:
self.target_resp = common.get_as2(target)
except (requests.HTTPError, BadGateway) as e:
self.target_resp = getattr(e, 'requests_response', None)
if self.target_resp and self.target_resp.status_code // 100 == 2:
content_type = common.content_type(self.target_resp) or ''
if content_type.startswith('text/html'):
# TODO: pass e.requests_response to try_salmon's target_resp
continue # give up
raise
target_url = self.target_resp.url or target
activity = Activity.get_or_create(
source=self.source_url, target=target_url, domain=self.source_domain,
direction='out', protocol='activitypub',
source_mf2=json_dumps(self.source_mf2))
activity = Activity.get_or_create(
source=self.source_url, target=target_url, domain=self.source_domain,
direction='out', protocol='activitypub',
source_mf2=json_dumps(self.source_mf2))
# find target's inbox
target_obj = self.target_resp.json()
activity.target_as2 = json_dumps(target_obj)
inbox_url = target_obj.get('inbox')
# find target's inbox
target_obj = self.target_resp.json()
activity.target_as2 = json_dumps(target_obj)
inbox_url = target_obj.get('inbox')
if not inbox_url:
# TODO: test actor/attributedTo and not, with/without inbox
actor = (util.get_first(target_obj, 'actor') or
util.get_first(target_obj, 'attributedTo'))
if isinstance(actor, dict):
inbox_url = actor.get('inbox')
actor = actor.get('url') or actor.get('id')
if not inbox_url and not actor:
error('Target object has no actor or attributedTo with URL or id.')
elif not isinstance(actor, str):
error(f'Target actor or attributedTo has unexpected url or id object: {actor}')
if not inbox_url:
# TODO: test actor/attributedTo and not, with/without inbox
actor = (util.get_first(target_obj, 'actor') or
util.get_first(target_obj, 'attributedTo'))
if isinstance(actor, dict):
inbox_url = actor.get('inbox')
actor = actor.get('url') or actor.get('id')
if not inbox_url and not actor:
error('Target object has no actor or attributedTo with URL or id.')
elif not isinstance(actor, str):
error(f'Target actor or attributedTo has unexpected url or id object: {actor}')
if not inbox_url:
# fetch actor as AS object
actor = common.get_as2(actor).json()
inbox_url = actor.get('inbox')
if not inbox_url:
# fetch actor as AS object
actor = common.get_as2(actor).json()
inbox_url = actor.get('inbox')
if not inbox_url:
# TODO: probably need a way to save errors like this so that we can
# return them if ostatus fails too.
# error('Target actor has no inbox')
continue
if not inbox_url:
# TODO: probably need a way to save errors like this so that we can
# return them if ostatus fails too.
# error('Target actor has no inbox')
continue
inbox_url = urllib.parse.urljoin(target_url, inbox_url)
activities_and_inbox_urls.append((activity, inbox_url))
inbox_url = urllib.parse.urljoin(target_url, inbox_url)
activities_and_inbox_urls.append((activity, inbox_url))
logger.info(f"Delivering to targets' inboxes: {[i for _, i in activities_and_inbox_urls]}")
return activities_and_inbox_urls
def try_salmon(self):