adjust Web.owns_id to say it owns protocol bot user domains but not their pages

eg bsky.brid.gy True, https://bsky.brid.gy/ True, https://bsky.brid.gy/foo False.

also move our internal synthetic UI-initiated follow ids from https://fed.brid.gy/web/... to under the user's own domain. hopefully this won't break anything 🤞
pull/968/head
Ryan Barrett 2024-04-22 11:58:01 -07:00
rodzic ed78090d2c
commit b9551c4de7
Nie znaleziono w bazie danych klucza dla tego podpisu
ID klucza GPG: 6BE31FDF4776E9D4
6 zmienionych plików z 47 dodań i 36 usunięć

Wyświetl plik

@ -122,7 +122,7 @@ class FollowCallback(indieauth.Callback):
followee_id = followee.as1.get('id') followee_id = followee.as1.get('id')
timestamp = util.now().replace(microsecond=0, tzinfo=None).isoformat() timestamp = util.now().replace(microsecond=0, tzinfo=None).isoformat()
follow_id = common.host_url(user.user_page_path(f'following#{timestamp}-{addr}')) follow_id = f'{user.web_url()}#follow-{timestamp}-{addr}'
follow_as1 = { follow_as1 = {
'objectType': 'activity', 'objectType': 'activity',
'verb': 'follow', 'verb': 'follow',
@ -204,7 +204,7 @@ class UnfollowCallback(indieauth.Callback):
# TODO(#529): generalize # TODO(#529): generalize
timestamp = util.now().replace(microsecond=0, tzinfo=None).isoformat() timestamp = util.now().replace(microsecond=0, tzinfo=None).isoformat()
unfollow_id = common.host_url(user.user_page_path(f'following#undo-{timestamp}-{followee_id}')) unfollow_id = f'{user.web_url()}#unfollow-{timestamp}-{followee_id}'
unfollow_as1 = { unfollow_as1 = {
'objectType': 'activity', 'objectType': 'activity',
'verb': 'stop-following', 'verb': 'stop-following',

Wyświetl plik

@ -547,18 +547,22 @@ class Protocol:
raise NotImplementedError() raise NotImplementedError()
@classmethod @classmethod
def is_blocklisted(cls, url): def is_blocklisted(cls, url, allow_internal=False):
"""Returns True if we block the given URL and shouldn't deliver to it. """Returns True if we block the given URL and shouldn't deliver to it.
Default implementation here, subclasses may override. Default implementation here, subclasses may override.
Args: Args:
url (str): url (str):
allow_internal (bool): whether to return False for internal domains
like ``fed.brid.gy``, ``bsky.brid.gy``, etc
Returns: bool: Returns: bool:
""" """
return util.domain_or_parent_in(util.domain_from_link(url), blocklist = DOMAIN_BLOCKLIST
DOMAIN_BLOCKLIST + DOMAINS) if not allow_internal:
blocklist += DOMAINS
return util.domain_or_parent_in(util.domain_from_link(url), blocklist)
@classmethod @classmethod
def translate_ids(to_cls, obj): def translate_ids(to_cls, obj):
@ -674,7 +678,7 @@ class Protocol:
if not id: if not id:
error('No id provided') error('No id provided')
elif from_cls.is_blocklisted(id) and not internal: elif from_cls.is_blocklisted(id, allow_internal=internal):
error(f'Activity {id} is blocklisted') error(f'Activity {id} is blocklisted')
# short circuit if we've already seen this activity id. # short circuit if we've already seen this activity id.

Wyświetl plik

@ -1825,6 +1825,7 @@ class ActivityPubUtilsTest(TestCase):
self.assertFalse(ActivityPub.owns_id('https://twitter.com/foo')) self.assertFalse(ActivityPub.owns_id('https://twitter.com/foo'))
self.assertFalse(ActivityPub.owns_id('https://fed.brid.gy/foo')) self.assertFalse(ActivityPub.owns_id('https://fed.brid.gy/foo'))
self.assertFalse(ActivityPub.owns_id('https://ap.brid.gy/foo'))
def test_owns_handle(self): def test_owns_handle(self):
for handle in ('@user@instance', 'user@instance.com', 'user.com@instance.com', for handle in ('@user@instance', 'user@instance.com', 'user.com@instance.com',

Wyświetl plik

@ -43,17 +43,17 @@ FOLLOWEE = {
FOLLOW_ADDRESS = { FOLLOW_ADDRESS = {
'@context': 'https://www.w3.org/ns/activitystreams', '@context': 'https://www.w3.org/ns/activitystreams',
'type': 'Follow', 'type': 'Follow',
'id': f'http://localhost/r/alice.com/following#2022-01-02T03:04:05-@foo@ba.r', 'id': f'http://localhost/r/https://alice.com/#follow-2022-01-02T03:04:05-@foo@ba.r',
'actor': 'http://localhost/alice.com', 'actor': 'http://localhost/alice.com',
'object': FOLLOWEE['id'], 'object': FOLLOWEE['id'],
'to': [as2.PUBLIC_AUDIENCE], 'to': [as2.PUBLIC_AUDIENCE],
} }
FOLLOW_URL = copy.deepcopy(FOLLOW_ADDRESS) FOLLOW_URL = copy.deepcopy(FOLLOW_ADDRESS)
FOLLOW_URL['id'] = f'http://localhost/r/alice.com/following#2022-01-02T03:04:05-https://ba.r/actor' FOLLOW_URL['id'] = f'http://localhost/r/https://alice.com/#follow-2022-01-02T03:04:05-https://ba.r/actor'
UNDO_FOLLOW = { UNDO_FOLLOW = {
'@context': 'https://www.w3.org/ns/activitystreams', '@context': 'https://www.w3.org/ns/activitystreams',
'type': 'Undo', 'type': 'Undo',
'id': f'http://localhost/r/alice.com/following#undo-2022-01-02T03:04:05-https://ba.r/id', 'id': f'http://localhost/r/https://alice.com/#unfollow-2022-01-02T03:04:05-https://ba.r/id',
'actor': 'http://localhost/alice.com', 'actor': 'http://localhost/alice.com',
'object': copy.deepcopy(FOLLOW_ADDRESS), 'object': copy.deepcopy(FOLLOW_ADDRESS),
} }
@ -213,7 +213,7 @@ class FollowTest(TestCase):
follow_with_profile_link = { follow_with_profile_link = {
**FOLLOW_URL, **FOLLOW_URL,
'id': f'http://localhost/r/alice.com/following#2022-01-02T03:04:05-https://ba.r/id', 'id': f'http://localhost/r/https://alice.com/#follow-2022-01-02T03:04:05-https://ba.r/id',
'object': 'https://ba.r/id', 'object': 'https://ba.r/id',
} }
self.check('https://ba.r/id', resp, follow_with_profile_link, mock_get, self.check('https://ba.r/id', resp, follow_with_profile_link, mock_get,
@ -355,7 +355,7 @@ class FollowTest(TestCase):
sig_template.startswith('keyId="http://localhost/alice.com#key"'), sig_template.startswith('keyId="http://localhost/alice.com#key"'),
sig_template) sig_template)
follow_id = f'https://fed.brid.gy/web/alice.com/following#2022-01-02T03:04:05-{input}' follow_id = f'https://alice.com/#follow-2022-01-02T03:04:05-{input}'
followers = Follower.query().fetch() followers = Follower.query().fetch()
followee = ActivityPub(id='https://ba.r/id').key followee = ActivityPub(id='https://ba.r/id').key
@ -410,7 +410,7 @@ class FollowTest(TestCase):
self.assertEqual(302, resp.status_code) self.assertEqual(302, resp.status_code)
self.assertEqual('/web/www.alice.com/following', resp.headers['Location']) self.assertEqual('/web/www.alice.com/following', resp.headers['Location'])
id = 'www.alice.com/following#2022-01-02T03:04:05-https://ba.r/actor' id = 'https://www.alice.com/#follow-2022-01-02T03:04:05-https://ba.r/actor'
expected_follow_as1 = as2.to_as1({ expected_follow_as1 = as2.to_as1({
**FOLLOW_URL, **FOLLOW_URL,
'id': id, 'id': id,
@ -418,16 +418,14 @@ class FollowTest(TestCase):
}) })
del expected_follow_as1['to'] del expected_follow_as1['to']
followee = ActivityPub(id='https://ba.r/id').key followee = ActivityPub(id='https://ba.r/id').key
follow_obj = self.assert_object( follow_obj = self.assert_object(id, users=[user.key],
f'https://fed.brid.gy/web/{id}', notify=[followee],
users=[user.key], status='complete',
notify=[followee], labels=['user', 'activity'],
status='complete', source_protocol='ui',
labels=['user', 'activity'], our_as1=expected_follow_as1,
source_protocol='ui', delivered=['http://ba.r/inbox'],
our_as1=expected_follow_as1, delivered_protocol='activitypub')
delivered=['http://ba.r/inbox'],
delivered_protocol='activitypub')
followers = Follower.query().fetch() followers = Follower.query().fetch()
self.assert_entities_equal( self.assert_entities_equal(
@ -607,7 +605,7 @@ class UnfollowTest(TestCase):
self.assertEqual('inactive', follower.status) self.assertEqual('inactive', follower.status)
self.assert_object( self.assert_object(
'https://fed.brid.gy/web/alice.com/following#undo-2022-01-02T03:04:05-https://ba.r/id', 'https://alice.com/#unfollow-2022-01-02T03:04:05-https://ba.r/id',
users=[self.user.key], users=[self.user.key],
notify=[ActivityPub(id='https://ba.r/id').key], notify=[ActivityPub(id='https://ba.r/id').key],
status='complete', status='complete',
@ -646,7 +644,7 @@ class UnfollowTest(TestCase):
self.assertEqual(302, resp.status_code) self.assertEqual(302, resp.status_code)
self.assertEqual('/web/www.alice.com/following', resp.headers['Location']) self.assertEqual('/web/www.alice.com/following', resp.headers['Location'])
id = 'http://localhost/r/www.alice.com/following#undo-2022-01-02T03:04:05-https://ba.r/id' id = 'http://localhost/r/https://www.alice.com/#unfollow-2022-01-02T03:04:05-https://ba.r/id'
expected_undo = { expected_undo = {
'@context': 'https://www.w3.org/ns/activitystreams', '@context': 'https://www.w3.org/ns/activitystreams',
'type': 'Undo', 'type': 'Undo',
@ -670,7 +668,7 @@ class UnfollowTest(TestCase):
self.assertEqual('inactive', follower.status) self.assertEqual('inactive', follower.status)
self.assert_object( self.assert_object(
'https://fed.brid.gy/web/www.alice.com/following#undo-2022-01-02T03:04:05-https://ba.r/id', 'https://www.alice.com/#unfollow-2022-01-02T03:04:05-https://ba.r/id',
users=[user.key], users=[user.key],
notify=[ActivityPub(id='https://ba.r/id').key], notify=[ActivityPub(id='https://ba.r/id').key],
status='complete', status='complete',

Wyświetl plik

@ -2541,8 +2541,6 @@ class WebUtilTest(TestCase):
self.assertIs(False, Web.owns_id('http://localhost:8080/foo')) self.assertIs(False, Web.owns_id('http://localhost:8080/foo'))
self.assertIs(False, Web.owns_id('https://twitter.com/')) self.assertIs(False, Web.owns_id('https://twitter.com/'))
self.assertIs(False, Web.owns_id('https://ap.brid.gy/foo')) self.assertIs(False, Web.owns_id('https://ap.brid.gy/foo'))
# TODO: this still needs to be false
# special-case PROTOCOL_DOMAINS homepages in Web.owns_id, otherwise False
def test_owns_handle(self, *_): def test_owns_handle(self, *_):
self.assertIsNone(Web.owns_handle('foo.com')) self.assertIsNone(Web.owns_handle('foo.com'))

28
web.py
Wyświetl plik

@ -67,18 +67,21 @@ MAX_FEED_POLL_PERIOD = timedelta(weeks=1)
MAX_FEED_PROPERTY_SIZE = 500 * 1000 # Object.atom/rss MAX_FEED_PROPERTY_SIZE = 500 * 1000 # Object.atom/rss
def is_valid_domain(domain): def is_valid_domain(domain, allow_internal=True):
"""Returns True if this is a valid domain we can use, False otherwise. """Returns True if this is a valid domain we can use, False otherwise.
Args:
domain (str):
allow_internal (bool): whether to return True for internal domains
like ``fed.brid.gy``, ``bsky.brid.gy``, etc
Valid means TLD is ok, not blacklisted, etc. Valid means TLD is ok, not blacklisted, etc.
""" """
if not domain or not re.match(DOMAIN_RE, domain): if not domain or not re.match(DOMAIN_RE, domain):
# logger.debug(f"{domain} doesn't look like a domain") # logger.debug(f"{domain} doesn't look like a domain")
return False return False
if (Web.is_blocklisted(domain) if Web.is_blocklisted(domain, allow_internal=allow_internal):
and domain != PRIMARY_DOMAIN
and domain not in PROTOCOL_DOMAINS):
logger.debug(f'{domain} is blocklisted') logger.debug(f'{domain} is blocklisted')
return False return False
@ -135,7 +138,7 @@ class Web(User, Protocol):
"""Validate domain id, don't allow upper case or invalid characters.""" """Validate domain id, don't allow upper case or invalid characters."""
super()._pre_put_hook() super()._pre_put_hook()
id = self.key.id() id = self.key.id()
assert is_valid_domain(id), id assert is_valid_domain(id, allow_internal=False), id
assert id.lower() == id, f'upper case is not allowed in Web key id: {id}' assert id.lower() == id, f'upper case is not allowed in Web key id: {id}'
@classmethod @classmethod
@ -324,7 +327,7 @@ class Web(User, Protocol):
if parsed.path in ('', '/'): if parsed.path in ('', '/'):
id = parsed.netloc id = parsed.netloc
if is_valid_domain(id): if is_valid_domain(id, allow_internal=True):
return super().key_for(id) return super().key_for(id)
# logger.info(f'{id} is not a domain or usable home page URL') # logger.info(f'{id} is not a domain or usable home page URL')
@ -344,14 +347,21 @@ class Web(User, Protocol):
return True if user and user.has_redirects else None return True if user and user.has_redirects else None
elif is_valid_domain(id): elif is_valid_domain(id):
return None return None
elif util.is_web(id) and is_valid_domain(util.domain_from_link(id)):
# we allowed internal domains for protocol bot actors above, but we
# don't want to allow non-homepage URLs on those domains, eg
# https://bsky.brid.gy/foo, so don't allow internal here
domain = util.domain_from_link(id)
if util.is_web(id) and is_valid_domain(domain, allow_internal=False):
return None return None
return False return False
@classmethod @classmethod
def owns_handle(cls, handle): def owns_handle(cls, handle):
if not is_valid_domain(handle): if handle in PROTOCOL_DOMAINS:
return True
elif not is_valid_domain(handle, allow_internal=False):
return False return False
@classmethod @classmethod
@ -586,7 +596,7 @@ def check_web_site():
# this normalizes and lower cases domain # this normalizes and lower cases domain
domain = util.domain_from_link(url, minimize=False) domain = util.domain_from_link(url, minimize=False)
if not domain or not is_valid_domain(domain): if not domain or not is_valid_domain(domain, allow_internal=False):
flash(f'{url} is not a valid or supported web site') flash(f'{url} is not a valid or supported web site')
return render_template('enter_web_site.html'), 400 return render_template('enter_web_site.html'), 400