From 0d33b6422d3bdfa99ceed0b8f678b6afb9ecb27d Mon Sep 17 00:00:00 2001 From: Ryan Barrett Date: Fri, 22 Sep 2023 12:14:50 -0700 Subject: [PATCH] add Protocol.owns_handle (and in subclasses) --- activitypub.py | 12 ++++++++++++ atproto.py | 6 ++++++ ids.py | 5 +---- protocol.py | 27 +++++++++++++++++++++++++-- tests/test_activitypub.py | 11 +++++++++++ tests/test_atproto.py | 10 ++++++++++ tests/test_web.py | 10 ++++++++++ tests/testutil.py | 2 ++ web.py | 11 ++++++++--- 9 files changed, 85 insertions(+), 9 deletions(-) diff --git a/activitypub.py b/activitypub.py index e46a465..04e95cb 100644 --- a/activitypub.py +++ b/activitypub.py @@ -122,6 +122,18 @@ class ActivityPub(User, Protocol): return False + @classmethod + def owns_handle(cls, handle): + """Returns True if handle is a WebFinger @-@, False otherwise. + + Example: ``@user@instance.com``. The leading ``@`` is optional. + + https://datatracker.ietf.org/doc/html/rfc7033#section-3.1 + https://datatracker.ietf.org/doc/html/rfc7033#section-4.5 + """ + parts = handle.lstrip('@').split('@') + return len(parts) == 2 and parts[0] and parts[1] + @classmethod def target_for(cls, obj, shared=False): """Returns `obj`'s or its author's/actor's inbox, if available.""" diff --git a/atproto.py b/atproto.py index c04f049..4d7805b 100644 --- a/atproto.py +++ b/atproto.py @@ -23,6 +23,7 @@ import common from common import ( add, DOMAIN_BLOCKLIST, + DOMAIN_RE, error, USER_AGENT, ) @@ -93,6 +94,11 @@ class ATProto(User, Protocol): or id.startswith('did:web:') or id.startswith('https://bsky.app/')) + @classmethod + def owns_handle(cls, handle): + if not re.match(DOMAIN_RE, handle): + return False + @classmethod def target_for(cls, obj, shared=False): """Returns the PDS URL for the given object, or None. diff --git a/ids.py b/ids.py index 0e49736..5c1e3a1 100644 --- a/ids.py +++ b/ids.py @@ -64,10 +64,7 @@ def convert_handle(*, handle, from_proto, to_proto): """ assert handle and from_proto and to_proto assert from_proto != to_proto - - if from_proto in (Web, ATProto): - # Web, ATProto, Nostr handles are all domains - assert re.match(DOMAIN_RE, handle) + assert from_proto.owns_handle(handle) is not False match (from_proto.LABEL, to_proto.LABEL): case (_, 'activitypub'): diff --git a/protocol.py b/protocol.py index dc0df59..ba209fa 100644 --- a/protocol.py +++ b/protocol.py @@ -138,10 +138,33 @@ class Protocol: Returns False if the id's domain is in :attr:`common.DOMAIN_BLOCKLIST`. Args: - id: str + id (str) Returns: - boolean or None + bool or None + """ + return False + + @classmethod + def owns_handle(cls, handle): + """Returns whether this protocol owns the handle, or None if it's unclear. + + To be implemented by subclasses. + + Some protocols' handles are more or less deterministic based on the id + format, eg ActivityPub (technically WebFinger) handles are + ``@user@instance.com``. Others, like domains, could be owned by eg Web, + ActivityPub, AT Protocol, or others. + + This should be a quick guess without expensive side effects, eg no + external HTTP fetches to fetch the id itself or otherwise perform + discovery. + + Args: + handle (str) + + Returns: + bool or None """ return False diff --git a/tests/test_activitypub.py b/tests/test_activitypub.py index ff9ccba..75f910d 100644 --- a/tests/test_activitypub.py +++ b/tests/test_activitypub.py @@ -1497,6 +1497,17 @@ class ActivityPubUtilsTest(TestCase): self.assertFalse(ActivityPub.owns_id('https://twitter.com/foo')) self.assertFalse(ActivityPub.owns_id('https://fed.brid.gy/foo')) + def test_owns_handle(self): + for handle in ('@user@instance', 'user@instance.com', 'user.com@instance.com', + 'user@instance'): + with self.subTest(handle=handle): + assert ActivityPub.owns_handle(handle) + + for handle in ('instance', 'instance.com', '@user', '@user.com', + 'http://user.com'): + with self.subTest(handle=handle): + self.assertFalse(ActivityPub.owns_handle(handle)) + def test_postprocess_as2_multiple_in_reply_tos(self): self.assert_equals({ 'id': 'http://localhost/r/xyz', diff --git a/tests/test_atproto.py b/tests/test_atproto.py index 97ac55b..dbdaa73 100644 --- a/tests/test_atproto.py +++ b/tests/test_atproto.py @@ -90,6 +90,16 @@ class ATProtoTest(TestCase): self.assertTrue(ATProto.owns_id( 'https://bsky.app/profile/snarfed.org/post/3k62u4ht77f2z')) + def test_owns_handle(self): + self.assertIsNone(ATProto.owns_handle('foo.com')) + self.assertIsNone(ATProto.owns_handle('foo.bar.com')) + + self.assertFalse(ATProto.owns_handle('foo')) + self.assertFalse(ATProto.owns_handle('@foo')) + self.assertFalse(ATProto.owns_handle('@foo.com')) + self.assertFalse(ATProto.owns_handle('@foo@bar.com')) + self.assertFalse(ATProto.owns_handle('foo@bar.com')) + def test_target_for_did_doc(self): self.assertIsNone(ATProto.target_for(Object(id='did:plc:foo'))) diff --git a/tests/test_web.py b/tests/test_web.py index 31c8b58..2000e24 100644 --- a/tests/test_web.py +++ b/tests/test_web.py @@ -1900,6 +1900,16 @@ class WebUtilTest(TestCase): self.assertFalse(Web.owns_id('https://twitter.com/foo')) self.assertFalse(Web.owns_id('https://fed.brid.gy/foo')) + def test_owns_handle(self, *_): + self.assertIsNone(Web.owns_handle('foo.com')) + self.assertIsNone(Web.owns_handle('foo.bar.com')) + + self.assertFalse(Web.owns_handle('foo')) + self.assertFalse(Web.owns_handle('@foo')) + self.assertFalse(Web.owns_handle('@foo.com')) + self.assertFalse(Web.owns_handle('@foo@bar.com')) + self.assertFalse(Web.owns_handle('foo@bar.com')) + def test_fetch(self, mock_get, __): mock_get.return_value = REPOST diff --git a/tests/testutil.py b/tests/testutil.py index da0b37a..72a916c 100644 --- a/tests/testutil.py +++ b/tests/testutil.py @@ -89,6 +89,8 @@ class Fake(User, protocol.Protocol): return id.startswith('fake:') or id in cls.fetchable + owns_handle = owns_id + @classmethod def is_blocklisted(cls, url): return url.startswith('fake:blocklisted') diff --git a/web.py b/web.py index 697d961..b619af1 100644 --- a/web.py +++ b/web.py @@ -21,7 +21,7 @@ from requests import HTTPError, RequestException from werkzeug.exceptions import BadGateway, BadRequest, HTTPException, NotFound import common -from common import add +from common import add, DOMAIN_RE from flask_app import app, cache from models import Follower, Object, PROTOCOLS, Target, User from protocol import Protocol @@ -67,7 +67,7 @@ class Web(User, Protocol): """Validate domain id, don't allow upper case or invalid characters.""" super()._pre_put_hook() id = self.key.id() - assert re.match(common.DOMAIN_RE, id) + assert re.match(DOMAIN_RE, id) assert id.lower() == id, f'upper case is not allowed in Web key id: {id}' assert not self.is_blocklisted(id), f'{id} is a blocked domain' @@ -234,7 +234,7 @@ class Web(User, Protocol): if parsed.path in ('', '/'): id = parsed.netloc - if re.match(common.DOMAIN_RE, id): + if re.match(DOMAIN_RE, id): tld = id.split('.')[-1] if tld in NON_TLDS: logger.info(f"{id} looks like a domain but {tld} isn't a TLD") @@ -260,6 +260,11 @@ class Web(User, Protocol): return None if util.is_web(id) else False + @classmethod + def owns_handle(cls, handle): + if not re.match(DOMAIN_RE, handle): + return False + @classmethod def target_for(cls, obj, shared=False): """Returns `obj`'s id, as a URL webmention target."""