"""ATProto protocol implementation. https://atproto.com/ """ import itertools import logging import os import re from arroba import did from arroba.datastore_storage import AtpRepo, DatastoreStorage from arroba.repo import Repo, Write import arroba.server from arroba.storage import Action, CommitData from arroba.util import at_uri, next_tid, parse_at_uri, service_jwt import dag_json from flask import abort, request from google.cloud import dns from google.cloud import ndb from granary import as1, bluesky from lexrpc import Client import requests from oauth_dropins.webutil.appengine_info import DEBUG from oauth_dropins.webutil import util from oauth_dropins.webutil.util import json_dumps, json_loads import common from common import ( add, DOMAIN_BLOCKLIST, DOMAIN_RE, error, USER_AGENT, ) import flask_app from models import Object, PROTOCOLS, Target, User from protocol import Protocol logger = logging.getLogger(__name__) arroba.server.storage = DatastoreStorage() LEXICONS = Client('https://unused').defs DNS_GCP_PROJECT = 'brid-gy' DNS_ZONE = 'brid-gy' DNS_TTL = 10800 # seconds logger.info(f'Using GCP DNS project {DNS_GCP_PROJECT} zone {DNS_ZONE}') dns_client = dns.Client(project=DNS_GCP_PROJECT) class ATProto(User, Protocol): """AT Protocol class. Key id is DID, currently either did:plc or did:web. https://atproto.com/specs/did """ ABBREV = 'atproto' LOGO_HTML = '' def _pre_put_hook(self): """Validate id, require did:plc or non-blocklisted did:web. Also check that the ``atproto_did`` property isn't set. """ super()._pre_put_hook() id = self.key.id() assert id if id.startswith('did:plc:'): assert id.removeprefix('did:plc:') elif id.startswith('did:web:'): domain = id.removeprefix('did:web:') assert (re.match(common.DOMAIN_RE, domain) and not Protocol.is_blocklisted(domain)), domain else: assert False, f'{id} is not valid did:plc or did:web' assert not self.atproto_did, \ f"{self.key} shouldn't have atproto_did {self.atproto_did}" @ndb.ComputedProperty def handle(self): """Returns handle if the DID document includes one, otherwise None.""" did_obj = ATProto.load(self.key.id()) if did_obj: handle, _, _ = parse_at_uri( util.get_first(did_obj.raw, 'alsoKnownAs', '')) if handle: return handle def web_url(self): return bluesky.Bluesky.user_url(self.handle_or_id()) @classmethod def owns_id(cls, id): return (id.startswith('at://') or id.startswith('did:plc:') or id.startswith('did:web:') or id.startswith('https://bsky.app/')) @classmethod def owns_handle(cls, handle): if not re.match(DOMAIN_RE, handle): return False @classmethod def handle_to_id(cls, handle): assert cls.owns_handle(handle) is not False user = ATProto.query(ATProto.handle == handle).get() if user: return user.key.id() return did.resolve_handle(handle, get_fn=util.requests_get) def profile_id(self): return f'at://{self.key.id()}/app.bsky.actor.profile/self' @classmethod def target_for(cls, obj, shared=False): """Returns the PDS URL for the given object, or None. If the repo DID/handle doesn't exist in the PLC directory, defaults to returning Bridgy Fed's URL as the PDS. Args: obj (Object) Returns: str: """ id = obj.key.id() if id.startswith('did:'): return None logger.info(f'Finding ATProto PDS for {id}') if id.startswith('https://bsky.app/'): return cls.target_for(Object(id=bluesky.web_url_to_at_uri(id))) if id.startswith('at://'): repo, collection, rkey = parse_at_uri(id) if not repo.startswith('did:'): # repo is a handle; resolve it repo_did = did.resolve_handle(repo, get_fn=util.requests_get) if repo_did: return cls.target_for(Object(id=id.replace( f'at://{repo}', f'at://{repo_did}'))) else: return None did_obj = ATProto.load(repo) if did_obj: return cls._pds_for(did_obj) # TODO: what should we do if the DID doesn't exist? should we return # None here? or do we need this path to return BF's URL so that we # then create the DID for non-ATP users on demand? if obj.as1: owner = as1.get_owner(obj.as1) if owner: user_key = Protocol.key_for(owner) if user_key: user = user_key.get() if user and user.atproto_did: return cls.target_for(Object(id=f'at://{user.atproto_did}')) return common.host_url() @classmethod def _pds_for(cls, did_obj): """ Args: did_obj (Object) Returns: str: PDS URL, or None """ assert did_obj.key.id().startswith('did:') for service in did_obj.raw.get('service', []): if service.get('id') in ('#atproto_pds', f'{did_obj.key.id()}#atproto_pds'): return service.get('serviceEndpoint') logger.info(f"{did_obj.key.id()}'s DID doc has no ATProto PDS") return None def is_blocklisted(url): # don't block common.DOMAINS since we want ourselves, ie our own PDS, to # be a valid domain to send to return util.domain_or_parent_in(util.domain_from_link(url), DOMAIN_BLOCKLIST) @classmethod @ndb.transactional() def create_for(cls, user): """Creates an ATProto user, repo, and profile for a non-ATProto user. Args: user (models.User) """ assert not isinstance(user, ATProto) if user.atproto_did: return # create new DID, repo logger.info(f'Creating new did:plc for {user.key}') did_plc = did.create_plc(user.handle_as('atproto'), pds_url=common.host_url(), post_fn=util.requests_post) Object.get_or_create(did_plc.did, raw=did_plc.doc) user.atproto_did = did_plc.did add(user.copies, Target(uri=did_plc.did, protocol='atproto')) handle = user.handle_as('atproto') # create _atproto DNS record for handle resolution # https://atproto.com/specs/handle#handle-resolution name = f'_atproto.{handle}.' val = f'"did={did_plc.did}"' logger.info(f'adding GCP DNS TXT record for {name} {val}') if not DEBUG: zone = dns_client.zone(DNS_ZONE) r = zone.resource_record_set(name=name, record_type='TXT', ttl=DNS_TTL, rrdatas=[val]) changes = zone.changes() changes.add_record_set(r) changes.create() logger.info(' done!') # fetch and store profile if not user.obj: user.obj = user.load(user.profile_id()) initial_writes = None if user.obj and user.obj.as1: # create user profile profile = user.obj.as_bsky(fetch_blobs=True) profile_json = json_dumps(dag_json.encode(profile).decode(), indent=2) logger.info(f'Storing ATProto app.bsky.actor.profile self: {profile_json}') initial_writes = [Write( action=Action.CREATE, collection='app.bsky.actor.profile', rkey='self', record=profile)] uri = at_uri(user.atproto_did, 'app.bsky.actor.profile', 'self') user.obj.add('copies', Target(uri=uri, protocol='atproto')) user.obj.put() repo = Repo.create( arroba.server.storage, user.atproto_did, handle=handle, callback=lambda _: common.create_task(queue='atproto-commit'), initial_writes=initial_writes, signing_key=did_plc.signing_key, rotation_key=did_plc.rotation_key) user.put() @classmethod def send(to_cls, obj, url, orig_obj=None): """Creates a record if we own its repo. Creates the repo first if it doesn't exist. If the repo's DID doc doesn't say we're its PDS, does nothing and returns False. Doesn't deliver anywhere externally! BGS(es) will receive this record through ``subscribeRepos`` and then deliver it to AppView(s), which will notify recipients as necessary. """ if url.rstrip('/') != common.host_url().rstrip('/'): logger.info(f'Target PDS {url} is not us') return False type = as1.object_type(obj.as1) if type in ('accept', 'undo'): logger.info(f'Skipping sending to {url}') return False elif type == 'post': type = as1.object_type(as1.get_object(obj.as1)) assert type in ('note', 'article') from_cls = PROTOCOLS[obj.source_protocol] from_key = from_cls.actor_key(obj) if not from_key: logger.info(f"Couldn't find {obj.source_protocol} user for {obj.key}") return False # load user user = from_cls.get_or_create(from_key.id(), propagate=True) assert user.atproto_did logger.info(f'{user.key} is {user.atproto_did}') did_doc = to_cls.load(user.atproto_did) pds = to_cls._pds_for(did_doc) if not pds or pds.rstrip('/') != url.rstrip('/'): logger.warning(f'{from_key} {user.atproto_did} PDS {pds} is not us') return False # load repo repo = arroba.server.storage.load_repo(user.atproto_did) assert repo repo.callback = lambda _: common.create_task(queue='atproto-commit') # create record and commit record = obj.as_bsky(fetch_blobs=True) type = record['$type'] lex_type = LEXICONS[type]['type'] assert lex_type == 'record', f"Can't store {type} object of type {lex_type}" ndb.transactional() def write(): tid = next_tid() record_json = json_dumps(dag_json.encode(record).decode(), indent=2) logger.info(f'Storing ATProto app.bsky.feed.post {tid}: {record_json}') repo.apply_writes( [Write(action=Action.CREATE, collection='app.bsky.feed.post', rkey=tid, record=record)]) at_uri = f'at://{user.atproto_did}/app.bsky.feed.post/{tid}' obj.add('copies', Target(uri=at_uri, protocol=to_cls.ABBREV)) obj.put() write() return True @classmethod def fetch(cls, obj, **kwargs): """Tries to fetch a ATProto object. Args: obj (models.Object): with the id to fetch. Fills data into the ``as2`` property. kwargs: ignored Returns: bool: True if the object was fetched and populated successfully, False otherwise Raises: TODO """ id = obj.key.id() if not cls.owns_id(id): logger.info(f"ATProto can't fetch {id}") return False # did:plc, did:web if id.startswith('did:'): try: obj.raw = did.resolve(id, get_fn=util.requests_get) return True except (ValueError, requests.RequestException) as e: util.interpret_http_exception(e) return False pds = cls.target_for(obj) if not pds: return False # at:// URI # examples: # at://did:plc:s2koow7r6t7tozgd4slc3dsg/app.bsky.feed.post/3jqcpv7bv2c2q # https://bsky.social/xrpc/com.atproto.repo.getRecord?repo=did:plc:s2koow7r6t7tozgd4slc3dsg&collection=app.bsky.feed.post&rkey=3jqcpv7bv2c2q repo, collection, rkey = parse_at_uri(obj.key.id()) client = Client(pds, headers={'User-Agent': USER_AGENT}) ret = client.com.atproto.repo.getRecord( repo=repo, collection=collection, rkey=rkey) # TODO: verify sig? obj.bsky = ret['value'] return True @classmethod def serve(cls, obj): """Serves a :class:`models.Object` as AS2. This is minimally implemented to serve ``app.bsky.*`` lexicon data, but BGSes and other clients will generally receive ATProto commits via ``com.atproto.sync.subscribeRepos`` subscriptions, not BF-specific ``/convert/...`` HTTP requests, so this should never be used in practice. """ return bluesky.from_as1(obj.as1), {'Content-Type': 'application/json'} # URL route is registered in hub.py def poll_notifications(): """Fetches and enqueueus new activities from the AppView for our users. Uses the ``listNotifications`` endpoint, which is intended for end users. 🤷 https://github.com/bluesky-social/atproto/discussions/1538 """ repos = {r.key.id(): r for r in AtpRepo.query()} logger.info(f'Got {len(repos)} repos') repo_dids = [] users = itertools.chain(*(cls.query(cls.atproto_did.IN(list(repos))) for cls in set(PROTOCOLS.values()) if cls and cls != ATProto)) # TODO: convert to Session for connection pipelining! client = Client(f'https://{os.environ["APPVIEW_HOST"]}', headers={'User-Agent': USER_AGENT}) for user in users: # TODO: store and use cursor # seenAt would be easier, but they don't support it yet # https://github.com/bluesky-social/atproto/issues/1636 repo = repos[user.atproto_did] client.access_token = service_jwt(os.environ['APPVIEW_HOST'], repo_did=user.atproto_did, privkey=repo.signing_key) resp = client.app.bsky.notification.listNotifications() for notif in resp['notifications']: logger.info(f'Got {notif["reason"]} from {notif["author"]["handle"]} {notif["uri"]} {notif["cid"]}') # TODO: verify sig. skipping this for now because we're getting # these from the AppView, which is trusted, specifically we expect # the BGS and/or the AppView already checked sigs. obj = Object.get_or_create(id=notif['uri'], bsky=notif['record'], source_protocol=ATProto.ABBREV) if not obj.status: obj.status = 'new' obj.add('notify', user.key) obj.put() common.create_task(queue='receive', obj=obj.key.urlsafe(), # TODO: should this be the receiving user? # or the sending user? user=user.key.urlsafe()) return 'OK'