2023-03-08 21:10:41 +00:00
""" Base protocol class and common code. """
2023-10-26 19:04:04 +00:00
import copy
2024-02-08 19:22:32 +00:00
from datetime import timedelta
2023-03-08 21:10:41 +00:00
import logging
2024-07-16 21:12:03 +00:00
import os
2024-02-10 00:48:19 +00:00
import re
2023-11-13 22:18:32 +00:00
from threading import Lock
2024-04-23 16:39:30 +00:00
from urllib . parse import urljoin , urlparse
2023-03-08 21:10:41 +00:00
2023-11-13 22:18:32 +00:00
from cachetools import cached , LRUCache
2024-05-08 00:01:01 +00:00
from flask import request
2023-03-08 21:10:41 +00:00
from google . cloud import ndb
from google . cloud . ndb import OR
2024-03-11 18:43:58 +00:00
from google . cloud . ndb . model import _entity_to_protobuf
2024-04-24 23:45:43 +00:00
from granary import as1 , as2
2024-05-14 22:58:53 +00:00
from granary . source import html_to_text
2024-04-17 23:43:10 +00:00
from oauth_dropins . webutil . appengine_info import DEBUG
2023-10-16 20:04:34 +00:00
from oauth_dropins . webutil . flask_util import cloud_tasks_only
2024-03-11 18:43:58 +00:00
from oauth_dropins . webutil import models
2024-04-17 23:43:10 +00:00
from oauth_dropins . webutil import util
2023-10-26 23:00:03 +00:00
from oauth_dropins . webutil . util import json_dumps , json_loads
2024-09-19 17:44:34 +00:00
from requests import RequestException
2023-06-13 20:17:11 +00:00
import werkzeug . exceptions
2024-07-11 04:04:04 +00:00
from werkzeug . exceptions import BadGateway , HTTPException
2023-03-08 21:10:41 +00:00
import common
2024-04-18 22:56:40 +00:00
from common import (
add ,
DOMAIN_BLOCKLIST ,
DOMAIN_RE ,
DOMAINS ,
2024-05-14 22:58:53 +00:00
PRIMARY_DOMAIN ,
2024-04-18 22:56:40 +00:00
PROTOCOL_DOMAINS ,
2024-06-03 21:11:23 +00:00
report_error ,
2024-04-18 22:56:40 +00:00
subdomain_wrap ,
)
2024-08-18 05:53:14 +00:00
import dms
2024-07-27 23:40:56 +00:00
import ids
2024-06-02 15:21:18 +00:00
from ids import (
BOT_ACTOR_AP_IDS ,
normalize_user_id ,
translate_object_id ,
translate_user_id ,
)
2024-06-13 18:26:55 +00:00
from models import (
2024-08-16 00:24:25 +00:00
DM ,
2024-06-13 18:26:55 +00:00
Follower ,
Object ,
PROTOCOLS ,
PROTOCOLS_BY_KIND ,
Target ,
User ,
)
2023-03-08 21:10:41 +00:00
2024-02-08 19:22:32 +00:00
# 30 day age threshold. Its consumers are outside this section of the file.
OBJECT_REFRESH_AGE = timedelta(days=30)

# require a follow for users on these domains before we deliver anything from
# them other than their profile
#
# The whitespace-separated LIMITED_DOMAINS environment variable wins when it is
# set and non-empty; otherwise we fall back to the limited_domains file.
LIMITED_DOMAINS = (os.getenv('LIMITED_DOMAINS', '').split()
                   or util.load_file_lines('limited_domains'))

logger = logging.getLogger(__name__)
2024-07-11 04:04:04 +00:00
def error(*args, status=299, **kwargs):
    """Wraps :func:`common.error`, defaulting the HTTP status code to 299.

    The 299 default is used to prevent the task from being retried.

    Args:
      args: passed through to :func:`common.error`
      status (int): HTTP status code
      kwargs: passed through to :func:`common.error`

    Returns:
      whatever :func:`common.error` returns
    """
    kwargs['status'] = status
    return common.error(*args, **kwargs)
class ErrorButDoNotRetryTask(HTTPException):
    """HTTP exception for the non-standard 299 status code.

    299 is the status that :func:`error` defaults to in order to prevent a
    task from being retried.
    """
    code = 299
    description = 'ErrorButDoNotRetryTask'


# Register 299 in werkzeug's code => exception class mappings. setdefault
# leaves any existing registration for 299 untouched.
# https://github.com/pallets/flask/issues/1837#issuecomment-304996942
werkzeug.exceptions.default_exceptions.setdefault(299, ErrorButDoNotRetryTask)
werkzeug.exceptions._aborter.mapping.setdefault(299, ErrorButDoNotRetryTask)
2024-07-12 15:21:30 +00:00
def activity_id_memcache_key(id):
    """Returns the memcache key for an activity id.

    Args:
      id (str): activity id

    Returns:
      the result of :func:`common.memcache_key` for ``receive-[id]``
    """
    return common.memcache_key(f'receive-{id}')
2023-05-26 23:07:36 +00:00
class Protocol :
2023-03-08 21:10:41 +00:00
""" Base protocol class. Not to be instantiated; classmethods only.
Attributes :
2023-10-06 06:32:31 +00:00
LABEL ( str ) : human - readable lower case name
2023-10-06 15:22:50 +00:00
OTHER_LABELS ( list of str ) : label aliases
2023-10-06 06:32:31 +00:00
ABBREV ( str ) : lower case abbreviation , used in URL paths
2024-01-25 03:20:54 +00:00
PHRASE ( str ) : human - readable name or phrase . Used in phrases like
` ` Follow this person on { PHRASE } ` `
2023-10-10 18:14:42 +00:00
LOGO_HTML ( str ) : logo emoji or ` ` < img > ` ` tag
2023-10-24 23:09:28 +00:00
CONTENT_TYPE ( str ) : MIME type of this protocol ' s native data format,
appropriate for the ` ` Content - Type ` ` HTTP header .
2024-04-23 23:52:53 +00:00
HAS_COPIES ( bool ) : whether this protocol is push and needs us to
proactively create " copy " users and objects , as opposed to pulling
converted objects on demand
2024-05-11 23:03:07 +00:00
REQUIRES_AVATAR ( bool ) : whether accounts on this protocol are required
to have a profile picture . If they don ' t, their ``User.status`` will be
` ` blocked ` ` .
REQUIRES_NAME ( bool ) : whether accounts on this protocol are required to
have a profile name that ' s different than their handle or id. If they
don ' t, their ``User.status`` will be ``blocked``.
2024-05-12 02:20:33 +00:00
REQUIRES_OLD_ACCOUNT : ( bool ) : whether accounts on this protocol are
required to be at least : const : ` common . OLD_ACCOUNT_AGE ` old . If their
profile includes creation date and it ' s not old enough, their
` ` User . status ` ` will be ` ` blocked ` ` .
2024-07-04 23:58:06 +00:00
DEFAULT_ENABLED_PROTOCOLS ( sequence of str ) : labels of other protocols
that are automatically enabled for this protocol to bridge into
SUPPORTED_AS1_TYPES ( sequence of str ) : AS1 objectTypes and verbs that this
2024-07-05 04:11:38 +00:00
protocol supports receiving and sending .
2024-08-09 03:41:57 +00:00
SUPPORTS_DMS ( bool ) : whether this protocol can receive DMs ( chat messages )
2023-03-08 21:10:41 +00:00
"""
2023-06-11 15:14:17 +00:00
ABBREV = None
2024-01-25 03:20:54 +00:00
PHRASE = None
2023-06-11 15:14:17 +00:00
OTHER_LABELS = ( )
2023-10-10 18:14:42 +00:00
LOGO_HTML = ' '
2023-10-24 23:09:28 +00:00
CONTENT_TYPE = None
2024-04-23 23:52:53 +00:00
HAS_COPIES = False
2024-05-11 23:03:07 +00:00
REQUIRES_AVATAR = False
REQUIRES_NAME = False
2024-05-12 02:20:33 +00:00
REQUIRES_OLD_ACCOUNT = False
2024-04-17 23:43:10 +00:00
DEFAULT_ENABLED_PROTOCOLS = ( )
2024-07-04 23:58:06 +00:00
SUPPORTED_AS1_TYPES = ( )
2024-08-09 03:41:57 +00:00
SUPPORTS_DMS = False
2023-03-08 21:10:41 +00:00
def __init__(self):
    """Always raises. This class is classmethods only, not to be instantiated.

    Raises:
      AssertionError: always
    """
    # Raise explicitly instead of ``assert False``: assert statements are
    # stripped when Python runs with -O, which would silently allow
    # instantiation.
    raise AssertionError('Protocol is not meant to be instantiated')
2023-06-11 15:14:17 +00:00
class _ClassProperty:
    """Minimal read-only descriptor that computes a value from the owner class.

    Replaces stacking ``@classmethod`` on ``@property``, which was deprecated
    in Python 3.11 and removed in 3.13. Works for access on both the class
    and its instances, and has no ``__set__``, so like ``classmethod`` it is
    a non-data descriptor.
    """

    def __init__(self, fget):
        self.fget = fget
        self.__doc__ = fget.__doc__

    def __get__(self, instance, owner=None):
        if owner is None:
            owner = type(instance)
        return self.fget(owner)


@_ClassProperty
def LABEL(cls):
    """str: lower case name of this protocol, derived from the class name."""
    return cls.__name__.lower()
2023-06-11 02:50:31 +00:00
@staticmethod
def for_request(fed=None):
    """Returns the protocol for the current request, based on its hostname.

    Thin wrapper around :meth:`for_bridgy_subdomain` that passes in the
    current Flask request's host.

    Args:
      fed (str or protocol.Protocol): protocol to return if the current
        request is on ``fed.brid.gy``

    Returns:
      Protocol: protocol, or None if the request hostname is not a subdomain
      of ``brid.gy`` or isn't a known protocol
    """
    host = request.host
    return Protocol.for_bridgy_subdomain(host, fed=fed)
2023-06-11 02:50:31 +00:00
@staticmethod
def for_bridgy_subdomain(domain_or_url, fed=None):
    """Returns the protocol for a brid.gy subdomain.

    Args:
      domain_or_url (str): bare domain, or a web URL whose domain is used
      fed (str or protocol.Protocol): protocol, or protocol label, to return
        if the domain is :const:`common.PRIMARY_DOMAIN` or one of
        :const:`common.LOCAL_DOMAINS`

    Returns:
      class: :class:`Protocol` subclass, or None if the domain is not a
      subdomain of ``brid.gy`` or isn't a known protocol
    """
    if util.is_web(domain_or_url):
        domain = util.domain_from_link(domain_or_url, minimize=False)
    else:
        domain = domain_or_url

    # our primary or local domains: hand back whatever the caller asked for
    if domain == common.PRIMARY_DOMAIN or domain in common.LOCAL_DOMAINS:
        if isinstance(fed, str):
            return PROTOCOLS[fed]
        return fed

    if not domain or not domain.endswith(common.SUPERDOMAIN):
        return None

    # what's left in front of the superdomain is the protocol's label
    return PROTOCOLS.get(domain.removesuffix(common.SUPERDOMAIN))
2023-06-11 02:50:31 +00:00
2023-06-13 20:17:11 +00:00
@classmethod
def owns_id(cls, id):
    """Returns whether this protocol owns the id, or None if it's unclear.

    To be implemented by subclasses. This base implementation always returns
    False.

    IDs are string identities that uniquely identify users, and are intended
    primarily to be machine readable and usable. Handles, by contrast, are
    human-chosen, human-meaningful, and often but not always unique.

    Some protocols' ids are more or less deterministic based on the id
    format, eg AT Protocol owns ``at://`` URIs. Others, like http(s) URLs,
    could be owned by eg Web or ActivityPub.

    Implementations should make a quick guess without expensive side
    effects, eg no external HTTP fetches to fetch the id itself or otherwise
    perform discovery. They should return False if the id's domain is in
    :const:`common.DOMAIN_BLOCKLIST`.

    Args:
      id (str)

    Returns:
      bool or None:
    """
    return False
@classmethod
def owns_handle(cls, handle, allow_internal=False):
    """Returns whether this protocol owns the handle, or None if it's unclear.

    To be implemented by subclasses. This base implementation always returns
    False.

    Handles are string identities that are human-chosen, human-meaningful,
    and often but not always unique. IDs, by contrast, uniquely identify
    users and are intended primarily to be machine readable and usable.

    Some protocols' handles are more or less deterministic based on their
    format, eg ActivityPub (technically WebFinger) handles are
    ``@user@instance.com``. Others, like domains, could be owned by eg Web,
    ActivityPub, AT Protocol, or others.

    Implementations should make a quick guess without expensive side
    effects, eg no external HTTP fetches to fetch the id itself or otherwise
    perform discovery.

    Args:
      handle (str)
      allow_internal (bool): whether to return False for internal domains
        like ``fed.brid.gy``, ``bsky.brid.gy``, etc

    Returns:
      bool or None
    """
    return False
2023-09-22 20:11:15 +00:00
@classmethod
def handle_to_id(cls, handle):
    """Converts a handle to an id.

    To be implemented by subclasses; this base implementation always raises.

    Implementations may incur network requests, eg DNS queries or HTTP
    requests, and avoid blocked or opted out users.

    Args:
      handle (str)

    Returns:
      str: corresponding id, or None if the handle can't be found

    Raises:
      NotImplementedError: always, in this base class
    """
    raise NotImplementedError
2023-06-13 21:30:00 +00:00
@classmethod
def key_for(cls, id, allow_opt_out=False):
    """Returns the :class:`google.cloud.ndb.Key` for a given id's :class:`models.User`.

    If called via `Protocol.key_for`, infers the appropriate protocol with
    :meth:`for_id` and delegates to it. If called with a concrete subclass,
    uses that subclass as is: the stored user's key is returned if one
    exists, otherwise the key of a new, unstored instance for ``id``.

    Args:
      id (str):
      allow_opt_out (bool): whether to allow users who are currently opted out

    Returns:
      google.cloud.ndb.Key: matching key, or None if no protocol could be
      inferred for the id, or if the stored user has a ``status`` set and
      ``allow_opt_out`` is False
    """
    if cls == Protocol:
        inferred = Protocol.for_id(id)
        if not inferred:
            return None
        return inferred.key_for(id, allow_opt_out=allow_opt_out)

    # load user so that we follow use_instead
    user = cls.get_by_id(id, allow_opt_out=True)
    if not user:
        return cls(id=id).key

    hidden = user.status and not allow_opt_out
    return None if hidden else user.key
2023-06-13 21:30:00 +00:00
2023-11-13 22:18:32 +00:00
@cached(LRUCache(20000), lock=Lock())
@staticmethod
def for_id(id, remote=True):
    """Returns the protocol for a given id.

    Tries, in order: our own per-protocol subdomains, each protocol's
    :meth:`owns_id`, an already stored :class:`Object` for the id, and
    finally, only if ``remote`` is True, loading the id over the network
    with each remaining candidate protocol.

    Return values, including None, are memoized by the ``cached`` decorator,
    keyed on the arguments.

    Args:
      id (str)
      remote (bool): whether to perform expensive side effects like fetching
        the id itself over the network, or other discovery.

    Returns:
      Protocol subclass: matching protocol, or None if no single known
      protocol definitively owns this id
    """
    logger.debug(f'Determining protocol for id {id}')
    if not id:
        return None

    if util.is_web(id):
        # step 1: check for our per-protocol subdomains
        try:
            is_homepage = urlparse(id).path.strip('/') == ''
        except ValueError as e:
            logger.info(f'urlparse ValueError: {e}')
            return None

        by_subdomain = Protocol.for_bridgy_subdomain(id)
        if by_subdomain and not is_homepage and id not in BOT_ACTOR_AP_IDS:
            logger.debug(f'{by_subdomain.LABEL} owns id {id}')
            return by_subdomain

    # step 2: check if any Protocols say conclusively that they own it
    # sort to be deterministic
    protocols = sorted({p for p in PROTOCOLS.values() if p},
                       key=lambda p: p.LABEL)
    candidates = []
    for protocol in protocols:
        owns = protocol.owns_id(id)
        if owns:
            logger.debug(f'{protocol.LABEL} owns id {id}')
            return protocol
        elif owns is not False:
            # None means "unclear"; keep it as a candidate for later steps
            candidates.append(protocol)

    if len(candidates) == 1:
        logger.debug(f'{candidates[0].LABEL} owns id {id}')
        return candidates[0]

    # step 3: look for existing Objects in the datastore
    obj = Protocol.load(id, remote=False)
    if obj and obj.source_protocol:
        logger.debug(f'{obj.key.id()} owned by source_protocol {obj.source_protocol}')
        return PROTOCOLS[obj.source_protocol]

    # step 4: fetch over the network, if necessary
    if not remote:
        return None

    for protocol in candidates:
        logger.debug(f'Trying {protocol.LABEL}')
        try:
            if protocol.load(id, local=False, remote=True):
                logger.debug(f'{protocol.LABEL} owns id {id}')
                return protocol
        except BadGateway:
            # we tried and failed fetching the id over the network.
            # this depends on ActivityPub.fetch raising this!
            return None
        except HTTPException:
            # internal error we generated ourselves; try next protocol
            continue
        except Exception as e:
            code, _ = util.interpret_http_exception(e)
            if code:
                # we tried and failed fetching the id over the network
                return None
            raise

    logger.info(f'No matching protocol found for {id} !')
    return None
2024-08-12 20:31:13 +00:00
@cached(LRUCache(20000), lock=Lock())
@staticmethod
def for_handle(handle):
    """Returns the protocol for a given handle.

    May incur expensive side effects like resolving the handle itself over
    the network or other discovery.

    Tries, in order: each protocol's :meth:`owns_handle`, an existing stored
    user with this handle, and finally each remaining candidate protocol's
    :meth:`handle_to_id`.

    Return values, including ``(None, None)``, are memoized by the ``cached``
    decorator, keyed on ``handle``.

    Args:
      handle (str)

    Returns:
      (Protocol subclass, str) tuple: matching protocol and optional id (if
      resolved), or ``(None, None)`` if no known protocol owns this handle
    """
    # TODO: normalize, eg convert domains to lower case
    logger.debug(f'Determining protocol for handle {handle}')
    if not handle:
        return (None, None)

    # step 1: check if any Protocols say conclusively that they own it.
    # sort to be deterministic.
    protocols = sorted({p for p in PROTOCOLS.values() if p},
                       key=lambda p: p.LABEL)
    candidates = []
    for proto in protocols:
        owns = proto.owns_handle(handle)
        if owns:
            logger.debug(f'{proto.LABEL} owns handle {handle}')
            return (proto, None)
        elif owns is not False:
            # None means "unclear"; keep it as a candidate for later steps
            candidates.append(proto)

    if len(candidates) == 1:
        logger.debug(f'{candidates[0].LABEL} owns handle {handle}')
        return (candidates[0], None)

    # step 2: look for matching User in the datastore
    for proto in candidates:
        user = proto.query(proto.handle == handle).get()
        if user:
            if user.status:
                # the stored user has a status set, so don't return them
                return (None, None)
            logger.debug(f'user {user.key} handle {handle}')
            return (proto, user.key.id())

    # step 3: resolve handle to id
    for proto in candidates:
        resolved_id = proto.handle_to_id(handle)
        if resolved_id:
            logger.debug(f'{proto.LABEL} resolved handle {handle} to id {resolved_id}')
            return (proto, resolved_id)

    logger.info(f'No matching protocol found for handle {handle} !')
    return (None, None)
2024-04-12 14:15:33 +00:00
@classmethod
def bridged_web_url_for(cls, user, fallback=False):
    """Returns the web URL for a user's bridged profile in this protocol.

    For example, for Web user ``alice.com``, :meth:`ATProto.bridged_web_url_for`
    returns ``https://bsky.app/profile/alice.com.web.brid.gy``

    This base implementation has no canonical URL of its own: it returns
    ``user.web_url()`` when ``fallback`` is True and None otherwise.

    Args:
      user (models.User)
      fallback (bool): if True, and bridged users have no canonical user
        profile URL in this protocol, return the native protocol's profile URL

    Returns:
      str, or None if there isn't a canonical URL
    """
    return user.web_url() if fallback else None
2024-04-12 14:15:33 +00:00
2023-07-03 15:19:30 +00:00
@classmethod
def actor_key(cls, obj, allow_opt_out=False):
    """Returns the :class:`User` key for a given object's author or actor.

    The owner id comes from ``as1.get_owner`` on the object's AS1 and is
    resolved with :meth:`key_for`.

    Args:
      obj (models.Object)
      allow_opt_out (bool): whether to return a user key if they're opted out

    Returns:
      google.cloud.ndb.key.Key or None: None if the object has no owner
    """
    owner = as1.get_owner(obj.as1)
    if not owner:
        return None
    return cls.key_for(owner, allow_opt_out=allow_opt_out)
2023-07-03 15:19:30 +00:00
2024-05-06 00:13:42 +00:00
@classmethod
def bot_user_id(cls):
    """Returns the Web user id for the bot user for this protocol.

    Built from this protocol's ``ABBREV`` followed directly by
    :const:`common.SUPERDOMAIN`. For example, ``'bsky.brid.gy'`` for ATProto.

    Returns:
      str:
    """
    return f'{cls.ABBREV}{common.SUPERDOMAIN}'
2024-05-06 00:13:42 +00:00
2024-04-21 18:27:23 +00:00
@classmethod
def create_for(cls, user):
    """Creates a copy user in this protocol.

    Implementations should add the copy user to :attr:`copies`. This base
    implementation always raises.

    Args:
      user (models.User): original source user. Shouldn't already have a
        copy user for this protocol in :attr:`copies`.

    Raises:
      ValueError: if we can't create a copy of the given user in this protocol
      NotImplementedError: always, in this base class
    """
    raise NotImplementedError
2023-03-08 21:10:41 +00:00
@classmethod
def send(to_cls, obj, url, from_user=None, orig_obj=None):
    """Sends an outgoing activity.

    To be implemented by subclasses; this base implementation always raises.

    NOTE: if this protocol's ``HAS_COPIES`` is True, and this method creates
    a copy and sends it, it *must* add that copy to the *object*'s (not
    activity's) :attr:`copies`!

    Args:
      obj (models.Object): with activity to send
      url (str): destination URL to send to
      from_user (models.User): user (actor) this activity is from
      orig_obj (models.Object): the "original object" that this object
        refers to, eg replies to or reposts or likes

    Returns:
      bool: True if the activity is sent successfully, False if it is
      ignored or otherwise unsent due to protocol logic, eg no webmention
      endpoint, protocol doesn't support the activity type. (Failures are
      raised as exceptions.)

    Raises:
      werkzeug.HTTPException: if the request fails
      NotImplementedError: always, in this base class
    """
    raise NotImplementedError
@classmethod
def fetch(cls, obj, **kwargs):
    """Fetches a protocol-specific object and populates it in an :class:`Object`.

    To be implemented by subclasses; this base implementation always raises.

    Errors are raised as exceptions. A False return value means the fetch
    didn't fail but didn't succeed either, eg the id isn't valid for this
    protocol, or the fetch didn't return valid data for this protocol.

    Args:
      obj (models.Object): with the id to fetch. Data is filled into one of
        the protocol-specific properties, eg ``as2``, ``mf2``, ``bsky``.
      kwargs: subclass-specific

    Returns:
      bool: True if the object was fetched and populated successfully,
      False otherwise

    Raises:
      requests.RequestException or werkzeug.HTTPException: if the fetch fails
      NotImplementedError: always, in this base class
    """
    raise NotImplementedError
2023-05-24 04:30:57 +00:00
@classmethod
def convert(cls, obj, from_user=None, **kwargs):
    """Converts an :class:`Object` to this protocol's data format.

    For example, an HTML string for :class:`Web`, or a dict with AS2 JSON
    and ``application/activity+json`` for :class:`ActivityPub`.

    Just passes through to :meth:`_convert`, then does minor
    protocol-independent postprocessing: bridged actors are temporarily
    rewritten as ``application`` actors with source links in their summary.
    ``obj.our_as1`` is always put back to its original value before this
    method returns or raises.

    Args:
      obj (models.Object):
      from_user (models.User): user (actor) this activity/object is from
      kwargs: protocol-specific, passed through to :meth:`_convert`

    Returns:
      converted object in the protocol's native format, often a dict. ``{}``
      if ``obj`` is empty or has no AS1.
    """
    if not obj or not obj.as1:
        return {}

    obj_id = obj.key.id() if obj.key else obj.as1.get('id')
    is_activity = obj.as1.get('verb') in ('post', 'update')
    base_obj = as1.get_object(obj.as1) if is_activity else obj.as1
    orig_our_as1 = obj.our_as1

    try:
        # mark bridged actors as bots and add "bridged by Bridgy Fed" to their bios
        #
        # NOTE(review): for_bridgy_subdomain returns a protocol class or None,
        # while DOMAINS presumably holds domain strings, so the
        # ``not in DOMAINS`` check below looks like it's always true. Left as
        # is; confirm the intent.
        if (from_user and base_obj
            and base_obj.get('objectType') in as1.ACTOR_TYPES
            and PROTOCOLS.get(obj.source_protocol) != cls
            and Protocol.for_bridgy_subdomain(obj_id) not in DOMAINS
            # Web users are special cased, they don't get the label if they've
            # explicitly enabled Bridgy Fed with redirects or webmentions
            and not (from_user.LABEL == 'web'
                     and (from_user.last_webmention_in or from_user.has_redirects))):
            obj.our_as1 = copy.deepcopy(obj.as1)
            # re-read the actor after the swap; presumably obj.as1 now reflects
            # our_as1, so the edits land on the copy - verify in models.Object
            actor = as1.get_object(obj.as1) if is_activity else obj.as1
            actor['objectType'] = 'application'
            cls.add_source_links(actor=actor, obj=obj, from_user=from_user)

        return cls._convert(obj, from_user=from_user, **kwargs)
    finally:
        # undo the temporary swap even if conversion raised, so the caller's
        # object isn't left holding the modified copy
        obj.our_as1 = orig_our_as1
2024-05-14 22:58:53 +00:00
@classmethod
def _convert(cls, obj, from_user=None, **kwargs):
    """Converts an :class:`Object` to this protocol's data format.

    To be implemented by subclasses; this base implementation always raises.
    Implementations should generally call :meth:`Protocol.translate_ids` (as
    their own class) before converting to their format.

    Args:
      obj (models.Object):
      from_user (models.User): user (actor) this activity/object is from
      kwargs: protocol-specific

    Returns:
      converted object in the protocol's native format, often a dict. May
      return the ``{}`` empty dict if the object can't be converted.

    Raises:
      NotImplementedError: always, in this base class
    """
    raise NotImplementedError
2024-06-05 23:44:29 +00:00
@classmethod
def add_source_links(cls, actor, obj, from_user):
    """Adds "bridged from ... by Bridgy Fed" HTML to ``actor['summary']``.

    Default implementation; subclasses may override.

    No-op if the summary's text already contains ``Bridgy Fed]``. Otherwise
    the links are appended to the existing summary, separated by two
    ``<br>`` tags.

    Args:
      actor (dict): AS1 actor, modified in place
      obj (models.Object):
      from_user (models.User): user (actor) this activity/object is from
    """
    assert from_user

    # treat a missing or None summary as empty so the concatenation below
    # can't fail on None
    summary = actor.get('summary') or ''
    if 'Bridgy Fed]' in html_to_text(summary, ignore_links=True):
        return

    id = actor.get('id')

    # PHRASE defaults to None on the base class. Only interpolate it when it's
    # set, otherwise the bio would contain the literal text "None".
    phrase = (PROTOCOLS[obj.source_protocol].PHRASE
              if obj.source_protocol else None)
    proto_phrase = f' on {phrase}' if phrase else ''

    # NOTE(review): the handle and URLs are interpolated into HTML unescaped;
    # confirm they're trusted or sanitized downstream.
    if from_user.key and id in (from_user.key.id(), from_user.profile_id()):
        # the actor is from_user themselves: link to their user page and profile
        source_links = (
            f'[<a href="https://{PRIMARY_DOMAIN}{from_user.user_page_path()}">bridged</a>'
            f' from <a href="{from_user.web_url()}">{from_user.handle}</a>'
            f'{proto_phrase} by <a href="https://{PRIMARY_DOMAIN}/">Bridgy Fed</a>]')
    else:
        url = as1.get_url(actor) or id
        source = util.pretty_link(url) if url else '?'
        source_links = (
            f'[bridged from {source}{proto_phrase} by '
            f'<a href="https://{PRIMARY_DOMAIN}/">Bridgy Fed</a>]')

    if summary:
        summary += '<br><br>'
    actor['summary'] = summary + source_links
2024-06-05 23:44:29 +00:00
2023-06-16 20:16:17 +00:00
@classmethod
def target_for(cls, obj, shared=False):
    """Returns an :class:`Object`'s delivery target (endpoint).

    To be implemented by subclasses; this base implementation always raises.

    Examples:

    * If obj has ``source_protocol`` ``web``, returns its URL, as a
      webmention target.
    * If obj is an ``activitypub`` actor, returns its inbox.
    * If obj is an ``activitypub`` object, returns its author's or actor's
      inbox.

    Args:
      obj (models.Object):
      shared (bool): optional. If True, returns a common/shared
        endpoint, eg ActivityPub's ``sharedInbox``, that can be reused for
        multiple recipients for efficiency

    Returns:
      str: target endpoint, or None if not available.

    Raises:
      NotImplementedError: always, in this base class
    """
    raise NotImplementedError
2023-09-06 23:15:19 +00:00
@classmethod
def is_blocklisted(cls, url, allow_internal=False):
    """Returns True if we block the given URL and shouldn't deliver to it.

    Default implementation here, subclasses may override.

    A URL is blocked if its domain, or a parent of its domain, is in
    :const:`common.DOMAIN_BLOCKLIST`, or, unless ``allow_internal`` is True,
    in :const:`common.DOMAINS`.

    Args:
      url (str):
      allow_internal (bool): whether to return False for internal domains
        like ``fed.brid.gy``, ``bsky.brid.gy``, etc

    Returns:
      bool:
    """
    # Build a fresh tuple instead of using += on an alias of the module-level
    # constant: if DOMAIN_BLOCKLIST were a list, += would extend it in place
    # and permanently add our own domains to the shared blocklist.
    blocklist = tuple(DOMAIN_BLOCKLIST)
    if not allow_internal:
        blocklist += tuple(DOMAINS)
    return util.domain_or_parent_in(util.domain_from_link(url), blocklist)
2023-09-06 23:15:19 +00:00
2023-03-08 21:10:41 +00:00
@classmethod
def translate_ids(to_cls, obj):
    """Translates all ids in an AS1 object to a specific protocol.

    The source protocol is inferred separately for each id value. For
    example, if ``to_cls`` is :class:`ActivityPub`, the ATProto URI
    ``at://did:plc:abc/coll/123`` will be converted to
    ``https://bsky.brid.gy/ap/at://did:plc:abc/coll/123``.

    Wraps these AS1 fields:

    * ``id``
    * ``actor``
    * ``author``
    * ``bcc``
    * ``bto``
    * ``cc``
    * ``object``
    * ``object.actor``
    * ``object.author``
    * ``object.id``
    * ``object.inReplyTo``
    * ``attachments[].id``
    * ``tags[objectType=mention].url``
    * ``to``

    This is the inverse of :meth:`models.Object.resolve_ids`. Much of the
    same logic is duplicated there!

    TODO: unify with :meth:`Object.resolve_ids`,
    :meth:`models.Object.normalize_ids`.

    Args:
      to_cls (Protocol subclass): protocol to translate ids into; bound
        automatically since this is a classmethod
      obj (dict): AS1 object or activity (not :class:`models.Object`!)

    Returns:
      dict: wrapped AS1 version of ``obj``. The input is deep copied, not
      modified. Falsy input is returned unchanged.
    """
    assert to_cls != Protocol
    if not obj:
        return obj

    audience_fields = ('to', 'cc', 'bcc', 'bto')

    def compact(elems):
        # collapse dicts that only have an id back down to the bare id string
        return [e['id'] if e.keys() == {'id'} else e for e in elems]

    def translate(elem, field, fn):
        # normalize the field to a list of dicts so strings and objects
        # can be handled uniformly
        values = as1.get_objects(elem, field)
        elem[field] = values
        for val in values:
            orig_id = val.get('id')
            if not orig_id:
                continue
            # special audience values (eg public) aren't real ids, leave them
            if field in audience_fields and as1.is_audience(orig_id):
                continue
            from_cls = Protocol.for_id(orig_id)
            # TODO: what if from_cls is None? relax translate_object_id,
            # make it a noop if we don't know enough about from/to?
            if from_cls and from_cls != to_cls:
                val['id'] = fn(id=orig_id, from_=from_cls, to=to_cls)

        values = compact(values)
        elem[field] = values[0] if len(values) == 1 else values

    outer_obj = copy.deepcopy(obj)
    inner_objs = as1.get_objects(outer_obj)
    outer_obj['object'] = inner_objs

    obj_type = as1.object_type(outer_obj)
    if obj_type in as1.ACTOR_TYPES:
        translate(outer_obj, 'id', translate_user_id)
    else:
        translate(outer_obj, 'id', translate_object_id)

    for inner in inner_objs:
        # the inner object is a user if it's an actor, if it's this
        # activity's own owner, or if it's the target of a (un)follow
        is_actor = (as1.object_type(inner) in as1.ACTOR_TYPES
                    or as1.get_owner(outer_obj) == inner.get('id')
                    or obj_type in ('follow', 'stop-following'))
        if is_actor:
            translate(inner, 'id', translate_user_id)
        else:
            translate(inner, 'id', translate_object_id)

    for cur in [outer_obj] + inner_objs:
        translate(cur, 'inReplyTo', translate_object_id)

        for field in 'actor', 'author', 'to', 'cc', 'bto', 'bcc':
            translate(cur, field, translate_user_id)

        for tag in as1.get_objects(cur, 'tags'):
            if tag.get('objectType') == 'mention':
                translate(tag, 'url', translate_user_id)

        for att in as1.get_objects(cur, 'attachments'):
            translate(att, 'id', translate_object_id)
            # fall back to deriving the attachment's id from its url
            att_url = att.get('url')
            if att_url and not att.get('id'):
                att_cls = Protocol.for_id(att_url)
                if att_cls:
                    att['id'] = translate_object_id(from_=att_cls, to=to_cls,
                                                    id=att_url)

    result = util.trim_nulls(outer_obj)

    inner_result = result.get('object', [])
    if inner_result:
        inner_result = compact(inner_result)
        if len(inner_result) == 1:
            result['object'] = inner_result[0]
        else:
            result['object'] = inner_result

    return result
@classmethod
def receive(from_cls, obj, authed_as=None, internal=False):
    """Handles an incoming activity.

    If ``obj``'s key is unset, ``obj.as1``'s id field is used. If both are
    unset, returns HTTP 299.

    Roughly, in order: validates the id, drops non-public posts, leases the
    activity id in memcache so it's only processed once, checks that the
    activity's owner matches ``authed_as``, stores the activity (wrapping
    bare objects in a create or update), applies type-specific side effects
    (stop-following, delete/undo, block, DMs, follow), and finally hands off
    to :meth:`deliver`.

    Args:
      obj (models.Object)
      authed_as (str): authenticated actor id who sent this activity.
        Defaults to None in the signature, but is asserted to be a
        non-empty string once the activity's owner has been determined.
      internal (bool): whether to allow activity ids on internal domains,
        from opted out/blocked users, etc.

    Returns:
      (str, int) tuple: (response body, HTTP status code) Flask response

    Raises:
      werkzeug.HTTPException: if the request is invalid
    """
    # check some invariants
    assert from_cls != Protocol
    assert isinstance(obj, Object), obj

    if not obj.as1:
        error('No object data provided')

    id = None
    if obj.key and obj.key.id():
        id = obj.key.id()

    if not id:
        id = obj.as1.get('id')
        obj.key = ndb.Key(Object, id)

    if not id:
        error('No id provided')
    elif from_cls.owns_id(id) is False:
        error(f'Protocol {from_cls.LABEL} does not own id {id}')
    elif from_cls.is_blocklisted(id, allow_internal=internal):
        error(f'Activity {id} is blocklisted')
    # check that this activity is public. only do this for some activities,
    # not eg likes or follows, since Mastodon doesn't currently mark those
    # as explicitly public.
    elif (obj.type in set(('post', 'update')) | as1.POST_TYPES | as1.ACTOR_TYPES
          and not as1.is_public(obj.as1, unlisted=False)
          and not as1.is_dm(obj.as1)):
        logger.info('Dropping non-public activity')
        return ('OK', 200)

    # lease this object, atomically
    memcache_key = activity_id_memcache_key(id)
    leased = common.memcache.add(memcache_key, 'leased', noreply=False,
                                 expire=5 * 60)  # 5 min

    # short circuit if we've already seen this activity id.
    # (don't do this for bare objects since we need to check further down
    # whether they've been updated since we saw them last.)
    if (obj.as1.get('objectType') == 'activity'
        and 'force' not in request.values
        and (not leased
             or (obj.new is False and obj.changed is False)
             # TODO: how does this make sense? won't these two lines
             # always be true?!
             or (obj.new is None and obj.changed is None
                 and from_cls.load(id, remote=False)))):
        error(f'Already seen this activity {id}', status=204)

    # leave bulky fields out of the log line
    pruned = {k: v for k, v in obj.as1.items()
              if k not in ('contentMap', 'replies', 'signature')}
    delay = ''
    # only report how far behind we are on a task's first attempt
    if request.headers.get('X-AppEngine-TaskRetryCount') == '0' and obj.created:
        delay_s = int((util.now().replace(tzinfo=None) - obj.created).total_seconds())
        delay = f'({delay_s} s behind)'
    logger.info(f'Receiving {from_cls.LABEL} {obj.type} {id} {delay} AS1: {json_dumps(pruned, indent=2)}')

    # does this protocol support this activity/object type?
    from_cls.check_supported(obj)

    # load actor user, check authorization
    # https://www.w3.org/wiki/ActivityPub/Primer/Authentication_Authorization
    actor = as1.get_owner(obj.as1)
    if not actor:
        error('Activity missing actor or author')
    elif from_cls.owns_id(actor) is False:
        error(f"{from_cls.LABEL} doesn't own actor {actor}, this is probably a bridged activity. Skipping.", status=204)

    assert authed_as
    assert isinstance(authed_as, str)
    authed_as = normalize_user_id(id=authed_as, proto=from_cls)
    actor = normalize_user_id(id=actor, proto=from_cls)
    if actor != authed_as:
        report_error("Auth: receive: authed_as doesn't match owner",
                     user=f'{id} authed_as {authed_as} owner {actor}')
        error(f"actor {actor} isn't authed user {authed_as}")

    # update copy ids to originals
    obj.normalize_ids()
    obj.resolve_ids()

    if (obj.type == 'follow'
            and Protocol.for_bridgy_subdomain(as1.get_object(obj.as1).get('id'))):
        # follows of bot user; refresh user profile first
        logger.info(f'Follow of bot user, reloading {actor}')
        from_user = from_cls.get_or_create(id=actor, allow_opt_out=True)
        from_user.obj = from_cls.load(from_user.profile_id(), remote=True)
    else:
        # load actor user
        from_user = from_cls.get_or_create(id=actor, allow_opt_out=internal)

    if not internal and (not from_user or from_user.manual_opt_out):
        error(f'Actor {actor} is opted out or blocked', status=204)

    # write Object to datastore
    orig = obj
    obj = Object.get_or_create(id, authed_as=actor, **orig.to_dict())
    # carry over the caller's new/changed flags if they were set explicitly
    if orig.new is not None:
        obj.new = orig.new
    if orig.changed is not None:
        obj.changed = orig.changed

    # if this is a post, ie not an activity, wrap it in a create or update
    obj = from_cls.handle_bare_object(obj, authed_as=authed_as)
    obj.add('users', from_user.key)

    inner_obj_as1 = as1.get_object(obj.as1)
    if obj.type in as1.CRUD_VERBS:
        if inner_owner := as1.get_owner(inner_obj_as1):
            if inner_owner_key := from_cls.key_for(inner_owner):
                obj.add('users', inner_owner_key)

    obj.source_protocol = from_cls.LABEL
    obj.put()

    # store inner object
    inner_obj_id = inner_obj_as1.get('id')
    if obj.type in ('post', 'update') and inner_obj_as1.keys() > set(['id']):
        Object.get_or_create(inner_obj_id, our_as1=inner_obj_as1,
                             source_protocol=from_cls.LABEL, authed_as=actor)

    # note: from here on, actor is the activity's actor *object* (dict),
    # not the normalized owner id string used above
    actor = as1.get_object(obj.as1, 'actor')
    actor_id = actor.get('id')

    # handle activity!
    if obj.type == 'stop-following':
        # TODO: unify with handle_follow?
        # TODO: handle multiple followees
        if not actor_id or not inner_obj_id:
            error(f'stop-following requires actor id and object id. Got: {actor_id} {inner_obj_id} {obj.as1}')

        # deactivate Follower
        from_ = from_cls.key_for(actor_id)
        to_cls = Protocol.for_id(inner_obj_id)
        # Protocol.for_id returns None when it can't determine the protocol;
        # bail out cleanly, like handle_follow does, instead of raising
        # AttributeError on the key_for call below
        if not to_cls:
            error(f"Couldn't determine protocol for {inner_obj_id}")
        to = to_cls.key_for(inner_obj_id)
        follower = Follower.query(Follower.to == to,
                                  Follower.from_ == from_,
                                  Follower.status == 'active').get()
        if follower:
            logger.info(f'Marking {follower} inactive')
            follower.status = 'inactive'
            follower.put()
        else:
            logger.warning(f'No Follower found for {from_} => {to}')

        # fall through to deliver to followee
        # TODO: do we convert stop-following to webmention 410 of original
        # follow?

    elif obj.type in ('update', 'like', 'share'):  # require object
        if not inner_obj_id:
            error("Couldn't find id of object to update")

        # fall through to deliver to followers

    elif obj.type in ('delete', 'undo'):
        if not inner_obj_id:
            error("Couldn't find id of object to delete")

        logger.info(f'Marking Object {inner_obj_id} deleted')
        Object.get_or_create(inner_obj_id, deleted=True, authed_as=authed_as)

        # if this is an actor, handle deleting it later so that
        # in case it's from_user, user.enabled_protocols is still populated
        #
        # fall through to deliver to followers and delete copy if necessary.
        # should happen via protocol-specific copy target and send of
        # delete activity.
        # https://github.com/snarfed/bridgy-fed/issues/63

    elif obj.type == 'block':
        if proto := Protocol.for_bridgy_subdomain(inner_obj_id):
            # blocking protocol bot user disables that protocol
            from_user.delete(proto)
            from_user.disable_protocol(proto)
            return 'OK', 200

    elif obj.type == 'post':
        # handle DMs to bot users
        if as1.is_dm(obj.as1):
            return dms.receive(from_user=from_user, obj=obj)

    # fetch actor if necessary
    if (actor and actor.keys() == set(['id'])
            and obj.type not in ('delete', 'undo')):
        logger.debug('Fetching actor so we have name, profile photo, etc')
        actor_obj = from_cls.load(actor['id'])
        if actor_obj and actor_obj.as1:
            obj.our_as1 = {**obj.as1, 'actor': actor_obj.as1}

    # fetch object if necessary so we can render it in feeds
    if (obj.type == 'share'
            and inner_obj_as1.keys() == set(['id'])
            and from_cls.owns_id(inner_obj_id)):
        logger.debug('Fetching object so we can render it in feeds')
        inner_obj = from_cls.load(inner_obj_id)
        if inner_obj and inner_obj.as1:
            obj.our_as1 = {
                **obj.as1,
                'object': {
                    **inner_obj_as1,
                    **inner_obj.as1,
                }
            }

    if obj.type == 'follow':
        if proto := Protocol.for_bridgy_subdomain(inner_obj_id):
            # follow of one of our protocol bot users; enable that protocol.
            # fall through so that we send an accept.
            from_user.enable_protocol(proto)
            proto.bot_follow(from_user)

        from_cls.handle_follow(obj)

    # deliver to targets
    resp = from_cls.deliver(obj, from_user=from_user)

    # if this is a user, deactivate its followers/followings
    # https://github.com/snarfed/bridgy-fed/issues/1304
    if obj.type == 'delete':
        if user_key := from_cls.key_for(id=inner_obj_id):
            if user := user_key.get():
                # iterate over a snapshot: disable_protocol presumably
                # removes the label from user.enabled_protocols (TODO
                # confirm), and mutating a list while looping over it
                # skips elements
                for proto in list(user.enabled_protocols):
                    user.disable_protocol(PROTOCOLS[proto])

                logger.info(f'Deactivating Followers from or to = {inner_obj_id}')
                followers = Follower.query(
                    OR(Follower.to == user_key, Follower.from_ == user_key)
                ).fetch()
                for f in followers:
                    f.status = 'inactive'
                ndb.put_multi(followers)

    # replace the short-lived lease with a longer-lived "done" marker
    common.memcache.set(memcache_key, 'done', expire=7 * 24 * 60 * 60)  # 1w
    return resp
2023-03-08 21:10:41 +00:00
2023-03-11 20:58:36 +00:00
@classmethod
def handle_follow(from_cls, obj):
    """Handles an incoming follow activity.

    Loads the follower and each followee, stores a :class:`Follower` for
    each cross-protocol pair, and sends an ``Accept`` back, but doesn't send
    the ``Follow`` itself. That happens in :meth:`deliver`.

    Followees on the same protocol as the follower, and followees whose
    user key can't be determined, are skipped.

    Args:
      obj (models.Object): follow activity

    Raises:
      werkzeug.HTTPException: via ``error`` if the follow has no actor or no
        object(s), if the follower can't be loaded, or if a followee's
        protocol can't be determined
    """
    logger.debug('Got follow. Loading users, storing Follow(s), sending accept(s)')

    # Prepare follower (from) users' data
    from_as1 = as1.get_object(obj.as1, 'actor')
    from_id = from_as1.get('id')
    if not from_id:
        error(f'Follow activity requires actor. Got: {obj.as1}')

    from_obj = from_cls.load(from_id)
    if not from_obj:
        error(f"Couldn't load {from_id}", status=502)

    # if we have no stored data for the follower, fall back to whatever the
    # follow activity itself included
    if not from_obj.as1:
        from_obj.our_as1 = from_as1
        from_obj.put()

    from_key = from_cls.key_for(from_id)
    if not from_key:
        error(f'Invalid {from_cls} user key: {from_id}')
    obj.users = [from_key]
    from_user = from_cls.get_or_create(id=from_key.id(), obj=from_obj)

    # Prepare followee (to) users' data
    to_as1s = as1.get_objects(obj.as1)
    if not to_as1s:
        error(f'Follow activity requires object(s). Got: {obj.as1}')

    # Store Followers
    for to_as1 in to_as1s:
        to_id = to_as1.get('id')
        if not to_id:
            error(f'Follow activity requires object(s). Got: {obj.as1}')

        logger.info(f'Follow {from_id} => {to_id}')

        to_cls = Protocol.for_id(to_id)
        if not to_cls:
            error(f"Couldn't determine protocol for {to_id}")
        elif from_cls == to_cls and from_cls.LABEL != 'fake':
            logger.info(f'Skipping same-protocol Follower {from_id} => {to_id}')
            continue

        to_obj = to_cls.load(to_id)
        if to_obj and not to_obj.as1:
            to_obj.our_as1 = to_as1
            to_obj.put()

        to_key = to_cls.key_for(to_id)
        if not to_key:
            # report the followee here, not the follower, which was already
            # validated above
            logger.info(f'Skipping invalid {to_cls} user key: {to_id}')
            continue

        # If followee user is already direct, follower may not know they're
        # interacting with a bridge. if followee user is indirect though,
        # follower should know, so they're direct.
        to_user = to_cls.get_or_create(id=to_key.id(), obj=to_obj, direct=False,
                                       allow_opt_out=True)
        Follower.get_or_create(to=to_user, from_=from_user,
                               follow=obj.key, status='active')
        obj.add('notify', to_key)
        from_cls.maybe_accept_follow(follower=from_user, followee=to_user,
                                     follow=obj)
@classmethod
def maybe_accept_follow(_, follower, followee, follow):
    """Sends an accept activity for a follow.

    ...if the follower protocol handles accepts. Otherwise, does nothing.

    Stores the accept as an :class:`models.Object` and enqueues a ``send``
    task to deliver it to the follower's target.

    Args:
      follower: :class:`models.User`
      followee: :class:`models.User`
      follow: :class:`models.Object`

    Raises:
      werkzeug.HTTPException: via ``error`` if no delivery target can be
        found for the follower
    """
    if 'accept' not in follower.SUPPORTED_AS1_TYPES:
        return

    dest = follower.target_for(follower.obj)
    if not dest:
        error(f"Couldn't find delivery target for follower {follower.key.id()}")

    # send accept. note that this is one accept for the whole
    # follow, even if it has multiple followees!
    followee_id = followee.key.id()
    accept_id = f'{followee_id}/followers#accept-{follow.key.id()}'
    accept_obj = Object.get_or_create(
        accept_id,
        authed_as=followee_id,
        undelivered=[Target(protocol=follower.LABEL, uri=dest)],
        our_as1={
            'id': accept_id,
            'objectType': 'activity',
            'verb': 'accept',
            'actor': followee_id,
            'object': follow.as1,
        })

    common.create_task(queue='send', obj=accept_obj.key.urlsafe(),
                       url=dest, protocol=follower.LABEL,
                       user=followee.key.urlsafe())
2023-03-11 20:58:36 +00:00
2024-05-07 17:54:18 +00:00
@classmethod
def bot_follow(bot_cls, user):
    """Follow a user from a protocol bot user.

    ...so that the protocol starts sending us their activities, if it needs
    a follow for that (eg ActivityPub).

    Stores a follow activity from the bot user to ``user`` and enqueues a
    ``send`` task for it. Does nothing, other than logging, if ``user`` has
    no profile object or no delivery target.

    Args:
      user (User)
    """
    # imported here rather than at module level, presumably to avoid a
    # circular import between this module and web
    from web import Web
    bot = Web.get_by_id(bot_cls.bot_user_id())
    now = util.now().isoformat()
    logger.info(f'Following {user.key.id()} back from bot user {bot.key.id()}')

    if not user.obj:
        logger.info("can't follow, user has no profile obj")
        return

    target = user.target_for(user.obj)
    if not target:
        # without a target there's nowhere to send the follow, and we'd
        # otherwise store a Target and enqueue a task with an empty URL
        logger.info(f"can't follow, no delivery target for {user.key.id()}")
        return

    # the timestamp makes the id unique per follow-back
    follow_back_id = f'https://{bot.key.id()}/#follow-back-{user.key.id()}-{now}'
    follow_back = Object(id=follow_back_id, source_protocol='web',
                         undelivered=[Target(protocol=user.LABEL, uri=target)],
                         our_as1={
                             'objectType': 'activity',
                             'verb': 'follow',
                             'id': follow_back_id,
                             'actor': bot.key.id(),
                             'object': user.key.id(),
                         }).put()

    # put() returned the new Object's key, so follow_back is a key here
    common.create_task(queue='send', obj=follow_back.urlsafe(),
                       url=target, protocol=user.LABEL,
                       user=bot.key.urlsafe())
2024-05-07 17:54:18 +00:00
2023-03-21 02:17:55 +00:00
@classmethod
def handle_bare_object(cls, obj, authed_as=None):
    """If obj is a bare object, wraps it in a create or update activity.

    Checks if we've seen it before. Actor profiles and changed posts are
    wrapped in a new, unsaved update activity. Posts that are new, have no
    completed create activity yet, or are being forced are wrapped in a
    create (``post``) activity.

    Args:
      obj (models.Object)
      authed_as (str): authenticated actor id who sent this activity

    Returns:
      models.Object: ``obj`` if it's an activity, otherwise a new object

    Raises:
      werkzeug.HTTPException: via ``error``, with status 204, if ``obj`` is
        a post that's unchanged and already has a completed create
    """
    is_actor = obj.type in as1.ACTOR_TYPES
    if not (is_actor or obj.type in ('note', 'article', 'comment')):
        # already an activity (or something else we don't wrap)
        return obj

    owner = as1.get_owner(obj.as1)
    timestamp = util.now().isoformat()
    base_id = obj.key.id()

    # this is a raw post; wrap it in a create or update activity
    if obj.changed or is_actor:
        if obj.changed:
            logger.info(f'Content has changed from last time at {obj.updated}! Redelivering to all inboxes')
        else:
            logger.info('Got actor profile object, wrapping in update')

        # Mastodon requires the updated field for Updates, so
        # add a default value. It goes first so that an existing updated
        # value in the object itself takes precedence.
        # https://docs.joinmastodon.org/spec/activitypub/#supported-activities-for-statuses
        # https://socialhub.activitypub.rocks/t/what-could-be-the-reason-that-my-update-activity-does-not-work/2893/4
        # https://github.com/mastodon/documentation/pull/1150
        inner = {'updated': timestamp}
        inner.update(obj.as1)

        update_id = f'{base_id}#bridgy-fed-update-{timestamp}'
        update_as1 = {
            'objectType': 'activity',
            'verb': 'update',
            'id': update_id,
            'actor': owner,
            'object': inner,
        }
        logger.debug(f'AS1: {json_dumps(update_as1, indent=2)}')
        return Object(id=update_id, our_as1=update_as1,
                      source_protocol=obj.source_protocol)

    create_id = f'{base_id}#bridgy-fed-create'
    existing = cls.load(create_id, remote=False)
    if (obj.new or not existing or existing.status != 'complete'
            # HACK: force query param here is specific to webmention
            or 'force' in request.form):
        if existing:
            logger.info(f'Existing create {existing.key.id()} status {existing.status}')
        else:
            logger.info('No existing create activity')

        create_as1 = {
            'objectType': 'activity',
            'verb': 'post',
            'id': create_id,
            'actor': owner,
            'object': obj.as1,
            'published': timestamp,
        }
        logger.info('Wrapping in post')
        logger.debug(f'AS1: {json_dumps(create_as1, indent=2)}')
        return Object.get_or_create(create_id, our_as1=create_as1,
                                    source_protocol=obj.source_protocol,
                                    authed_as=authed_as)

    error(f'{obj.key.id()} is unchanged, nothing to do', status=204)
2023-07-01 15:03:47 +00:00
@classmethod
def deliver(from_cls, obj, from_user, to_proto=None):
    """Delivers an activity to its external recipients.

    Collects targets with :meth:`targets`, records them on ``obj`` as
    undelivered, and enqueues one ``send`` task per target.

    Args:
      obj (models.Object): activity to deliver
      from_user (models.User): user (actor) this activity is from
      to_proto (protocol.Protocol): optional; if provided, only deliver to
        targets on this protocol

    Returns:
      (str, int) tuple: Flask response
    """
    if to_proto:
        logger.info(f'Only delivering to {to_proto.LABEL}')

    # find delivery targets. maps Target to Object or None
    found = from_cls.targets(obj, from_user=from_user)
    if not found:
        obj.status = 'ignored'
        obj.put()
        return r'No targets, nothing to do ¯\_(ツ)_/¯', 204

    # sort targets so order is deterministic for tests, debugging, etc
    def by_uri(item):
        return item[0].uri

    ordered = sorted(found.items(), key=by_uri)
    obj.populate(
        status='in progress',
        delivered=[],
        failed=[],
        undelivered=[tgt for tgt, _ in ordered],
    )
    obj.put()
    logger.info(f'Delivering to: {obj.undelivered}')

    # enqueue send task for each target
    user_key = from_user.key.urlsafe()
    for tgt, orig in ordered:
        if not to_proto or tgt.protocol == to_proto.LABEL:
            orig_key = orig.key.urlsafe() if orig else ''
            common.create_task(queue='send', obj=obj.key.urlsafe(),
                               url=tgt.uri, protocol=tgt.protocol,
                               orig_obj=orig_key, user=user_key)

    return 'OK', 202
2023-07-01 13:48:51 +00:00
2023-07-03 15:05:29 +00:00
@classmethod
2024-07-08 15:06:39 +00:00
def targets ( from_cls , obj , from_user , internal = False ) :
2023-10-06 15:22:50 +00:00
""" Collects the targets to send a :class:`models.Object` to.
2023-07-01 13:48:51 +00:00
2023-09-20 02:59:28 +00:00
Targets are both objects - original posts , events , etc - and actors .
2023-07-01 13:48:51 +00:00
Args :
2023-10-06 06:32:31 +00:00
obj ( models . Object )
2024-05-06 00:13:42 +00:00
from_user ( User )
2024-07-08 15:06:39 +00:00
internal ( bool ) : whether this is a recursive internal call
2023-07-01 13:48:51 +00:00
2023-09-20 04:46:41 +00:00
Returns :
2023-10-06 15:22:50 +00:00
dict : maps : class : ` models . Target ` to original ( in response to )
: class : ` models . Object ` , if any , otherwise None
2023-07-01 13:48:51 +00:00
"""
logger . info ( ' Finding recipients and their targets ' )
2023-11-13 22:07:56 +00:00
target_uris = sorted ( set ( as1 . targets ( obj . as1 ) ) )
2023-09-20 04:46:41 +00:00
logger . info ( f ' Raw targets: { target_uris } ' )
2024-05-02 01:53:09 +00:00
orig_obj = None
2023-09-20 04:46:41 +00:00
targets = { } # maps Target to Object or None
2023-10-06 21:57:36 +00:00
owner = as1 . get_owner ( obj . as1 )
2024-09-17 01:27:04 +00:00
allow_opt_out = ( obj . type == ' delete ' )
2024-08-17 19:22:05 +00:00
inner_obj_as1 = as1 . get_object ( obj . as1 )
inner_obj_id = inner_obj_as1 . get ( ' id ' )
in_reply_tos = as1 . get_ids ( inner_obj_as1 , ' inReplyTo ' )
2024-05-02 00:45:24 +00:00
is_reply = obj . type == ' comment ' or in_reply_tos
2024-05-02 20:38:43 +00:00
is_self_reply = False
2023-11-10 22:39:32 +00:00
2024-08-12 00:41:51 +00:00
if is_reply :
original_ids = in_reply_tos
else :
2024-08-14 00:48:15 +00:00
if inner_obj_id == from_user . key . id ( ) :
inner_obj_id = from_user . profile_id ( )
original_ids = [ inner_obj_id ]
2024-08-12 00:41:51 +00:00
2024-07-10 00:28:01 +00:00
# which protocols should we allow delivering to?
to_protocols = [ ]
if DEBUG and from_user . LABEL != ' eefake ' : # for unit tests
2024-08-12 01:00:32 +00:00
to_protocols + = [ PROTOCOLS [ ' fake ' ] , PROTOCOLS [ ' other ' ] ]
for label in ( list ( from_user . DEFAULT_ENABLED_PROTOCOLS )
2024-07-10 00:28:01 +00:00
+ from_user . enabled_protocols ) :
proto = PROTOCOLS [ label ]
if proto . HAS_COPIES and ( obj . type in ( ' update ' , ' delete ' , ' share ' )
or is_reply ) :
for id in original_ids :
if orig := from_user . load ( id , remote = False ) :
if orig . get_copy ( proto ) :
2024-07-18 20:51:22 +00:00
logger . info ( f ' Allowing { proto . LABEL } , original post { id } was bridged there ' )
2024-07-10 00:28:01 +00:00
break
else :
2024-07-10 02:17:04 +00:00
logger . info ( f " Skipping { proto . LABEL } , original posts { original_ids } weren ' t bridged there " )
2024-07-10 00:28:01 +00:00
continue
add ( to_protocols , proto )
# process direct targets
2023-09-20 04:46:41 +00:00
for id in sorted ( target_uris ) :
2024-08-16 21:03:44 +00:00
target_proto = Protocol . for_id ( id )
if not target_proto :
2023-07-01 13:48:51 +00:00
logger . info ( f " Can ' t determine protocol for { id } " )
continue
2024-08-16 21:03:44 +00:00
elif target_proto . is_blocklisted ( id ) :
2023-09-09 22:11:52 +00:00
logger . info ( f ' { id } is blocklisted ' )
continue
2023-07-01 13:48:51 +00:00
2024-08-16 21:03:44 +00:00
orig_obj = target_proto . load ( id )
2023-07-01 13:48:51 +00:00
if not orig_obj or not orig_obj . as1 :
logger . info ( f " Couldn ' t load { id } " )
continue
2024-08-16 21:03:44 +00:00
target_author_key = target_proto . actor_key ( orig_obj )
if ( target_proto not in to_protocols
and obj . source_protocol != target_proto . LABEL ) :
# if author isn't bridged and inReplyTo author is, DM a prompt
if id in in_reply_tos :
if target_author := target_author_key . get ( ) :
if target_author . is_enabled ( from_cls ) :
2024-08-18 05:53:14 +00:00
dms . maybe_send (
from_proto = target_proto , to_user = from_user ,
type = ' replied_to_bridged_user ' , text = f """ \
2024-08-17 19:22:05 +00:00
Hi ! You < a href = " { inner_obj_as1.get( ' url ' ) or inner_obj_id} " > recently replied < / a > to { orig_obj . actor_link ( image = False ) } , who ' s bridged here from {target_proto.PHRASE} . If you want them to see your replies, you can bridge your account into {target_proto.PHRASE} by following this account. <a href= " https://fed.brid.gy/docs " >See the docs</a> for more information. " " " )
2024-08-16 21:03:44 +00:00
continue
2024-05-02 20:38:43 +00:00
# deliver self-replies to followers
# https://github.com/snarfed/bridgy-fed/issues/639
2024-08-14 00:48:15 +00:00
if id in in_reply_tos and owner == as1 . get_owner ( orig_obj . as1 ) :
2024-05-02 20:38:43 +00:00
is_self_reply = True
2024-08-02 17:39:25 +00:00
logger . info ( f ' self reply! ' )
2024-05-02 20:38:43 +00:00
2024-06-27 20:19:47 +00:00
# also add copies' targets
for copy in orig_obj . copies :
2024-07-14 20:42:38 +00:00
proto = PROTOCOLS [ copy . protocol ]
if proto in to_protocols :
# copies generally won't have their own Objects
if target := proto . target_for ( Object ( id = copy . uri ) ) :
logger . info ( f ' Adding target { target } for copy { copy . uri } of original { id } ' )
targets [ Target ( protocol = copy . protocol , uri = target ) ] = orig_obj
2024-06-27 20:19:47 +00:00
2024-08-16 21:03:44 +00:00
if target_proto == from_cls and from_cls . LABEL != ' fake ' :
2024-02-13 02:56:47 +00:00
logger . info ( f ' Skipping same-protocol target { id } ' )
continue
2024-08-16 21:03:44 +00:00
target = target_proto . target_for ( orig_obj )
2023-07-01 13:48:51 +00:00
if not target :
# TODO: surface errors like this somehow?
logger . error ( f " Can ' t find delivery target for { id } " )
continue
logger . info ( f ' Target for { id } is { target } ' )
2024-08-14 00:48:15 +00:00
# only use orig_obj for inReplyTos and repost objects
# https://github.com/snarfed/bridgy-fed/issues/1237
2024-08-16 21:03:44 +00:00
targets [ Target ( protocol = target_proto . LABEL , uri = target ) ] = (
2024-08-14 00:48:15 +00:00
orig_obj if id in in_reply_tos or id in as1 . get_ids ( obj . as1 , ' object ' )
else None )
2024-08-16 21:03:44 +00:00
if target_author_key :
logger . info ( f ' Recipient is { target_author_key } ' )
obj . add ( ' notify ' , target_author_key )
2023-07-01 13:48:51 +00:00
2024-07-08 00:21:37 +00:00
if obj . type == ' undo ' :
logger . info ( ' Object is an undo; adding targets for inner object ' )
if set ( inner_obj_as1 . keys ( ) ) == { ' id ' } :
2024-08-14 00:48:15 +00:00
inner_obj = from_cls . load ( inner_obj_id )
2024-07-08 00:21:37 +00:00
else :
2024-08-14 00:48:15 +00:00
inner_obj = Object ( id = inner_obj_id , our_as1 = inner_obj_as1 )
2024-07-08 00:21:37 +00:00
if inner_obj :
2024-07-08 15:06:39 +00:00
targets . update ( from_cls . targets ( inner_obj , from_user = from_user ,
internal = True ) )
2024-07-08 00:21:37 +00:00
2024-06-27 20:19:47 +00:00
logger . info ( f ' Direct (and copy) targets: { targets . keys ( ) } ' )
2023-07-15 21:23:09 +00:00
2023-07-16 21:06:03 +00:00
# deliver to followers, if appropriate
2024-09-17 01:27:04 +00:00
user_key = from_cls . actor_key ( obj , allow_opt_out = allow_opt_out )
2023-07-03 15:05:29 +00:00
if not user_key :
logger . info ( " Can ' t tell who this is from! Skipping followers. " )
return targets
2024-06-25 21:08:35 +00:00
followers = [ ]
2024-07-10 00:28:01 +00:00
if ( obj . type in ( ' post ' , ' update ' , ' delete ' , ' share ' )
and ( not is_reply or is_self_reply ) ) :
logger . info ( f ' Delivering to followers of { user_key } ' )
followers = [
f for f in Follower . query ( Follower . to == user_key ,
Follower . status == ' active ' )
# skip protocol bot users
if not Protocol . for_bridgy_subdomain ( f . from_ . id ( ) )
# skip protocols this user hasn't enabled, or where the base
# object of this activity hasn't been bridged
and PROTOCOLS_BY_KIND [ f . from_ . kind ( ) ] in to_protocols ]
user_keys = [ f . from_ for f in followers ]
users = [ u for u in ndb . get_multi ( user_keys ) if u ]
User . load_multi ( users )
if ( not followers and
( util . domain_or_parent_in (
util . domain_from_link ( from_user . key . id ( ) ) , LIMITED_DOMAINS )
or util . domain_or_parent_in (
util . domain_from_link ( obj . key . id ( ) ) , LIMITED_DOMAINS ) ) ) :
logger . info ( f ' skipping, { from_user . key . id ( ) } is on a limited domain and has no followers ' )
return { }
# which object should we add to followers' feeds, if any
feed_obj = None
if not internal :
if obj . type == ' share ' :
feed_obj = obj
2024-09-14 05:17:47 +00:00
elif obj . type not in ( ' delete ' , ' undo ' , ' stop-following ' ) :
2024-07-10 00:28:01 +00:00
inner = as1 . get_object ( obj . as1 )
# don't add profile updates to feeds
if not ( obj . type == ' update '
and inner . get ( ' objectType ' ) in as1 . ACTOR_TYPES ) :
inner_id = inner . get ( ' id ' )
if inner_id :
feed_obj = from_cls . load ( inner_id )
for user in users :
2024-06-25 20:37:14 +00:00
if feed_obj :
2024-07-10 00:28:01 +00:00
feed_obj . add ( ' feed ' , user . key )
# TODO: should we pass remote=False through here to Protocol.load?
target = ( user . target_for ( user . obj , shared = True )
if user . obj else None )
if not target :
# TODO: surface errors like this somehow?
logger . error ( f ' Follower { user . key } has no delivery target ' )
2024-07-08 04:37:03 +00:00
continue
2024-07-10 00:28:01 +00:00
# normalize URL (lower case hostname, etc)
# ...but preserve our PDS URL without trailing slash in path
# https://atproto.com/specs/did#did-documents
target = util . dedupe_urls ( [ target ] , trailing_slash = False ) [ 0 ]
# HACK: use last target object from above for reposts, which
# has its resolved id
targets [ Target ( protocol = user . LABEL , uri = target ) ] = \
orig_obj if obj . type == ' share ' else None
if feed_obj :
feed_obj . put ( )
# deliver to enabled HAS_COPIES protocols proactively
# TODO: abstract for other protocols
from atproto import ATProto
if ( ATProto in to_protocols
and obj . type in ( ' post ' , ' update ' , ' delete ' , ' share ' ) ) :
logger . info ( f ' user has ATProto enabled, adding { ATProto . PDS_URL } ' )
targets . setdefault (
Target ( protocol = ATProto . LABEL , uri = ATProto . PDS_URL ) , None )
2024-05-23 06:28:53 +00:00
2023-09-09 22:11:52 +00:00
# de-dupe targets, discard same-domain
2024-05-02 00:45:24 +00:00
# maps string target URL to (Target, Object) tuple
2023-07-10 21:58:45 +00:00
candidates = { t . uri : ( t , obj ) for t , obj in targets . items ( ) }
2024-05-02 00:45:24 +00:00
# maps Target to Object or None
2023-07-10 21:58:45 +00:00
targets = { }
source_domains = [
util . domain_from_link ( url ) for url in
( obj . as1 . get ( ' id ' ) , obj . as1 . get ( ' url ' ) , as1 . get_owner ( obj . as1 ) )
if util . is_web ( url )
]
2024-05-02 00:23:39 +00:00
for url in sorted ( util . dedupe_urls (
candidates . keys ( ) ,
2024-05-08 16:37:47 +00:00
# preserve our PDS URL without trailing slash in path
2024-05-02 00:23:39 +00:00
# https://atproto.com/specs/did#did-documents
trailing_slash = False ) ) :
2023-09-09 22:11:52 +00:00
if util . is_web ( url ) and util . domain_from_link ( url ) in source_domains :
2023-07-10 21:58:45 +00:00
logger . info ( f ' Skipping same-domain target { url } ' )
2024-06-13 18:26:55 +00:00
continue
target , obj = candidates [ url ]
targets [ target ] = obj
2023-07-01 13:48:51 +00:00
return targets
2023-03-21 02:17:55 +00:00
2023-03-08 21:10:41 +00:00
@classmethod
def load(cls, id, remote=None, local=True, **kwargs):
    """Returns the Object for ``id`` from the datastore and/or a remote fetch.

    When the result comes from a remote fetch, the object is stored back to
    the datastore before it is returned. The :attr:`new` and :attr:`changed`
    attributes are set when they can be determined, ie when ``local`` is
    True and ``remote`` is True or None.

    Note that :meth:`Object._post_put_hook` updates the cache.

    Args:
      id (str)
      remote (bool): whether to fetch the object over the network. True
        always fetches, even if a stored copy exists, and overwrites that
        copy. False never fetches; if nothing is stored, returns None. The
        default, None, fetches only if nothing is stored or the stored copy
        is older than ``OBJECT_REFRESH_AGE``.
      local (bool): whether to look in the datastore before fetching. Even
        if False, a successful fetch is still stored to the datastore.
      kwargs: passed through to :meth:`fetch()`

    Returns:
      models.Object: the loaded object, or None if it isn't fetchable (eg a
      non-URL string for Web), if ``remote`` is False and it isn't stored,
      or if the fetched entity serializes to more than
      ``models.MAX_ENTITY_SIZE`` bytes

    Raises:
      requests.HTTPError: anything that :meth:`fetch` raises
    """
    assert id
    assert local or remote is not False

    # step 1: datastore lookup
    stored = Object.get_by_id(id) if local else None
    if stored and (stored.as1 or stored.raw or stored.deleted):
        stored.new = False

    # step 2: decide whether the stored copy is good enough
    if remote is False:
        return stored

    if remote is None and stored:
        refresh_cutoff = util.as_utc(util.now() - OBJECT_REFRESH_AGE)
        if not (stored.updated < refresh_cutoff):
            return stored
        # otherwise the stored copy is stale; fall through and refetch

    # step 3: prepare the entity that fetch() will populate
    if stored:
        obj = stored
        # remember the old contents so we can tell whether the fetch changed them
        orig_as1 = obj.as1
        obj.clear()
        obj.new = False
    else:
        orig_as1 = None
        obj = Object(id=id)
        if local:
            obj.new = True
            obj.changed = False

    # step 4: fetch over the network
    if not cls.fetch(obj, **kwargs):
        return None

    # refuse to store entities that are too large
    # https://stackoverflow.com/a/3042250/186123
    size = len(_entity_to_protobuf(obj)._pb.SerializeToString())
    if size > models.MAX_ENTITY_SIZE:
        logger.warning(f'Object is too big! {size} bytes is over {models.MAX_ENTITY_SIZE}')
        return None

    obj.resolve_ids()
    obj.normalize_ids()

    if obj.new is False:
        obj.changed = obj.activity_changed(orig_as1)

    # step 5: (re)claim the object for this protocol and store it
    if obj.source_protocol not in (cls.LABEL, cls.ABBREV):
        if obj.source_protocol:
            logger.warning(f'Object {obj.key.id()} changed protocol from {obj.source_protocol} to {cls.LABEL} ?!')
        obj.source_protocol = cls.LABEL

    obj.put()
    return obj
2023-09-19 02:19:59 +00:00
2024-07-05 04:11:38 +00:00
@classmethod
def check_supported(cls, obj):
    """Calls ``error`` with status 204 if this protocol can't handle ``obj``.

    Two checks are made, in order:

    1. The activity's type must be in ``cls.SUPPORTED_AS1_TYPES``. For CRUD
       verbs (``as1.CRUD_VERBS``), the inner object's type, if it has one,
       must be in ``cls.SUPPORTED_AS1_TYPES`` too.
    2. DMs are only accepted if ``cls.SUPPORTS_DMS`` is set and either the
       recipient or the sender is a protocol bot account.

    Objects without a type are accepted without further checks.

    (This logic is duplicated in some protocols, eg ActivityPub, so that
    they can short circuit out early. It generally uses their native formats
    instead of AS1, before an :class:`models.Object` is created.)

    Args:
      obj (Object)
    """
    activity_type = obj.type
    if not activity_type:
        return

    inner_type = as1.object_type(as1.get_object(obj.as1)) or ''
    supported_types = cls.SUPPORTED_AS1_TYPES

    type_ok = activity_type in supported_types
    if type_ok and activity_type in as1.CRUD_VERBS and inner_type:
        # eg a post/update/delete of an object type we don't handle
        type_ok = inner_type in supported_types

    if not type_ok:
        error(f"Bridgy Fed for {cls.LABEL} doesn't support {activity_type} {inner_type} yet", status=204)

    recip = as1.recipient_if_dm(obj.as1)
    if not recip:
        return

    # DMs are only allowed to/from protocol bot accounts
    protocol_user_ids = PROTOCOL_DOMAINS + common.protocol_user_copy_ids()
    dm_allowed = cls.SUPPORTS_DMS and (
        recip in protocol_user_ids
        or as1.get_owner(obj.as1) in protocol_user_ids)
    if not dm_allowed:
        error("Bridgy Fed doesn't support DMs", status=204)
2024-08-01 06:12:37 +00:00
2023-09-19 02:19:59 +00:00
2023-10-16 20:04:34 +00:00
@cloud_tasks_only
def receive_task():
    """Task handler that processes a newly received :class:`models.Object`.

    Reads the request's form parameters, loads or builds the object, and
    hands it to :meth:`Protocol.receive` on the object's source protocol.

    Parameters:
      authed_as (str): passed to :meth:`Protocol.receive`. If it is the
        primary domain or one of the protocol domains, the activity is
        treated as internal.
      obj (url-safe google.cloud.ndb.key.Key): :class:`models.Object` to handle
      *: If ``obj`` is unset, all other parameters are properties for a new
        :class:`models.Object` to handle

    Returns whatever :meth:`Protocol.receive` returns. If it raises
    ``requests.RequestException`` or ``ValueError``, ``error`` is called
    with status 304.

    TODO: migrate incoming webmentions to this. See how we did it for AP. The
    difficulty is that parts of :meth:`protocol.Protocol.receive` depend on
    setup in :func:`web.webmention`, eg :class:`models.Object` with ``new`` and
    ``changed``, HTTP request details, etc. See stash for attempt at this for
    :class:`web.Web`.
    """
    params = request.form.to_dict()
    summary = '\n'.join(f'{name} = {value[:100]}' for name, value in params.items())
    logger.info(f'Params:\n' + summary)

    authed_as = params.pop('authed_as', None)
    from_primary = authed_as == common.PRIMARY_DOMAIN
    internal = from_primary or authed_as in common.PROTOCOL_DOMAINS

    urlsafe_key = params.get('obj')
    if urlsafe_key:
        obj = ndb.Key(urlsafe=urlsafe_key).get()
    else:
        # the remaining params are Object properties; decode the JSON ones
        for prop in ('as2', 'bsky', 'mf2', 'our_as1', 'raw'):
            encoded = params.get(prop)
            if encoded:
                params[prop] = json_loads(encoded)
        obj = Object(**params)

    assert obj
    assert obj.source_protocol
    obj.new = True

    from_proto = PROTOCOLS[obj.source_protocol]
    try:
        return from_proto.receive(obj=obj, authed_as=authed_as, internal=internal)
    # RequestException must stay first: some requests exceptions also
    # subclass ValueError
    except RequestException as err:
        util.interpret_http_exception(err)
        error(err, status=304)
    except ValueError as err:
        logger.warning(err, exc_info=True)
        error(err, status=304)
2023-10-31 19:49:15 +00:00
@cloud_tasks_only
def send_task():
    """Task handler that delivers one activity to one destination.

    Calls :meth:`Protocol.send` on the target protocol with the form
    parameters, then records the outcome on the stored
    :class:`models.Object` (``undelivered``/``delivered``/``failed`` targets
    and, once nothing is left undelivered, ``status``).

    Parameters:
      protocol (str): :class:`Protocol` to send to
      url (str): destination URL to send to
      obj (url-safe google.cloud.ndb.key.Key): :class:`models.Object` to send
      orig_obj (url-safe google.cloud.ndb.key.Key): optional "original object"
        :class:`models.Object` that this object refers to, eg replies to or
        reposts or likes
      user (url-safe google.cloud.ndb.key.Key): :class:`models.User` (actor)
        this activity is from

    Returns:
      (str, int) tuple: body and HTTP status. 200 if the send returned a
      truthy value, 204 if it returned False, 304 otherwise (including when
      it raised an HTTP error). Also 204 if ``protocol`` or ``url`` is
      missing, or if the target is in neither the object's ``undelivered``
      nor ``failed`` and ``force`` wasn't passed.
    """
    params = request.form.to_dict()
    logger.info(f'Params: {list(params.items())}')

    # prepare
    url = params.get('url')
    protocol = params.get('protocol')
    if not (url and protocol):
        logger.warning(f'Missing protocol or url; got {protocol} {url}')
        return '', 204

    target = Target(uri=url, protocol=protocol)
    obj = ndb.Key(urlsafe=params['obj']).get()

    to_proto = PROTOCOLS[protocol]
    to_proto.check_supported(obj)
    allow_opt_out = obj.type == 'delete'

    pending = target in obj.undelivered or target in obj.failed
    if not pending and 'force' not in request.values:
        logger.info(f"{url} not in {obj.key.id()} undelivered or failed, giving up")
        return r'¯\_(ツ)_/¯', 204

    user = None
    urlsafe_user = params.get('user')
    if urlsafe_user:
        user_key = ndb.Key(urlsafe=urlsafe_user)
        # use get_by_id so that we follow use_instead
        user_cls = PROTOCOLS_BY_KIND[user_key.kind()]
        user = user_cls.get_by_id(user_key.id(), allow_opt_out=allow_opt_out)

    orig_obj = None
    if params.get('orig_obj'):
        orig_obj = ndb.Key(urlsafe=params['orig_obj']).get()

    # send
    delay = ''
    first_attempt = request.headers.get('X-AppEngine-TaskRetryCount') == '0'
    if first_attempt and obj.created:
        age = util.now().replace(tzinfo=None) - obj.created
        delay_s = int(age.total_seconds())
        delay = f'({delay_s} s behind)'

    logger.info(f'Sending {obj.source_protocol} {obj.type} {obj.key.id()} to {protocol} {url} {delay}')
    logger.debug(f'AS1: {json_dumps(obj.as1, indent=2)}')

    sent = None
    try:
        sent = PROTOCOLS[protocol].send(obj, url, from_user=user, orig_obj=orig_obj)
    except BaseException as err:
        # only interpret HTTP exceptions as delivery failures. anything else
        # (eg a NameError from a bug in a dependency) is re-raised so that the
        # root cause isn't hidden.
        code, body = util.interpret_http_exception(err)
        if not (code or body):
            raise

    if sent is False:
        logger.info('Failed sending!')

    # write results to Object
    #
    # retry aggressively because this has high contention during inbox delivery.
    # (ndb does exponential backoff.)
    # https://console.cloud.google.com/errors/detail/CJm_4sDv9O-iKg;time=P7D?project=bridgy-federated
    @ndb.transactional(retries=10)
    def update_object(obj_key):
        stored = obj_key.get()
        if target in stored.undelivered:
            stored.remove('undelivered', target)

        if sent is None:
            stored.add('failed', target)
        else:
            if target in stored.failed:
                stored.remove('failed', target)
            if sent:
                stored.add('delivered', target)

        if not stored.undelivered:
            if stored.delivered:
                stored.status = 'complete'
            elif stored.failed:
                stored.status = 'failed'
            else:
                stored.status = 'ignored'

        stored.put()

    update_object(obj.key)

    if sent:
        status = 200
    elif sent is False:
        status = 204
    else:
        status = 304
    return '', status