bridgy-fed/common.py

761 wiersze
26 KiB
Python
Czysty Zwykły widok Historia

# coding=utf-8
"""Misc common utilities.
"""
from base64 import b64encode
import copy
from datetime import timedelta, timezone
from hashlib import sha256
import itertools
import logging
import os
import re
2023-02-14 22:30:00 +00:00
import threading
import urllib.parse
2023-02-14 22:30:00 +00:00
from cachetools import cached, LRUCache
from flask import request
from granary import as1, as2, microformats2
from httpsig.requests_auth import HTTPSignatureAuth
import mf2util
from oauth_dropins.webutil import util, webmention
from oauth_dropins.webutil.flask_util import error
from oauth_dropins.webutil.appengine_info import DEBUG
from oauth_dropins.webutil.util import json_dumps, json_loads
import requests
from werkzeug.exceptions import BadGateway
from models import Follower, Object, Target, User
logger = logging.getLogger(__name__)
DOMAIN_RE = r'[^/:]+\.[^/:]+'
ACCT_RE = f'(?:acct:)?([^@]+)@({DOMAIN_RE})'
TLD_BLOCKLIST = ('7z', 'asp', 'aspx', 'gif', 'html', 'ico', 'jpg', 'jpeg', 'js',
'json', 'php', 'png', 'rar', 'txt', 'yaml', 'yml', 'zip')
XML_UTF8 = "<?xml version='1.0' encoding='UTF-8'?>\n"
LINK_HEADER_RE = re.compile(r""" *< *([^ >]+) *> *; *rel=['"]([^'"]+)['"] *""")
CONTENT_TYPE_LD_PLAIN = 'application/ld+json'
CONTENT_TYPE_HTML = 'text/html; charset=utf-8'
CONNEG_HEADERS_AS2_HTML = copy.deepcopy(as2.CONNEG_HEADERS)
2022-12-27 17:21:18 +00:00
CONNEG_HEADERS_AS2_HTML['Accept'] += f', {CONTENT_TYPE_HTML}; q=0.7'
2017-10-20 14:00:42 +00:00
SUPPORTED_VERBS = (
'checkin',
'create',
'follow',
'like',
'post',
'share',
'tag',
'update',
)
PRIMARY_DOMAIN = 'fed.brid.gy'
OTHER_DOMAINS = (
'bridgy-federated.appspot.com',
'bridgy-federated.uc.r.appspot.com',
)
LOCAL_DOMAINS = (
'localhost',
'localhost:8080',
'my.dev.com:8080',
)
DOMAINS = (PRIMARY_DOMAIN,) + OTHER_DOMAINS + LOCAL_DOMAINS
# TODO: unify with Bridgy's
DOMAIN_BLOCKLIST = frozenset((
# https://github.com/snarfed/bridgy-fed/issues/348
'aaronparecki.com',
'facebook.com',
'fb.com',
't.co',
'twitter.com',
) + DOMAINS)
_DEFAULT_SIGNATURE_USER = None
CACHE_TIME = timedelta(seconds=60)
PAGE_SIZE = 20
def host_url(path_query=None):
base = request.host_url
if (util.domain_or_parent_in(request.host, OTHER_DOMAINS) or
# when running locally against prod datastore
(not DEBUG and request.host in LOCAL_DOMAINS)):
base = f'https://{PRIMARY_DOMAIN}'
return urllib.parse.urljoin(base, path_query)
def default_signature_user():
global _DEFAULT_SIGNATURE_USER
if _DEFAULT_SIGNATURE_USER is None:
_DEFAULT_SIGNATURE_USER = User.get_or_create('snarfed.org')
return _DEFAULT_SIGNATURE_USER
def pretty_link(url, text=None, user=None):
"""Wrapper around util.pretty_link() that converts Mastodon user URLs to @-@.
Eg for URLs like https://mastodon.social/@foo and
https://mastodon.social/users/foo, defaults text to @foo@mastodon.social if
it's not provided.
Args:
url: str
text: str
user: :class:`User`, optional, user for the current request
"""
2023-02-12 20:03:27 +00:00
if user and user.is_homepage(url):
return user.user_page_link()
if text is None:
match = re.match(r'https?://([^/]+)/(@|users/)([^/]+)$', url)
if match:
text = match.expand(r'@\3@\1')
return util.pretty_link(url, text=text)
@cached(LRUCache(1000), key=lambda id, user=None: id, lock=threading.Lock())
2023-02-14 22:30:00 +00:00
def get_object(id, user=None):
"""Loads and returns an Object from memory cache, datastore, or HTTP fetch.
Note that :meth:`Object._post_put_hook` updates the cache.
Args:
id: str
user: optional, :class:`User` used to sign HTTP request, if necessary
Returns: Object, or None if it can't be fetched
"""
obj = Object.get_by_id(id)
if obj:
if obj.as2:
logger.info(f'Got Object from datastore: {id}')
2023-02-14 22:30:00 +00:00
return obj
else:
obj = Object(id=id)
2023-02-14 22:30:00 +00:00
logger.info(f'Object not in datastore or has no as2: {id}')
2023-02-14 22:30:00 +00:00
obj_as2 = get_as2(id, user=user).json()
obj.populate(as2=json_dumps(obj_as2),
2023-02-14 22:30:00 +00:00
as1=json_dumps(as2.to_as1(obj_as2)),
source_protocol='activitypub')
obj.put()
return obj
def signed_get(url, user, **kwargs):
return signed_request(util.requests_get, url, user, **kwargs)
def signed_post(url, user, **kwargs):
assert user
return signed_request(util.requests_post, url, user, **kwargs)
def signed_request(fn, url, user, data=None, log_data=True, headers=None, **kwargs):
"""Wraps requests.* and adds HTTP Signature.
Args:
fn: :func:`util.requests_get` or :func:`util.requests_get`
url: str
user: :class:`User` to sign request with
data: optional AS2 object
log_data: boolean, whether to log full data object
kwargs: passed through to requests
Returns: :class:`requests.Response`
"""
if headers is None:
headers = {}
# prepare HTTP Signature and headers
if not user:
user = default_signature_user()
if data:
if log_data:
logger.info(f'Sending AS2 object: {json_dumps(data, indent=2)}')
data = json_dumps(data).encode()
headers = copy.deepcopy(headers)
headers.update({
# required for HTTP Signature
# https://tools.ietf.org/html/draft-cavage-http-signatures-07#section-2.1.3
2023-01-03 23:41:43 +00:00
'Date': util.now().strftime('%a, %d %b %Y %H:%M:%S GMT'),
# required by Mastodon
# https://github.com/tootsuite/mastodon/pull/14556#issuecomment-674077648
'Host': util.domain_from_link(url, minimize=False),
'Content-Type': as2.CONTENT_TYPE,
# required for HTTP Signature and Mastodon
'Digest': f'SHA-256={b64encode(sha256(data or b"").digest()).decode()}',
})
domain = user.key.id()
logger.info(f"Signing with {domain}'s key")
key_id = host_url(domain)
# (request-target) is a special HTTP Signatures header that some fediverse
# implementations require, eg Peertube.
# https://datatracker.ietf.org/doc/html/draft-cavage-http-signatures-12#section-2.3
# https://github.com/snarfed/bridgy-fed/issues/40
auth = HTTPSignatureAuth(
secret=user.private_pem(), key_id=key_id, algorithm='rsa-sha256',
sign_header='signature',
headers=('Date', 'Host', 'Digest', '(request-target)'))
# make HTTP request
kwargs.setdefault('gateway', True)
resp = fn(url, data=data, auth=auth, headers=headers, allow_redirects=False,
**kwargs)
logger.info(f'Got {resp.status_code} headers: {resp.headers}')
# handle GET redirects manually so that we generate a new HTTP signature
if resp.is_redirect and fn == util.requests_get:
return signed_request(fn, resp.headers['Location'], data=data, user=user,
headers=headers, log_data=log_data, **kwargs)
type = content_type(resp)
if (type and type != 'text/html' and
2017-10-24 14:30:33 +00:00
(type.startswith('text/') or type.endswith('+json') or type.endswith('/json'))):
logger.info(resp.text)
return resp
def get_as2(url, user=None):
2017-10-20 14:00:42 +00:00
"""Tries to fetch the given URL as ActivityStreams 2.
Uses HTTP content negotiation via the Content-Type header. If the url is
HTML and it has a rel-alternate link with an AS2 content type, fetches and
returns that URL.
Includes an HTTP Signature with the request.
https://w3c.github.io/activitypub/#authorization
https://tools.ietf.org/html/draft-cavage-http-signatures-07
https://github.com/mastodon/mastodon/pull/11269
Mastodon requires this signature if AUTHORIZED_FETCH aka secure mode is on:
https://docs.joinmastodon.org/admin/config/#authorized_fetch
If user is not provided, defaults to using @snarfed.org@snarfed.org's key.
2017-10-20 14:00:42 +00:00
Args:
url: string
user: :class:`User` used to sign request
2017-10-20 14:00:42 +00:00
Returns:
:class:`requests.Response`
2017-10-20 14:00:42 +00:00
Raises:
:class:`requests.HTTPError`, :class:`werkzeug.exceptions.HTTPException`
If we raise a werkzeug HTTPException, it will have an additional
requests_response attribute with the last requests.Response we received.
2017-10-20 14:00:42 +00:00
"""
def _error(resp):
msg = f"Couldn't fetch {url} as ActivityStreams 2"
logger.warning(msg)
err = BadGateway(msg)
err.requests_response = resp
raise err
2017-10-20 14:00:42 +00:00
resp = signed_get(url, user=user, headers=CONNEG_HEADERS_AS2_HTML)
if content_type(resp) in (as2.CONTENT_TYPE, CONTENT_TYPE_LD_PLAIN):
return resp
2017-10-20 14:00:42 +00:00
parsed = util.parse_html(resp)
obj = parsed.find('link', rel=('alternate', 'self'), type=(
as2.CONTENT_TYPE, as2.CONTENT_TYPE_LD))
if not (obj and obj['href']):
_error(resp)
2017-10-20 14:00:42 +00:00
resp = signed_get(urllib.parse.urljoin(resp.url, obj['href']),
user=user, headers=as2.CONNEG_HEADERS)
if content_type(resp) in (as2.CONTENT_TYPE, CONTENT_TYPE_LD_PLAIN):
return resp
2017-10-20 14:00:42 +00:00
_error(resp)
2017-10-20 14:00:42 +00:00
def content_type(resp):
"""Returns a :class:`requests.Response`'s Content-Type, without charset suffix."""
type = resp.headers.get('Content-Type')
if type:
return type.split(';')[0]
def remove_blocklisted(urls):
"""Returns the subset of input URLs that aren't in our domain blocklist.
Args:
urls: sequence of str
Returns: list of str
"""
return [u for u in urls if not util.domain_or_parent_in(
util.domain_from_link(u), DOMAIN_BLOCKLIST)]
def send_webmentions(activity_wrapped, proxy=None, **object_props):
"""Sends webmentions for an incoming ActivityPub inbox delivery.
2021-07-09 05:50:33 +00:00
Args:
activity_wrapped: dict, AS1 activity
object_props: passed through to the newly created Object entities
Returns: boolean, True if any webmentions were sent, False otherwise
2021-07-09 05:50:33 +00:00
"""
activity = redirect_unwrap(activity_wrapped)
verb = activity.get('verb')
if verb and verb not in SUPPORTED_VERBS:
error(f'{verb} activities are not supported yet.', status=501)
2021-07-09 05:50:33 +00:00
# extract source and targets
source = activity.get('url') or activity.get('id')
obj = activity.get('object')
obj_url = util.get_url(obj)
targets = util.get_list(activity, 'inReplyTo')
if isinstance(obj, dict):
if not source or verb in ('create', 'post', 'update'):
source = obj_url or obj.get('id')
targets.extend(util.get_list(obj, 'inReplyTo'))
if not source:
error("Couldn't find original post URL")
2021-07-09 05:50:33 +00:00
tags = util.get_list(activity_wrapped, 'tags')
obj_wrapped = activity_wrapped.get('object')
if isinstance(obj_wrapped, dict):
tags.extend(util.get_list(obj_wrapped, 'tags'))
for tag in tags:
if tag.get('objectType') == 'mention':
url = tag.get('url')
if url and url.startswith(host_url()):
2021-07-09 05:50:33 +00:00
targets.append(redirect_unwrap(url))
if verb in ('follow', 'like', 'share'):
targets.append(obj_url)
2021-07-09 05:50:33 +00:00
targets = util.dedupe_urls(util.get_url(t) for t in targets)
targets = remove_blocklisted(t.lower() for t in targets)
2021-07-09 05:50:33 +00:00
if not targets:
logger.info("Couldn't find any IndieWeb target URLs in inReplyTo, object, or mention tags")
return False
logger.info(f'targets: {targets}')
2021-07-09 05:50:33 +00:00
# send webmentions and store Objects
errors = [] # stores (code, body) tuples
domains = []
targets = [Target(uri=uri, protocol='activitypub') for uri in targets]
obj = Object(id=source, labels=['notification'], undelivered=targets,
status='in progress', **object_props)
if activity.get('objectType') == 'activity':
obj.labels.append('activity')
obj.put()
while obj.undelivered:
target = obj.undelivered.pop()
domain = util.domain_from_link(target.uri, minimize=False)
if domain == util.domain_from_link(source, minimize=False):
logger.info(f'Skipping same-domain webmention from {source} to {target.uri}')
2021-07-09 05:50:33 +00:00
continue
if domain not in obj.domains:
obj.domains.append(domain)
wm_source = (obj.proxy_url()
2021-07-09 05:50:33 +00:00
if verb in ('follow', 'like', 'share') or proxy
else source)
logger.info(f'Sending webmention from {wm_source} to {target.uri}')
2021-07-09 05:50:33 +00:00
try:
endpoint = webmention.discover(target.uri).endpoint
2021-07-09 05:50:33 +00:00
if endpoint:
webmention.send(endpoint, wm_source, target.uri)
logger.info('Success!')
obj.delivered.append(target)
else:
logger.info('No webmention endpoint')
2021-07-09 05:50:33 +00:00
except BaseException as e:
code, body = util.interpret_http_exception(e)
if not code and not body:
raise
errors.append((code, body))
obj.failed.append(target)
obj.put()
2021-07-09 05:50:33 +00:00
obj.status = 'complete' if obj.delivered else 'failed' if obj.failed else 'ignored'
obj.put()
2021-07-09 05:50:33 +00:00
if errors:
msg = 'Errors: ' + ', '.join(f'{code} {body}' for code, body in errors)
error(msg, status=int(errors[0][0] or 502))
return True
def postprocess_as2(activity, user=None, target=None, create=True):
2021-07-08 04:02:13 +00:00
"""Prepare an AS2 object to be served or sent via ActivityPub.
2021-07-08 04:02:13 +00:00
Args:
activity: dict, AS2 object or activity
2022-11-16 06:00:28 +00:00
user: :class:`User`, required. populated into actor.id and
publicKey fields if needed.
2021-07-08 04:02:13 +00:00
target: dict, AS2 object, optional. The target of activity's inReplyTo or
Like/Announce/etc object, if any.
create: boolean, whether to wrap `Note` and `Article` objects in a
`Create` activity
2021-07-08 04:02:13 +00:00
"""
2022-11-16 06:00:28 +00:00
assert user
2021-07-08 04:02:13 +00:00
type = activity.get('type')
# actor objects
if type == 'Person':
2022-11-16 06:00:28 +00:00
postprocess_as2_actor(activity, user)
if not activity.get('publicKey'):
2021-07-08 04:02:13 +00:00
# underspecified, inferred from this issue and Mastodon's implementation:
# https://github.com/w3c/activitypub/issues/203#issuecomment-297553229
# https://github.com/tootsuite/mastodon/blob/bc2c263504e584e154384ecc2d804aeb1afb1ba3/app/services/activitypub/process_account_service.rb#L77
actor_url = host_url(activity.get('preferredUsername'))
2021-07-08 04:02:13 +00:00
activity.update({
'publicKey': {
'id': actor_url,
'owner': actor_url,
2022-11-16 06:00:28 +00:00
'publicKeyPem': user.public_pem().decode(),
2021-07-08 04:02:13 +00:00
},
'@context': (util.get_list(activity, '@context') +
['https://w3id.org/security/v1']),
})
return activity
for actor in (util.get_list(activity, 'attributedTo') +
util.get_list(activity, 'actor')):
2022-11-16 06:00:28 +00:00
postprocess_as2_actor(actor, user)
2021-07-08 04:02:13 +00:00
# inReplyTo: singly valued, prefer id over url
target_id = target.get('id') if target else None
in_reply_to = activity.get('inReplyTo')
if in_reply_to:
if target_id:
activity['inReplyTo'] = target_id
elif isinstance(in_reply_to, list):
if len(in_reply_to) > 1:
logger.warning(
2021-07-08 04:02:13 +00:00
"AS2 doesn't support multiple inReplyTo URLs! "
f'Only using the first: {in_reply_to[0]}')
2021-07-08 04:02:13 +00:00
activity['inReplyTo'] = in_reply_to[0]
# Mastodon evidently requires a Mention tag for replies to generate a
# notification to the original post's author. not required for likes,
# reposts, etc. details:
# https://github.com/snarfed/bridgy-fed/issues/34
if target:
for to in (util.get_list(target, 'attributedTo') +
util.get_list(target, 'actor')):
if isinstance(to, dict):
to = util.get_first(to, 'url') or to.get('id')
2021-07-08 04:02:13 +00:00
if to:
activity.setdefault('tag', []).append({
'type': 'Mention',
'href': to,
})
# activity objects (for Like, Announce, etc): prefer id over url
obj = activity.get('object')
if obj:
if isinstance(obj, dict) and not obj.get('id'):
obj['id'] = target_id or util.get_first(obj, 'url')
2021-07-08 04:02:13 +00:00
elif target_id and obj != target_id:
activity['object'] = target_id
# id is required for most things. default to url if it's not set.
if not activity.get('id'):
activity['id'] = util.get_first(activity, 'url')
2021-07-08 04:02:13 +00:00
# TODO: find a better way to check this, sometimes or always?
# removed for now since it fires on posts without u-id or u-url, eg
# https://chrisbeckstrom.com/2018/12/27/32551/
# assert activity.get('id') or (isinstance(obj, dict) and obj.get('id'))
activity['id'] = redirect_wrap(activity.get('id'))
activity['url'] = [redirect_wrap(u) for u in util.get_list(activity, 'url')]
if len(activity['url']) == 1:
activity['url'] = activity['url'][0]
2021-07-08 04:02:13 +00:00
# copy image(s) into attachment(s). may be Mastodon-specific.
# https://github.com/snarfed/bridgy-fed/issues/33#issuecomment-440965618
obj_or_activity = obj if isinstance(obj, dict) else activity
img = util.get_list(obj_or_activity, 'image')
if img:
obj_or_activity.setdefault('attachment', []).extend(img)
2021-07-08 04:02:13 +00:00
# cc target's author(s) and recipients
2021-07-08 04:02:13 +00:00
# https://www.w3.org/TR/activitystreams-vocabulary/#audienceTargeting
# https://w3c.github.io/activitypub/#delivery
if target and (type in as2.TYPE_TO_VERB or type in ('Article', 'Note')):
recips = itertools.chain(*(util.get_list(target, field) for field in
('actor', 'attributedTo', 'to', 'cc')))
2021-07-08 04:02:13 +00:00
activity['cc'] = util.dedupe_urls(util.get_url(recip) or recip.get('id')
for recip in recips)
# to public, since Mastodon interprets to public as public, cc public as unlisted:
# https://socialhub.activitypub.rocks/t/visibility-to-cc-mapping/284
# https://wordsmith.social/falkreon/securing-activitypub
to = activity.setdefault('to', [])
if as2.PUBLIC_AUDIENCE not in to:
to.append(as2.PUBLIC_AUDIENCE)
2021-07-08 04:02:13 +00:00
# wrap articles and notes in a Create activity
if create and type in ('Article', 'Note'):
2021-07-08 04:02:13 +00:00
activity = {
'@context': as2.CONTEXT,
'type': 'Create',
'id': f'{activity["id"]}#bridgy-fed-create',
2022-11-16 06:00:28 +00:00
'actor': postprocess_as2_actor({}, user),
2021-07-08 04:02:13 +00:00
'object': activity,
}
return util.trim_nulls(activity)
2022-11-16 06:00:28 +00:00
def postprocess_as2_actor(actor, user=None):
2021-07-08 04:02:13 +00:00
"""Prepare an AS2 actor object to be served or sent via ActivityPub.
Modifies actor in place.
2021-07-08 04:02:13 +00:00
Args:
actor: dict, AS2 actor object
2022-11-16 06:00:28 +00:00
user: :class:`User`
Returns:
actor dict
2021-07-08 04:02:13 +00:00
"""
2023-02-12 20:03:27 +00:00
url = user.homepage if user else None
urls = util.get_list(actor, 'url')
if not urls and url:
urls = [url]
domain = util.domain_from_link(urls[0], minimize=False)
urls[0] = redirect_wrap(urls[0])
actor.setdefault('id', host_url(domain))
actor.update({
'url': urls if len(urls) > 1 else urls[0],
# This has to be the domain for Mastodon interop/Webfinger discovery!
# See related comment in actor() below.
2022-11-16 06:00:28 +00:00
'preferredUsername': domain,
})
# Override the label for their home page to be "Web site"
for att in util.get_list(actor, 'attachment'):
if att.get('type') == 'PropertyValue':
val = att.get('value', '')
link = util.parse_html(val).find('a')
if url and (val == url or link.get('href') == url):
att['name'] = 'Web site'
2021-07-08 04:02:13 +00:00
# required by pixelfed. https://github.com/snarfed/bridgy-fed/issues/39
actor.setdefault('summary', '')
return actor
2021-07-08 04:02:13 +00:00
def redirect_wrap(url):
"""Returns a URL on our domain that redirects to this URL.
2021-07-08 04:02:13 +00:00
...to satisfy Mastodon's non-standard domain matching requirement. :(
2021-07-08 04:02:13 +00:00
Args:
url: string
2021-07-08 04:02:13 +00:00
https://github.com/snarfed/bridgy-fed/issues/16#issuecomment-424799599
https://github.com/tootsuite/mastodon/pull/6219#issuecomment-429142747
2021-07-08 04:02:13 +00:00
Returns: string, redirect url
"""
if not url:
return url
prefix = host_url('/r/')
2021-07-08 04:02:13 +00:00
if url.startswith(prefix):
return url
2021-07-08 04:02:13 +00:00
return prefix + url
2021-07-08 04:02:13 +00:00
def redirect_unwrap(val):
"""Removes our redirect wrapping from a URL, if it's there.
val may be a string, dict, or list. dicts and lists are unwrapped
2021-07-08 04:02:13 +00:00
recursively.
Strings that aren't wrapped URLs are left unchanged.
Args:
val: string or dict or list
2021-07-08 04:02:13 +00:00
Returns: string, unwrapped url
"""
if isinstance(val, dict):
return {k: redirect_unwrap(v) for k, v in val.items()}
2021-07-08 04:02:13 +00:00
elif isinstance(val, list):
return [redirect_unwrap(v) for v in val]
2021-07-08 04:02:13 +00:00
elif isinstance(val, str):
prefix = host_url('/r/')
2021-07-08 04:02:13 +00:00
if val.startswith(prefix):
unwrapped = val.removeprefix(prefix)
if util.is_web(unwrapped):
return util.follow_redirects(unwrapped).url
elif val.startswith(host_url()):
path = val.removeprefix(host_url())
if re.match(DOMAIN_RE, path):
return util.follow_redirects(path).url
2021-07-08 04:02:13 +00:00
return val
def actor(user):
"""Fetches a home page, converts its representative h-card to AS2 actor.
Args:
user: :class:`User`
Returns: (dict mf2 item, dict AS1 actor, dict AS2 actor)
"""
assert user
domain = user.key.id()
try:
2023-02-12 20:03:27 +00:00
mf2 = util.fetch_mf2(user.homepage, gateway=True)
except ValueError as e:
2023-02-12 20:03:27 +00:00
error(f"Couldn't fetch {user.homepage}: {e}")
hcard = mf2util.representative_hcard(mf2, mf2['url'])
logger.info(f'Representative h-card: {json_dumps(hcard, indent=2)}')
if not hcard:
error(f"Couldn't find a representative h-card (http://microformats.org/wiki/representative-hcard-parsing) on {mf2['url']}")
actor_as1 = microformats2.json_to_object(hcard, rel_urls=mf2.get('rel-urls'))
actor_as2 = postprocess_as2(as2.from_as1(actor_as1), user=user)
# TODO: unify with activitypub.actor()
actor_as2.update({
'id': host_url(domain),
# This has to be the domain for Mastodon etc interop! It seems like it
# should be the custom username from the acct: u-url in their h-card,
# but that breaks Mastodon's Webfinger discovery. Background:
# https://github.com/snarfed/bridgy-fed/issues/302#issuecomment-1324305460
# https://github.com/snarfed/bridgy-fed/issues/77
'preferredUsername': domain,
'inbox': host_url(f'{domain}/inbox'),
'outbox': host_url(f'{domain}/outbox'),
'following': host_url(f'{domain}/following'),
'followers': host_url(f'{domain}/followers'),
'endpoints': {
'sharedInbox': host_url('inbox'),
},
})
logger.info(f'Generated AS2 actor: {json_dumps(actor_as2, indent=2)}')
return hcard, actor_as1, actor_as2
def fetch_followers(domain, collection):
"""Fetches a page of Follower entities.
Wraps :func:`common.fetch_page`. Paging uses the `before` and `after` query
parameters, if available in the request.
Args:
domain: str, user to fetch entities for
collection, str, 'followers' or 'following'
Returns:
(results, new_before, new_after) tuple with:
results: list of Follower entities
new_before, new_after: str query param values for `before` and `after`
to fetch the previous and next pages, respectively
"""
assert collection in ('followers', 'following'), collection
domain_prop = Follower.dest if collection == 'followers' else Follower.src
query = Follower.query(
Follower.status == 'active',
domain_prop == domain,
).order(-Follower.updated)
return fetch_page(query, Follower)
def fetch_page(query, model_class):
"""Fetches a page of results from a datastore query.
Uses the `before` and `after` query params (if provided; should be ISO8601
timestamps) and the queried model class's `updated` property to identify the
page to fetch.
Populates a `log_url_path` property on each result entity that points to a
its most recent logged request.
Args:
query: :class:`ndb.Query`
model_class: ndb model class
Returns:
(results, new_before, new_after) tuple with:
results: list of query result entities
new_before, new_after: str query param values for `before` and `after`
to fetch the previous and next pages, respectively
"""
# if there's a paging param ('before' or 'after'), update query with it
# TODO: unify this with Bridgy's user page
def get_paging_param(param):
val = request.values.get(param)
if val:
try:
dt = util.parse_iso8601(val.replace(' ', '+'))
except BaseException as e:
error(f"Couldn't parse {param}, {val!r} as ISO8601: {e}")
if dt.tzinfo:
dt = dt.astimezone(timezone.utc).replace(tzinfo=None)
return dt
before = get_paging_param('before')
after = get_paging_param('after')
if before and after:
error("can't handle both before and after")
elif after:
query = query.filter(model_class.updated >= after).order(model_class.updated)
elif before:
query = query.filter(model_class.updated < before).order(-model_class.updated)
else:
query = query.order(-model_class.updated)
query_iter = query.iter()
results = sorted(itertools.islice(query_iter, 0, PAGE_SIZE),
key=lambda r: r.updated, reverse=True)
# calculate new paging param(s)
has_next = results and query_iter.probably_has_next()
new_after = (
before if before
else results[0].updated if has_next and after
else None)
if new_after:
new_after = new_after.isoformat()
new_before = (
after if after else
results[-1].updated if has_next
else None)
if new_before:
new_before = new_before.isoformat()
return results, new_before, new_after