start to separate logic from protocols with new Protocol/ActivityPub classes

for #388
pull/448/head
Ryan Barrett 2023-03-08 13:10:41 -08:00
parent c2833e59d1
commit ffd8810b44
No known key found for this signature in database
GPG key ID: 6BE31FDF4776E9D4
16 changed files with 1293 additions and 1158 deletions

activitypub.py

@@ -1,60 +1,483 @@
"""Handles requests for ActivityPub endpoints: actors, inbox, etc.
"""
from base64 import b64encode
import datetime
from hashlib import sha256
import itertools
import logging
import re
import threading
from cachetools import LRUCache
from flask import abort, make_response, request
from google.cloud import ndb
from google.cloud.ndb import OR
from flask import request
from granary import as1, as2
from httpsig import HeaderVerifier
from httpsig.requests_auth import HTTPSignatureAuth
from httpsig.utils import parse_signature_header
from oauth_dropins.webutil import flask_util, util
from oauth_dropins.webutil.util import json_dumps, json_loads
import requests
from werkzeug.exceptions import BadGateway
from app import app, cache
import common
from common import CACHE_TIME, host_url, redirect_unwrap, redirect_wrap, TLD_BLOCKLIST
from common import (
CACHE_TIME,
CONTENT_TYPE_HTML,
error,
host_url,
redirect_unwrap,
redirect_wrap,
TLD_BLOCKLIST,
)
from models import Follower, Object, Target, User
from protocol import Protocol
logger = logging.getLogger(__name__)
SUPPORTED_TYPES = ( # AS1
'accept',
'article',
'audio',
'comment',
'create',
'delete',
'follow',
'image',
'like',
'note',
'post',
'share',
'stop-following',
'undo',
'update',
'video',
)
FETCH_OBJECT_TYPES = (
'share',
)
CONNEG_HEADERS_AS2_HTML = {
'Accept': f'{as2.CONNEG_HEADERS["Accept"]}, {CONTENT_TYPE_HTML}; q=0.7'
}
# activity ids that we've already handled and can now ignore
seen_ids = LRUCache(100000)
seen_ids_lock = threading.Lock()
HTTP_SIG_HEADERS = ('Date', 'Host', 'Digest', '(request-target)')
_DEFAULT_SIGNATURE_USER = None
def default_signature_user():
global _DEFAULT_SIGNATURE_USER
if _DEFAULT_SIGNATURE_USER is None:
_DEFAULT_SIGNATURE_USER = User.get_or_create('snarfed.org')
return _DEFAULT_SIGNATURE_USER
def error(msg, status=400, exc_info=False):
"""Like flask_util.error, but wraps body in JSON."""
logger.info(f'Returning {status}: {msg}', exc_info=exc_info)
abort(status, response=make_response({'error': msg}, status))
class ActivityPub(Protocol):
"""ActivityPub protocol class."""
LABEL = 'activitypub'
@classmethod
def send(cls, url, activity, *, user=None):
"""Sends an outgoing activity.
To be implemented by subclasses.
Args:
url: str, destination URL to send to
activity: dict, AS1 activity to send
user: :class:`User` this is on behalf of
Raises:
:class:`werkzeug.HTTPException` if the request fails
"""
raise NotImplementedError()
@classmethod
def fetch(cls, id, obj, *, user=None):
"""Tries to fetch an AS2 object and populate it into an :class:`Object`.
Uses HTTP content negotiation via the Accept header. If the url is
HTML and it has a rel-alternate link with an AS2 content type, fetches and
returns that URL.
Includes an HTTP Signature with the request.
https://w3c.github.io/activitypub/#authorization
https://tools.ietf.org/html/draft-cavage-http-signatures-07
https://github.com/mastodon/mastodon/pull/11269
Mastodon requires this signature if AUTHORIZED_FETCH aka secure mode is on:
https://docs.joinmastodon.org/admin/config/#authorized_fetch
Signs the request with the given user. If not provided, defaults to
using @snarfed.org@snarfed.org's key.
Args:
id: str, object's URL id
obj: :class:`Object` to populate the fetched object into
user: optional :class:`User` we're fetching on behalf of
Raises:
:class:`requests.HTTPError`, :class:`werkzeug.exceptions.HTTPException`
If we raise a werkzeug HTTPException, it will have an additional
requests_response attribute with the last requests.Response we received.
"""
def _error(resp, extra_msg=None):
msg = f"Couldn't fetch {id} as ActivityStreams 2"
if extra_msg:
msg += ': ' + extra_msg
logger.warning(msg)
err = BadGateway(msg)
err.requests_response = resp
raise err
def _get(url, headers):
"""Returns None if we fetched and populated, resp otherwise."""
resp = signed_get(url, user=user, headers=headers, gateway=True)
if not resp.content:
_error(resp, 'empty response')
elif common.content_type(resp) == as2.CONTENT_TYPE:
try:
obj.as2 = resp.json()
return
except requests.JSONDecodeError:
_error(resp, "Couldn't decode as JSON")
return resp
resp = _get(id, CONNEG_HEADERS_AS2_HTML)
if resp is None:
return
# look in HTML to find AS2 link
if common.content_type(resp) != 'text/html':
_error(resp, 'no AS2 available')
parsed = util.parse_html(resp)
link = parsed.find('link', rel=('alternate', 'self'), type=(
as2.CONTENT_TYPE, as2.CONTENT_TYPE_LD))
if not (link and link['href']):
_error(resp, 'no AS2 available')
resp = _get(link['href'], as2.CONNEG_HEADERS)
if resp is not None:
_error(resp)
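# Example (hypothetical caller, not part of this commit; URL and user are
# illustrative): fetching a remote actor into a fresh Object via the content
# negotiation above.
#
#   obj = Object(id='https://example.com/users/alice')
#   ActivityPub.fetch('https://example.com/users/alice', obj, user=user)
#   actor_as2 = obj.as2  # dict on success; BadGateway raised otherwise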
@classmethod
def verify_signature(cls, user):
"""Verifies the current request's HTTP Signature.
Args:
user: :class:`User`
Logs details of the result. Raises :class:`werkzeug.exceptions.HTTPException` if the
signature is missing or invalid, otherwise does nothing and returns None.
"""
sig = request.headers.get('Signature')
if not sig:
error('No HTTP Signature', status=401)
logger.info(f'Headers: {json_dumps(dict(request.headers), indent=2)}')
# parse_signature_header lower-cases all keys
keyId = parse_signature_header(sig).get('keyid')
if not keyId:
error('HTTP Signature missing keyId', status=401)
digest = request.headers.get('Digest') or ''
if not digest:
error('Missing Digest header, required for HTTP Signature', status=401)
expected = b64encode(sha256(request.data).digest()).decode()
if digest.removeprefix('SHA-256=') != expected:
error('Invalid Digest header, required for HTTP Signature', status=401)
key_actor = cls.get_object(keyId, user=user).as2
key = key_actor.get("publicKey", {}).get('publicKeyPem')
logger.info(f'Verifying signature for {request.path} with key {key}')
try:
verified = HeaderVerifier(request.headers, key,
required_headers=['Digest'],
method=request.method,
path=request.path,
sign_header='signature').verify()
except BaseException as e:
error(f'HTTP Signature verification failed: {e}', status=401)
if verified:
logger.info('HTTP Signature verified!')
else:
error('HTTP Signature verification failed', status=401)
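# For reference, the Digest header checked above is the one signed_request()
# below generates, ie (sketch, standard library only; body is the raw
# request bytes):
#
#   from base64 import b64encode
#   from hashlib import sha256
#   digest = f'SHA-256={b64encode(sha256(body).digest()).decode()}'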
@classmethod
def accept_follow(cls, obj, user):
"""Replies to an AP Follow request with an Accept request.
TODO: move to Protocol
Args:
obj: :class:`Object`
user: :class:`User`
"""
logger.info('Replying to Follow with Accept')
followee = obj.as2.get('object')
followee_id = followee.get('id') if isinstance(followee, dict) else followee
follower = obj.as2.get('actor')
if not followee or not followee_id or not follower:
error(f'Follow activity requires object and actor. Got: {obj.as2}')
inbox = follower.get('inbox')
follower_id = follower.get('id')
if not inbox or not follower_id:
error(f'Follow actor requires id and inbox. Got: {follower}')
# rendered mf2 HTML proxy pages (in render.py) fall back to redirecting to
# the follow's AS2 id field, but Mastodon's ids are URLs that don't load in
# browsers, eg https://jawns.club/ac33c547-ca6b-4351-80d5-d11a6879a7b0
# so, set a synthetic URL based on the follower's profile.
# https://github.com/snarfed/bridgy-fed/issues/336
follower_url = util.get_url(follower) or follower_id
followee_url = util.get_url(followee) or followee_id
obj.as2.setdefault('url', f'{follower_url}#followed-{followee_url}')
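# eg with follower profile https://mas.to/@bob and followee
# https://alice.com, the synthetic url above becomes
# https://mas.to/@bob#followed-https://alice.com (domains illustrative)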
# store Follower
follower_obj = Follower.get_or_create(
dest=user.key.id(), src=follower_id, last_follow=obj.as2)
follower_obj.status = 'active'
follower_obj.put()
# send AP Accept
followee_actor_url = common.host_url(user.key.id())
accept = {
'@context': 'https://www.w3.org/ns/activitystreams',
'id': util.tag_uri(common.PRIMARY_DOMAIN,
f'accept/{user.key.id()}/{obj.key.id()}'),
'type': 'Accept',
'actor': followee_actor_url,
'object': {
'type': 'Follow',
'actor': follower_id,
'object': followee_actor_url,
}
}
# TODO: generic send()
return signed_post(inbox, data=accept, user=user)
def signed_get(url, user, **kwargs):
return signed_request(util.requests_get, url, user, **kwargs)
def signed_post(url, user, **kwargs):
assert user
return signed_request(util.requests_post, url, user, **kwargs)
def signed_request(fn, url, user, data=None, log_data=True, headers=None, **kwargs):
"""Wraps requests.* and adds HTTP Signature.
Args:
fn: :func:`util.requests_get` or :func:`util.requests_post`
url: str
user: :class:`User` to sign request with
data: optional AS2 object
log_data: boolean, whether to log full data object
kwargs: passed through to requests
Returns: :class:`requests.Response`
"""
if headers is None:
headers = {}
# prepare HTTP Signature and headers
if not user:
user = default_signature_user()
if data:
if log_data:
logger.info(f'Sending AS2 object: {json_dumps(data, indent=2)}')
data = json_dumps(data).encode()
headers = {
**headers,
# required for HTTP Signature
# https://tools.ietf.org/html/draft-cavage-http-signatures-07#section-2.1.3
'Date': util.now().strftime('%a, %d %b %Y %H:%M:%S GMT'),
# required by Mastodon
# https://github.com/tootsuite/mastodon/pull/14556#issuecomment-674077648
'Host': util.domain_from_link(url, minimize=False),
'Content-Type': as2.CONTENT_TYPE,
# required for HTTP Signature and Mastodon
'Digest': f'SHA-256={b64encode(sha256(data or b"").digest()).decode()}',
}
domain = user.key.id()
logger.info(f"Signing with {domain}'s key")
key_id = host_url(domain)
# (request-target) is a special HTTP Signatures header that some fediverse
# implementations require, eg Peertube.
# https://datatracker.ietf.org/doc/html/draft-cavage-http-signatures-12#section-2.3
# https://github.com/snarfed/bridgy-fed/issues/40
auth = HTTPSignatureAuth(secret=user.private_pem(), key_id=key_id,
algorithm='rsa-sha256', sign_header='signature',
headers=HTTP_SIG_HEADERS)
# make HTTP request
kwargs.setdefault('gateway', True)
resp = fn(url, data=data, auth=auth, headers=headers, allow_redirects=False,
**kwargs)
logger.info(f'Got {resp.status_code} headers: {resp.headers}')
# handle GET redirects manually so that we generate a new HTTP signature
if resp.is_redirect and fn == util.requests_get:
return signed_request(fn, resp.headers['Location'], data=data, user=user,
headers=headers, log_data=log_data, **kwargs)
type = common.content_type(resp)
if (type and type != 'text/html' and
(type.startswith('text/') or type.endswith('+json') or type.endswith('/json'))):
logger.info(resp.text)
return resp
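# Example usage (sketch; URLs illustrative). signed_get falls back to the
# default signature user; signed_post asserts an explicit one:
#
#   resp = signed_get('https://mastodon.example/users/alice', user=None)
#   resp = signed_post('https://mastodon.example/inbox', data=accept, user=user)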
def postprocess_as2(activity, user=None, target=None, create=True):
"""Prepare an AS2 object to be served or sent via ActivityPub.
Args:
activity: dict, AS2 object or activity
user: :class:`User`, required. populated into actor.id and
publicKey fields if needed.
target: dict, AS2 object, optional. The target of activity's inReplyTo or
Like/Announce/etc object, if any.
create: boolean, whether to wrap `Note` and `Article` objects in a
`Create` activity
"""
assert user
type = activity.get('type')
# actor objects
if type == 'Person':
postprocess_as2_actor(activity, user)
if not activity.get('publicKey'):
# underspecified, inferred from this issue and Mastodon's implementation:
# https://github.com/w3c/activitypub/issues/203#issuecomment-297553229
# https://github.com/tootsuite/mastodon/blob/bc2c263504e584e154384ecc2d804aeb1afb1ba3/app/services/activitypub/process_account_service.rb#L77
actor_url = host_url(activity.get('preferredUsername'))
activity.update({
'publicKey': {
'id': actor_url,
'owner': actor_url,
'publicKeyPem': user.public_pem().decode(),
},
'@context': (util.get_list(activity, '@context') +
['https://w3id.org/security/v1']),
})
return activity
for actor in (util.get_list(activity, 'attributedTo') +
util.get_list(activity, 'actor')):
postprocess_as2_actor(actor, user)
# inReplyTo: singly valued, prefer id over url
target_id = target.get('id') if target else None
in_reply_to = activity.get('inReplyTo')
if in_reply_to:
if target_id:
activity['inReplyTo'] = target_id
elif isinstance(in_reply_to, list):
if len(in_reply_to) > 1:
logger.warning(
"AS2 doesn't support multiple inReplyTo URLs! "
f'Only using the first: {in_reply_to[0]}')
activity['inReplyTo'] = in_reply_to[0]
# Mastodon evidently requires a Mention tag for replies to generate a
# notification to the original post's author. not required for likes,
# reposts, etc. details:
# https://github.com/snarfed/bridgy-fed/issues/34
if target:
for to in (util.get_list(target, 'attributedTo') +
util.get_list(target, 'actor')):
if isinstance(to, dict):
to = util.get_first(to, 'url') or to.get('id')
if to:
activity.setdefault('tag', []).append({
'type': 'Mention',
'href': to,
})
# activity objects (for Like, Announce, etc): prefer id over url
obj = activity.get('object')
if obj:
if isinstance(obj, dict) and not obj.get('id'):
obj['id'] = target_id or util.get_first(obj, 'url')
elif target_id and obj != target_id:
activity['object'] = target_id
# id is required for most things. default to url if it's not set.
if not activity.get('id'):
activity['id'] = util.get_first(activity, 'url')
# TODO: find a better way to check this, sometimes or always?
# removed for now since it fires on posts without u-id or u-url, eg
# https://chrisbeckstrom.com/2018/12/27/32551/
# assert activity.get('id') or (isinstance(obj, dict) and obj.get('id'))
activity['id'] = redirect_wrap(activity.get('id'))
activity['url'] = [redirect_wrap(u) for u in util.get_list(activity, 'url')]
if len(activity['url']) == 1:
activity['url'] = activity['url'][0]
# copy image(s) into attachment(s). may be Mastodon-specific.
# https://github.com/snarfed/bridgy-fed/issues/33#issuecomment-440965618
obj_or_activity = obj if isinstance(obj, dict) else activity
img = util.get_list(obj_or_activity, 'image')
if img:
obj_or_activity.setdefault('attachment', []).extend(img)
# cc target's author(s) and recipients
# https://www.w3.org/TR/activitystreams-vocabulary/#audienceTargeting
# https://w3c.github.io/activitypub/#delivery
if target and (type in as2.TYPE_TO_VERB or type in ('Article', 'Note')):
recips = itertools.chain(*(util.get_list(target, field) for field in
('actor', 'attributedTo', 'to', 'cc')))
activity['cc'] = util.dedupe_urls(util.get_url(recip) or recip.get('id')
for recip in recips)
# to public, since Mastodon interprets to public as public, cc public as unlisted:
# https://socialhub.activitypub.rocks/t/visibility-to-cc-mapping/284
# https://wordsmith.social/falkreon/securing-activitypub
to = activity.setdefault('to', [])
if as2.PUBLIC_AUDIENCE not in to:
to.append(as2.PUBLIC_AUDIENCE)
# wrap articles and notes in a Create activity
if create and type in ('Article', 'Note'):
activity = {
'@context': as2.CONTEXT,
'type': 'Create',
'id': f'{activity["id"]}#bridgy-fed-create',
'actor': postprocess_as2_actor({}, user),
'object': activity,
}
return util.trim_nulls(activity)
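# Example (sketch): a bare AS2 Note run through postprocess_as2 comes back
# wrapped in a Create, roughly:
#
#   postprocess_as2({'type': 'Note', 'id': 'http://a.com/post'}, user=user)
#   # => {'@context': ..., 'type': 'Create',
#   #     'id': redirect_wrap('http://a.com/post') + '#bridgy-fed-create',
#   #     'actor': {...}, 'object': {'type': 'Note', ...}}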
def postprocess_as2_actor(actor, user=None):
"""Prepare an AS2 actor object to be served or sent via ActivityPub.
Modifies actor in place.
Args:
actor: dict, AS2 actor object
user: :class:`User`
Returns:
actor dict
"""
url = user.homepage if user else None
urls = util.get_list(actor, 'url')
if not urls and url:
urls = [url]
domain = util.domain_from_link(urls[0], minimize=False)
urls[0] = redirect_wrap(urls[0])
actor.setdefault('id', host_url(domain))
actor.update({
'url': urls if len(urls) > 1 else urls[0],
# This has to be the domain for Mastodon interop/Webfinger discovery!
# See related comment in actor() below.
'preferredUsername': domain,
})
# Override the label for their home page to be "Web site"
for att in util.get_list(actor, 'attachment'):
if att.get('type') == 'PropertyValue':
val = att.get('value', '')
link = util.parse_html(val).find('a')
if url and (val == url or (link and link.get('href') == url)):
att['name'] = 'Web site'
# required by pixelfed. https://github.com/snarfed/bridgy-fed/issues/39
actor.setdefault('summary', '')
return actor
@app.get(f'/<regex("{common.DOMAIN_RE}"):domain>')
@@ -73,7 +496,7 @@ def actor(domain):
# TODO: unify with common.actor()
actor = {
**common.postprocess_as2(user.actor_as2, user=user),
**postprocess_as2(user.actor_as2, user=user),
'id': host_url(domain),
# This has to be the domain for Mastodon etc interop! It seems like it
# should be the custom username from the acct: u-url in their h-card,
@@ -110,241 +533,29 @@ def inbox(domain=None):
except (TypeError, ValueError, AssertionError):
error(f"Couldn't parse body as non-empty JSON mapping: {body}", exc_info=True)
actor = activity.get('actor')
actor_id = actor.get('id') if isinstance(actor, dict) else actor
actor_id = as1.get_object(activity, 'actor').get('id')
logger.info(f'Got {activity.get("type")} activity from {actor_id}: {json_dumps(activity, indent=2)}')
id = activity.get('id')
if not id:
error('Activity has no id')
# short circuit if we've already seen this activity id
with seen_ids_lock:
already_seen = id in seen_ids
seen_ids[id] = True
if already_seen or Object.get_by_id(id):
msg = f'Already handled this activity {id}'
logger.info(msg)
return msg, 200
obj = Object(id=id, as2=redirect_unwrap(activity), source_protocol='activitypub')
obj.put()
if obj.type == 'accept': # eg in response to a Follow
return 'OK' # noop
elif obj.type not in SUPPORTED_TYPES:
error(f'Sorry, {obj.type} activities are not supported yet.', status=501)
inner_obj = as1.get_object(obj.as1)
inner_obj_id = inner_obj.get('id')
# load user
# TODO: store in g instead of passing around
user = None
if domain:
user = User.get_by_id(domain)
if not user:
error(f'User {domain} not found', status=404)
verify_signature(user)
ActivityPub.verify_signature(user)
# check that this activity is public. only do this check for creates, not
# like, follow, or other activity types, since Mastodon doesn't currently
# mark those as explicitly public. Use as2's is_public instead of as1's
# because as1's interprets unlisted as true.
if obj.type in ('post', 'create') and not as2.is_public(obj.as2):
# check that this activity is public. only do this for creates, not likes,
# follows, or other activity types, since Mastodon doesn't currently mark
# those as explicitly public. Use as2's is_public instead of as1's because
# as1's interprets unlisted as true.
if activity.get('type') == 'Create' and not as2.is_public(activity):
logger.info('Dropping non-public activity')
return 'OK'
# store inner object
if obj.type in ('post', 'create', 'update') and inner_obj.keys() > set(['id']):
to_update = Object.get_by_id(inner_obj_id) or Object(id=inner_obj_id)
to_update.populate(as2=obj.as2['object'], source_protocol='activitypub')
to_update.put()
# handle activity!
if obj.type == 'stop-following':
# granary doesn't yet handle three-actor undo follows, eg Eve undoes
# Alice following Bob
follower = as1.get_object(as1.get_object(activity, 'object'), 'actor')
assert actor_id == follower.get('id')
if not actor_id or not inner_obj_id:
error(f'Undo of Follow requires object with actor and object. Got: {actor_id} {inner_obj_id} {obj.as1}')
# deactivate Follower
followee_domain = util.domain_from_link(inner_obj_id, minimize=False)
follower = Follower.get_by_id(Follower._id(dest=followee_domain, src=actor_id))
if follower:
logging.info(f'Marking {follower} inactive')
follower.status = 'inactive'
follower.put()
else:
logger.warning(f'No Follower found for {followee_domain} {actor_id}')
# TODO send webmention with 410 of u-follow
obj.status = 'complete'
obj.put()
return 'OK'
elif obj.type == 'update':
if not inner_obj_id:
error("Couldn't find id of object to update")
obj.status = 'complete'
obj.put()
return 'OK'
elif obj.type == 'delete':
if not inner_obj_id:
error("Couldn't find id of object to delete")
to_delete = Object.get_by_id(inner_obj_id)
if to_delete:
logger.info(f'Marking Object {inner_obj_id} deleted')
to_delete.deleted = True
to_delete.put()
# assume this is an actor
# https://github.com/snarfed/bridgy-fed/issues/63
logger.info(f'Deactivating Followers with src or dest = {inner_obj_id}')
followers = Follower.query(OR(Follower.src == inner_obj_id,
Follower.dest == inner_obj_id)
).fetch()
for f in followers:
f.status = 'inactive'
obj.status = 'complete'
ndb.put_multi(followers + [obj])
return 'OK'
# fetch actor if necessary so we have name, profile photo, etc
if actor and isinstance(actor, str):
actor = obj.as2['actor'] = common.get_object(actor, user=user).as2
# fetch object if necessary so we can render it in feeds
if obj.type in FETCH_OBJECT_TYPES and inner_obj.keys() == set(['id']):
inner_obj = obj.as2['object'] = common.get_object(inner_obj_id, user=user).as2
if obj.type == 'follow':
resp = accept_follow(obj, user)
# send webmentions to each target
common.send_webmentions(as2.to_as1(activity), obj, proxy=True)
# deliver original posts and reposts to followers
if obj.type in ('share', 'create', 'post') and actor and actor_id:
logger.info(f'Delivering to followers of {actor_id}')
for f in Follower.query(Follower.dest == actor_id,
Follower.status == 'active',
projection=[Follower.src]):
if f.src not in obj.domains:
obj.domains.append(f.src)
if obj.domains and 'feed' not in obj.labels:
obj.labels.append('feed')
if obj.as1.get('objectType') == 'activity' and 'activity' not in obj.labels:
obj.labels.append('activity')
obj.put()
return 'OK'
def verify_signature(user):
"""Verifies the current request's HTTP Signature.
Args:
user: :class:`User`
Logs details of the result. Raises :class:`werkzeug.exceptions.HTTPException` if the
signature is missing or invalid, otherwise does nothing and returns None.
"""
sig = request.headers.get('Signature')
if not sig:
error('No HTTP Signature', status=401)
logger.info(f'Headers: {json_dumps(dict(request.headers), indent=2)}')
# parse_signature_header lower-cases all keys
keyId = parse_signature_header(sig).get('keyid')
if not keyId:
error('HTTP Signature missing keyId', status=401)
digest = request.headers.get('Digest') or ''
if not digest:
error('Missing Digest header, required for HTTP Signature', status=401)
expected = b64encode(sha256(request.data).digest()).decode()
if digest.removeprefix('SHA-256=') != expected:
error('Invalid Digest header, required for HTTP Signature', status=401)
key_actor = common.get_object(keyId, user=user).as2
key = key_actor.get("publicKey", {}).get('publicKeyPem')
logger.info(f'Verifying signature for {request.path} with key {key}')
try:
verified = HeaderVerifier(request.headers, key,
required_headers=['Digest'],
method=request.method,
path=request.path,
sign_header='signature').verify()
except BaseException as e:
error(f'HTTP Signature verification failed: {e}', status=401)
if verified:
logger.info('HTTP Signature verified!')
else:
error('HTTP Signature verification failed', status=401)
def accept_follow(obj, user):
"""Replies to an AP Follow request with an Accept request.
Args:
obj: :class:`Object`
user: :class:`User`
"""
logger.info('Replying to Follow with Accept')
followee = obj.as2.get('object')
followee_id = followee.get('id') if isinstance(followee, dict) else followee
follower = obj.as2.get('actor')
if not followee or not followee_id or not follower:
error(f'Follow activity requires object and actor. Got: {obj.as2}')
inbox = follower.get('inbox')
follower_id = follower.get('id')
if not inbox or not follower_id:
error(f'Follow actor requires id and inbox. Got: {follower}')
# rendered mf2 HTML proxy pages (in render.py) fall back to redirecting to
# the follow's AS2 id field, but Mastodon's ids are URLs that don't load in
# browsers, eg https://jawns.club/ac33c547-ca6b-4351-80d5-d11a6879a7b0
# so, set a synthetic URL based on the follower's profile.
# https://github.com/snarfed/bridgy-fed/issues/336
follower_url = util.get_url(follower) or follower_id
followee_url = util.get_url(followee) or followee_id
obj.as2.setdefault('url', f'{follower_url}#followed-{followee_url}')
# store Follower
follower_obj = Follower.get_or_create(dest=user.key.id(), src=follower_id,
last_follow=obj.as2)
follower_obj.status = 'active'
follower_obj.put()
# send AP Accept
followee_actor_url = host_url(user.key.id())
accept = {
'@context': 'https://www.w3.org/ns/activitystreams',
'id': util.tag_uri(common.PRIMARY_DOMAIN,
f'accept/{user.key.id()}/{obj.key.id()}'),
'type': 'Accept',
'actor': followee_actor_url,
'object': {
'type': 'Follow',
'actor': follower_id,
'object': followee_actor_url,
}
}
return common.signed_post(inbox, data=accept, user=user)
return ActivityPub.receive(activity.get('id'), user=user,
as2=redirect_unwrap(activity))
@app.get(f'/<regex("{common.DOMAIN_RE}"):domain>/<any(followers,following):collection>')
@@ -360,7 +571,7 @@ def follower_collection(domain, collection):
return f'User {domain} not found', 404
# page
followers, new_before, new_after = common.fetch_followers(domain, collection)
followers, new_before, new_after = Follower.fetch_page(domain, collection)
items = []
for f in followers:
f_as2 = f.to_as2()

5
app.py

@@ -54,4 +54,7 @@ xrpc_server = Server(lexicons, validate=False)
init_flask(xrpc_server, app)
# import all modules to register their Flask handlers
import activitypub, follow, pages, redirect, render, superfeedr, webfinger, webmention, xrpc_actor, xrpc_feed, xrpc_graph
import activitypub
import follow, pages
import redirect
import render, superfeedr, webfinger, webmention, xrpc_actor, xrpc_feed, xrpc_graph

604
common.py

@@ -1,30 +1,18 @@
# coding=utf-8
"""Misc common utilities.
"""
from base64 import b64encode
import copy
from datetime import timedelta, timezone
from hashlib import sha256
import itertools
from datetime import timedelta
import logging
import os
import re
import threading
import urllib.parse
from cachetools import cached, LRUCache
from flask import request
from flask import abort, make_response, request
from granary import as1, as2, microformats2
from httpsig.requests_auth import HTTPSignatureAuth
import mf2util
from oauth_dropins.webutil import util, webmention
from oauth_dropins.webutil.flask_util import error
from oauth_dropins.webutil import util
from oauth_dropins.webutil.appengine_info import DEBUG
from oauth_dropins.webutil.util import json_dumps, json_loads
import requests
from werkzeug.exceptions import BadGateway
from models import Follower, Object, Target, User
logger = logging.getLogger(__name__)
@@ -32,23 +20,8 @@ DOMAIN_RE = r'[^/:]+\.[^/:]+'
TLD_BLOCKLIST = ('7z', 'asp', 'aspx', 'gif', 'html', 'ico', 'jpg', 'jpeg', 'js',
'json', 'php', 'png', 'rar', 'txt', 'yaml', 'yml', 'zip')
CONTENT_TYPE_LD_PLAIN = 'application/ld+json'
CONTENT_TYPE_HTML = 'text/html; charset=utf-8'
CONNEG_HEADERS_AS2_HTML = copy.deepcopy(as2.CONNEG_HEADERS)
CONNEG_HEADERS_AS2_HTML['Accept'] += f', {CONTENT_TYPE_HTML}; q=0.7'
SUPPORTED_VERBS = (
'checkin',
'create',
'follow',
'like',
'post',
'share',
'tag',
'update',
)
PRIMARY_DOMAIN = 'fed.brid.gy'
OTHER_DOMAINS = (
'bridgy-federated.appspot.com',
@@ -70,12 +43,7 @@ DOMAIN_BLOCKLIST = frozenset((
'twitter.com',
) + DOMAINS)
_DEFAULT_SIGNATURE_USER = None
CACHE_TIME = timedelta(seconds=60)
PAGE_SIZE = 20
HTTP_SIG_HEADERS = ('Date', 'Host', 'Digest', '(request-target)')
def host_url(path_query=None):
@@ -88,11 +56,10 @@ def host_url(path_query=None):
return urllib.parse.urljoin(base, path_query)
def default_signature_user():
global _DEFAULT_SIGNATURE_USER
if _DEFAULT_SIGNATURE_USER is None:
_DEFAULT_SIGNATURE_USER = User.get_or_create('snarfed.org')
return _DEFAULT_SIGNATURE_USER
def error(msg, status=400, exc_info=False):
"""Like flask_util.error, but wraps body in JSON."""
logger.info(f'Returning {status}: {msg}', exc_info=exc_info)
abort(status, response=make_response({'error': msg}, status))
def pretty_link(url, text=None, user=None):
@@ -118,198 +85,6 @@ def pretty_link(url, text=None, user=None):
return util.pretty_link(url, text=text)
@cached(LRUCache(1000), key=lambda id, user=None: util.fragmentless(id),
lock=threading.Lock())
def get_object(id, user=None):
"""Loads and returns an Object from memory cache, datastore, or HTTP fetch.
Assumes id is a URL. Any fragment at the end is stripped before loading.
This is currently underspecified and somewhat inconsistent across AP
implementations:
https://socialhub.activitypub.rocks/t/problems-posting-to-mastodon-inbox/801/11
https://socialhub.activitypub.rocks/t/problems-posting-to-mastodon-inbox/801/23
https://socialhub.activitypub.rocks/t/s2s-create-activity/1647/5
https://github.com/mastodon/mastodon/issues/13879 (open!)
https://github.com/w3c/activitypub/issues/224
Note that :meth:`Object._post_put_hook` updates the cache.
Args:
id: str
user: optional, :class:`User` used to sign HTTP request, if necessary
Returns: Object, or None if it can't be fetched
"""
id = util.fragmentless(id)
logger.info(f'Loading Object {id}')
obj = Object.get_by_id(id)
if obj:
if obj.as2:
logger.info(' got from datastore')
return obj
else:
obj = Object(id=id)
logger.info(f'Object not in datastore or has no as2: {id}')
obj_as2 = get_as2(id, user=user)
if obj.mf2:
logging.warning(f'Wiping out mf2 property: {obj.mf2}')
obj.mf2 = None
obj.populate(as2=obj_as2, source_protocol='activitypub')
obj.put()
return obj
def signed_get(url, user, **kwargs):
return signed_request(util.requests_get, url, user, **kwargs)
def signed_post(url, user, **kwargs):
assert user
return signed_request(util.requests_post, url, user, **kwargs)
def signed_request(fn, url, user, data=None, log_data=True, headers=None, **kwargs):
"""Wraps requests.* and adds HTTP Signature.
Args:
fn: :func:`util.requests_get` or :func:`util.requests_post`
url: str
user: :class:`User` to sign request with
data: optional AS2 object
log_data: boolean, whether to log full data object
kwargs: passed through to requests
Returns: :class:`requests.Response`
"""
if headers is None:
headers = {}
# prepare HTTP Signature and headers
if not user:
user = default_signature_user()
if data:
if log_data:
logger.info(f'Sending AS2 object: {json_dumps(data, indent=2)}')
data = json_dumps(data).encode()
headers = copy.deepcopy(headers)
headers.update({
# required for HTTP Signature
# https://tools.ietf.org/html/draft-cavage-http-signatures-07#section-2.1.3
'Date': util.now().strftime('%a, %d %b %Y %H:%M:%S GMT'),
# required by Mastodon
# https://github.com/tootsuite/mastodon/pull/14556#issuecomment-674077648
'Host': util.domain_from_link(url, minimize=False),
'Content-Type': as2.CONTENT_TYPE,
# required for HTTP Signature and Mastodon
'Digest': f'SHA-256={b64encode(sha256(data or b"").digest()).decode()}',
})
domain = user.key.id()
logger.info(f"Signing with {domain}'s key")
key_id = host_url(domain)
# (request-target) is a special HTTP Signatures header that some fediverse
# implementations require, eg Peertube.
# https://datatracker.ietf.org/doc/html/draft-cavage-http-signatures-12#section-2.3
# https://github.com/snarfed/bridgy-fed/issues/40
auth = HTTPSignatureAuth(secret=user.private_pem(), key_id=key_id,
algorithm='rsa-sha256', sign_header='signature',
headers=HTTP_SIG_HEADERS)
# make HTTP request
kwargs.setdefault('gateway', True)
resp = fn(url, data=data, auth=auth, headers=headers, allow_redirects=False,
**kwargs)
logger.info(f'Got {resp.status_code} headers: {resp.headers}')
# handle GET redirects manually so that we generate a new HTTP signature
if resp.is_redirect and fn == util.requests_get:
return signed_request(fn, resp.headers['Location'], data=data, user=user,
headers=headers, log_data=log_data, **kwargs)
type = content_type(resp)
if (type and type != 'text/html' and
(type.startswith('text/') or type.endswith('+json') or type.endswith('/json'))):
logger.info(resp.text)
return resp
def get_as2(url, user=None):
"""Tries to fetch the given URL as ActivityStreams 2.
Uses HTTP content negotiation via the Accept header. If the url is
HTML and it has a rel-alternate link with an AS2 content type, fetches and
returns that URL.
Includes an HTTP Signature with the request.
https://w3c.github.io/activitypub/#authorization
https://tools.ietf.org/html/draft-cavage-http-signatures-07
https://github.com/mastodon/mastodon/pull/11269
Mastodon requires this signature if AUTHORIZED_FETCH aka secure mode is on:
https://docs.joinmastodon.org/admin/config/#authorized_fetch
If user is not provided, defaults to using @snarfed.org@snarfed.org's key.
Args:
url: string
user: :class:`User` used to sign request
Returns:
dict, AS2 object
Raises:
:class:`requests.HTTPError`, :class:`werkzeug.exceptions.HTTPException`
If we raise a werkzeug HTTPException, it will have an additional
requests_response attribute with the last requests.Response we received.
"""
def _error(resp, extra_msg=None):
msg = f"Couldn't fetch {url} as ActivityStreams 2"
if extra_msg:
msg += ': ' + extra_msg
logger.warning(msg)
err = BadGateway(msg)
err.requests_response = resp
raise err
def _get(url, headers):
resp = signed_get(url, user=user, headers=headers, gateway=True)
if not resp.content:
_error(resp, 'empty response')
elif content_type(resp) in (as2.CONTENT_TYPE, CONTENT_TYPE_LD_PLAIN):
try:
return resp.json()
except requests.JSONDecodeError:
_error(resp, "Couldn't decode as JSON")
return resp
resp = _get(url, CONNEG_HEADERS_AS2_HTML)
if not isinstance(resp, requests.Response):
return resp
# look in HTML to find AS2 link
if content_type(resp) != 'text/html':
_error(resp, 'no AS2 available')
parsed = util.parse_html(resp)
obj = parsed.find('link', rel=('alternate', 'self'), type=(
as2.CONTENT_TYPE, as2.CONTENT_TYPE_LD))
if not (obj and obj['href']):
_error(resp, 'no AS2 available')
resp = _get(obj['href'], as2.CONNEG_HEADERS)
if not isinstance(resp, requests.Response):
return resp
_error(resp)
def content_type(resp):
"""Returns a :class:`requests.Response`'s Content-Type, without charset suffix."""
type = resp.headers.get('Content-Type')
@@ -329,274 +104,6 @@ def remove_blocklisted(urls):
util.domain_from_link(u), DOMAIN_BLOCKLIST)]
def send_webmentions(activity_wrapped, obj, proxy=None):
"""Sends webmentions for an incoming ActivityPub inbox delivery.
Args:
activity_wrapped: dict, AS1 activity
obj: :class:`Object`
proxy: boolean, whether to use our proxy URL as the webmention source
Returns: boolean, True if any webmentions were sent, False otherwise
"""
activity_unwrapped = obj.as1
verb = activity_unwrapped.get('verb')
if verb and verb not in SUPPORTED_VERBS:
error(f'{verb} activities are not supported yet.', status=501)
# extract source and targets
source = activity_unwrapped.get('url') or activity_unwrapped.get('id')
inner_obj = activity_unwrapped.get('object')
obj_url = util.get_url(inner_obj)
targets = util.get_list(activity_unwrapped, 'inReplyTo')
if isinstance(inner_obj, dict):
if not source or verb in ('create', 'post', 'update'):
source = obj_url or inner_obj.get('id')
targets.extend(util.get_list(inner_obj, 'inReplyTo'))
if not source:
error("Couldn't find original post URL")
tags = util.get_list(activity_wrapped, 'tags')
obj_wrapped = activity_wrapped.get('object')
if isinstance(obj_wrapped, dict):
tags.extend(util.get_list(obj_wrapped, 'tags'))
for tag in tags:
if tag.get('objectType') == 'mention':
url = tag.get('url')
if url and url.startswith(host_url()):
targets.append(redirect_unwrap(url))
if verb in ('follow', 'like', 'share'):
targets.append(obj_url)
targets = util.dedupe_urls(util.get_url(t) for t in targets)
targets = remove_blocklisted(t.lower() for t in targets)
if not targets:
logger.info("Couldn't find any IndieWeb target URLs in inReplyTo, object, or mention tags")
return False
logger.info(f'targets: {targets}')
# send webmentions and update Object
errors = [] # stores (code, body) tuples
targets = [Target(uri=uri, protocol='activitypub') for uri in targets]
obj.populate(
undelivered=targets,
status='in progress',
)
if obj.undelivered and 'notification' not in obj.labels:
obj.labels.append('notification')
while obj.undelivered:
target = obj.undelivered.pop()
domain = util.domain_from_link(target.uri, minimize=False)
if domain == util.domain_from_link(source, minimize=False):
logger.info(f'Skipping same-domain webmention from {source} to {target.uri}')
continue
if domain not in obj.domains:
obj.domains.append(domain)
wm_source = (obj.proxy_url()
if verb in ('follow', 'like', 'share') or proxy
else source)
logger.info(f'Sending webmention from {wm_source} to {target.uri}')
try:
endpoint = webmention.discover(target.uri).endpoint
if endpoint:
webmention.send(endpoint, wm_source, target.uri)
logger.info('Success!')
obj.delivered.append(target)
else:
logger.info('No webmention endpoint')
except BaseException as e:
code, body = util.interpret_http_exception(e)
if not code and not body:
raise
errors.append((code, body))
obj.failed.append(target)
obj.put()
obj.status = ('complete' if obj.delivered or obj.domains
else 'failed' if obj.failed
else 'ignored')
if errors:
msg = 'Errors: ' + ', '.join(f'{code} {body}' for code, body in errors)
error(msg, status=int(errors[0][0] or 502))
return True
def postprocess_as2(activity, user=None, target=None, create=True):
"""Prepare an AS2 object to be served or sent via ActivityPub.
Args:
activity: dict, AS2 object or activity
user: :class:`User`, required. populated into actor.id and
publicKey fields if needed.
target: dict, AS2 object, optional. The target of activity's inReplyTo or
Like/Announce/etc object, if any.
create: boolean, whether to wrap `Note` and `Article` objects in a
`Create` activity
"""
assert user
type = activity.get('type')
# actor objects
if type == 'Person':
postprocess_as2_actor(activity, user)
if not activity.get('publicKey'):
# underspecified, inferred from this issue and Mastodon's implementation:
# https://github.com/w3c/activitypub/issues/203#issuecomment-297553229
# https://github.com/tootsuite/mastodon/blob/bc2c263504e584e154384ecc2d804aeb1afb1ba3/app/services/activitypub/process_account_service.rb#L77
actor_url = host_url(activity.get('preferredUsername'))
activity.update({
'publicKey': {
'id': actor_url,
'owner': actor_url,
'publicKeyPem': user.public_pem().decode(),
},
'@context': (util.get_list(activity, '@context') +
['https://w3id.org/security/v1']),
})
return activity
for actor in (util.get_list(activity, 'attributedTo') +
util.get_list(activity, 'actor')):
postprocess_as2_actor(actor, user)
# inReplyTo: singly valued, prefer id over url
target_id = target.get('id') if target else None
in_reply_to = activity.get('inReplyTo')
if in_reply_to:
if target_id:
activity['inReplyTo'] = target_id
elif isinstance(in_reply_to, list):
if len(in_reply_to) > 1:
logger.warning(
"AS2 doesn't support multiple inReplyTo URLs! "
f'Only using the first: {in_reply_to[0]}')
activity['inReplyTo'] = in_reply_to[0]
# Mastodon evidently requires a Mention tag for replies to generate a
# notification to the original post's author. not required for likes,
# reposts, etc. details:
# https://github.com/snarfed/bridgy-fed/issues/34
if target:
for to in (util.get_list(target, 'attributedTo') +
util.get_list(target, 'actor')):
if isinstance(to, dict):
to = util.get_first(to, 'url') or to.get('id')
if to:
activity.setdefault('tag', []).append({
'type': 'Mention',
'href': to,
})
# activity objects (for Like, Announce, etc): prefer id over url
obj = activity.get('object')
if obj:
if isinstance(obj, dict) and not obj.get('id'):
obj['id'] = target_id or util.get_first(obj, 'url')
elif target_id and obj != target_id:
activity['object'] = target_id
# id is required for most things. default to url if it's not set.
if not activity.get('id'):
activity['id'] = util.get_first(activity, 'url')
# TODO: find a better way to check this, sometimes or always?
# removed for now since it fires on posts without u-id or u-url, eg
# https://chrisbeckstrom.com/2018/12/27/32551/
# assert activity.get('id') or (isinstance(obj, dict) and obj.get('id'))
activity['id'] = redirect_wrap(activity.get('id'))
activity['url'] = [redirect_wrap(u) for u in util.get_list(activity, 'url')]
if len(activity['url']) == 1:
activity['url'] = activity['url'][0]
# copy image(s) into attachment(s). may be Mastodon-specific.
# https://github.com/snarfed/bridgy-fed/issues/33#issuecomment-440965618
obj_or_activity = obj if isinstance(obj, dict) else activity
img = util.get_list(obj_or_activity, 'image')
if img:
obj_or_activity.setdefault('attachment', []).extend(img)
# cc target's author(s) and recipients
# https://www.w3.org/TR/activitystreams-vocabulary/#audienceTargeting
# https://w3c.github.io/activitypub/#delivery
if target and (type in as2.TYPE_TO_VERB or type in ('Article', 'Note')):
recips = itertools.chain(*(util.get_list(target, field) for field in
('actor', 'attributedTo', 'to', 'cc')))
activity['cc'] = util.dedupe_urls(util.get_url(recip) or recip.get('id')
for recip in recips)
# to public, since Mastodon interprets to public as public, cc public as unlisted:
# https://socialhub.activitypub.rocks/t/visibility-to-cc-mapping/284
# https://wordsmith.social/falkreon/securing-activitypub
to = activity.setdefault('to', [])
if as2.PUBLIC_AUDIENCE not in to:
to.append(as2.PUBLIC_AUDIENCE)
# wrap articles and notes in a Create activity
if create and type in ('Article', 'Note'):
activity = {
'@context': as2.CONTEXT,
'type': 'Create',
'id': f'{activity["id"]}#bridgy-fed-create',
'actor': postprocess_as2_actor({}, user),
'object': activity,
}
return util.trim_nulls(activity)
def postprocess_as2_actor(actor, user=None):
"""Prepare an AS2 actor object to be served or sent via ActivityPub.
Modifies actor in place.
Args:
actor: dict, AS2 actor object
user: :class:`User`
Returns:
actor dict
"""
url = user.homepage if user else None
urls = util.get_list(actor, 'url')
if not urls and url:
urls = [url]
domain = util.domain_from_link(urls[0], minimize=False)
urls[0] = redirect_wrap(urls[0])
actor.setdefault('id', host_url(domain))
actor.update({
'url': urls if len(urls) > 1 else urls[0],
# This has to be the domain for Mastodon interop/Webfinger discovery!
# See related comment in actor() below.
'preferredUsername': domain,
})
# Override the label for their home page to be "Web site"
for att in util.get_list(actor, 'attachment'):
if att.get('type') == 'PropertyValue':
val = att.get('value', '')
link = util.parse_html(val).find('a')
if url and (val == url or (link and link.get('href') == url)):
att['name'] = 'Web site'
# required by pixelfed. https://github.com/snarfed/bridgy-fed/issues/39
actor.setdefault('summary', '')
return actor
def redirect_wrap(url):
"""Returns a URL on our domain that redirects to this URL.
@@ -675,7 +182,9 @@ def actor(user):
error(f"Couldn't find a representative h-card (http://microformats.org/wiki/representative-hcard-parsing) on {mf2['url']}")
actor_as1 = microformats2.json_to_object(hcard, rel_urls=mf2.get('rel-urls'))
actor_as2 = postprocess_as2(as2.from_as1(actor_as1), user=user)
# TODO: fix circular dependency
import activitypub
actor_as2 = activitypub.postprocess_as2(as2.from_as1(actor_as1), user=user)
# TODO: unify with activitypub.actor()
actor_as2.update({
'id': host_url(domain),
@@ -696,96 +205,3 @@ def actor(user):
logger.info(f'Generated AS2 actor: {json_dumps(actor_as2, indent=2)}')
return hcard, actor_as1, actor_as2
def fetch_followers(domain, collection):
"""Fetches a page of Follower entities.
Wraps :func:`common.fetch_page`. Paging uses the `before` and `after` query
parameters, if available in the request.
Args:
domain: str, user to fetch entities for
collection: str, 'followers' or 'following'
Returns:
(results, new_before, new_after) tuple with:
results: list of Follower entities
new_before, new_after: str query param values for `before` and `after`
to fetch the previous and next pages, respectively
"""
assert collection in ('followers', 'following'), collection
domain_prop = Follower.dest if collection == 'followers' else Follower.src
query = Follower.query(
Follower.status == 'active',
domain_prop == domain,
).order(-Follower.updated)
return fetch_page(query, Follower)
def fetch_page(query, model_class):
"""Fetches a page of results from a datastore query.
Uses the `before` and `after` query params (if provided; should be ISO8601
timestamps) and the queried model class's `updated` property to identify the
page to fetch.
Populates a `log_url_path` property on each result entity that points to
its most recent logged request.
Args:
query: :class:`ndb.Query`
model_class: ndb model class
Returns:
(results, new_before, new_after) tuple with:
results: list of query result entities
new_before, new_after: str query param values for `before` and `after`
to fetch the previous and next pages, respectively
"""
# if there's a paging param ('before' or 'after'), update query with it
# TODO: unify this with Bridgy's user page
def get_paging_param(param):
val = request.values.get(param)
if val:
try:
dt = util.parse_iso8601(val.replace(' ', '+'))
except BaseException as e:
error(f"Couldn't parse {param}, {val!r} as ISO8601: {e}")
if dt.tzinfo:
dt = dt.astimezone(timezone.utc).replace(tzinfo=None)
return dt
before = get_paging_param('before')
after = get_paging_param('after')
if before and after:
error("can't handle both before and after")
elif after:
query = query.filter(model_class.updated >= after).order(model_class.updated)
elif before:
query = query.filter(model_class.updated < before).order(-model_class.updated)
else:
query = query.order(-model_class.updated)
query_iter = query.iter()
results = sorted(itertools.islice(query_iter, 0, PAGE_SIZE),
key=lambda r: r.updated, reverse=True)
# calculate new paging param(s)
has_next = results and query_iter.probably_has_next()
new_after = (
before if before
else results[0].updated if has_next and after
else None)
if new_after:
new_after = new_after.isoformat()
new_before = (
after if after else
results[-1].updated if has_next
else None)
if new_before:
new_before = new_before.isoformat()
return results, new_before, new_after


@@ -16,6 +16,8 @@ from oauth_dropins.webutil import util
from oauth_dropins.webutil.testutil import NOW
from oauth_dropins.webutil.util import json_dumps, json_loads
# import module instead of ActivityPub class to avoid circular import
import activitypub
from app import app
import common
from models import Follower, Object, User
@@ -156,7 +158,8 @@ class FollowCallback(indieauth.Callback):
flash(f"Couldn't find ActivityPub profile link for {addr}")
return redirect(f'/user/{domain}/following')
followee = common.get_object(as2_url, user=user).as2
# TODO: make this generic across protocols
followee = activitypub.ActivityPub.get_object(as2_url, user=user).as2
id = followee.get('id')
inbox = followee.get('inbox')
if not id or not inbox:
@@ -173,7 +176,7 @@ class FollowCallback(indieauth.Callback):
'actor': common.host_url(domain),
'to': [as2.PUBLIC_AUDIENCE],
}
common.signed_post(inbox, user=user, data=follow_as2)
activitypub.signed_post(inbox, user=user, data=follow_as2)
Follower.get_or_create(dest=id, src=domain, status='active',
last_follow=follow_as2)
@@ -230,10 +233,11 @@ class UnfollowCallback(indieauth.Callback):
followee_id = follower.dest
followee = follower.last_follow['object']
# TODO: make this generic across protocols
if isinstance(followee, str):
# fetch as AS2 to get full followee with inbox
followee_id = followee
followee = common.get_object(followee_id, user=user).as2
followee = activitypub.ActivityPub.get_object(followee_id, user=user).as2
inbox = followee.get('inbox')
if not inbox:
@@ -249,7 +253,7 @@ class UnfollowCallback(indieauth.Callback):
'actor': common.host_url(domain),
'object': follower.last_follow,
}
common.signed_post(inbox, user=user, data=unfollow_as2)
activitypub.signed_post(inbox, user=user, data=unfollow_as2)
follower.status = 'inactive'
follower.put()

119
models.py

@@ -1,6 +1,8 @@
"""Datastore model classes."""
import base64
from datetime import timezone
import difflib
import itertools
import logging
import urllib.parse
@@ -19,14 +21,18 @@ from oauth_dropins.webutil import util
from oauth_dropins.webutil.util import json_dumps, json_loads
import common
# import module instead of Protocol class to avoid circular import
import protocol
# https://github.com/snarfed/bridgy-fed/issues/314
WWW_DOMAINS = frozenset((
'www.jvt.me',
))
# TODO: eventually load from Protocol subclasses' IDs instead?
PROTOCOLS = ('activitypub', 'bluesky', 'ostatus', 'webmention', 'ui')
# 2048 bits makes tests slow, so use 1024 for them
KEY_BITS = 1024 if DEBUG else 2048
PAGE_SIZE = 20
logger = logging.getLogger(__name__)
@@ -275,6 +281,7 @@ class Object(StringIdModel):
# domains of the Bridgy Fed users this activity is to or from
domains = ndb.StringProperty(repeated=True)
status = ndb.StringProperty(choices=STATUSES)
# TODO: remove? is this redundant with the protocol-specific data fields below?
source_protocol = ndb.StringProperty(choices=PROTOCOLS)
labels = ndb.StringProperty(repeated=True, choices=LABELS)
@@ -289,8 +296,8 @@
# TODO: switch back to assert
# assert (self.as2 is not None) ^ (self.bsky is not None) ^ (self.mf2 is not None), \
# f'{self.as2} {self.bsky} {self.mf2}'
if not (self.as2 is not None) ^ (self.bsky is not None) ^ (self.mf2 is not None):
logging.warning(f'{self.key} has multiple! {self.as2 is not None} {self.bsky is not None} {self.mf2 is not None}')
if bool(self.as2) + bool(self.bsky) + bool(self.mf2) > 1:
logger.warning(f'{self.key} has multiple! {bool(self.as2)} {bool(self.bsky)} {bool(self.mf2)}')
if self.as2 is not None:
return as2.to_as1(common.redirect_unwrap(self.as2))
@@ -319,12 +326,21 @@
updated = ndb.DateTimeProperty(auto_now=True)
def _post_put_hook(self, future):
"""Update :func:`common.get_object` cache."""
"""Update :meth:`Protocol.get_object` cache."""
# TODO: assert that as1 id is same as key id? in pre put hook?
logger.info(f'Wrote Object {self.key.id()} {self.type} {self.status or ""} {self.labels} for {len(self.domains)} users')
if self.type != 'activity' and '#' not in self.key.id():
key = common.get_object.cache_key(self.key.id())
common.get_object.cache[key] = self
get_object = protocol.Protocol.get_object
key = get_object.cache_key(protocol.Protocol, self.key.id())
get_object.cache[key] = self
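# ie this overwrites the entry that Protocol.get_object's @cached decorator
# holds for this id, so later get_object calls see the fresh entity without
# refetching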
def clear(self):
"""Clears all data properties."""
for prop in 'as2', 'bsky', 'mf2':
val = getattr(self, prop, None)
if val:
logger.warning(f'Wiping out {prop}: {json_dumps(val, indent=2)}')
setattr(self, prop, None)
def proxy_url(self):
"""Returns the Bridgy Fed proxy URL to render this post as HTML."""
@@ -408,3 +424,96 @@ class Follower(StringIdModel):
"""Returns this follower as an AS2 actor dict, if possible."""
if self.last_follow:
return self.last_follow.get('actor' if util.is_web(self.src) else 'object')
@staticmethod
def fetch_page(domain, collection):
"""Fetches a page of Follower entities.
Wraps :func:`fetch_page`. Paging uses the `before` and `after` query
parameters, if available in the request.
Args:
domain: str, user to fetch entities for
collection: str, 'followers' or 'following'
Returns:
(results, new_before, new_after) tuple with:
results: list of Follower entities
new_before, new_after: str query param values for `before` and `after`
to fetch the previous and next pages, respectively
"""
assert collection in ('followers', 'following'), collection
domain_prop = Follower.dest if collection == 'followers' else Follower.src
query = Follower.query(
Follower.status == 'active',
domain_prop == domain,
).order(-Follower.updated)
return fetch_page(query, Follower)
def fetch_page(query, model_class):
"""Fetches a page of results from a datastore query.
Uses the `before` and `after` query params (if provided; should be ISO8601
timestamps) and the queried model class's `updated` property to identify the
page to fetch.
Populates a `log_url_path` property on each result entity that points to
its most recent logged request.
Args:
query: :class:`ndb.Query`
model_class: ndb model class
Returns:
(results, new_before, new_after) tuple with:
results: list of query result entities
new_before, new_after: str query param values for `before` and `after`
to fetch the previous and next pages, respectively
"""
# if there's a paging param ('before' or 'after'), update query with it
# TODO: unify this with Bridgy's user page
def get_paging_param(param):
val = request.values.get(param)
if val:
try:
dt = util.parse_iso8601(val.replace(' ', '+'))
except BaseException as e:
error(f"Couldn't parse {param}, {val!r} as ISO8601: {e}")
if dt.tzinfo:
dt = dt.astimezone(timezone.utc).replace(tzinfo=None)
return dt
before = get_paging_param('before')
after = get_paging_param('after')
if before and after:
error("can't handle both before and after")
elif after:
query = query.filter(model_class.updated >= after).order(model_class.updated)
elif before:
query = query.filter(model_class.updated < before).order(-model_class.updated)
else:
query = query.order(-model_class.updated)
query_iter = query.iter()
results = sorted(itertools.islice(query_iter, 0, PAGE_SIZE),
key=lambda r: r.updated, reverse=True)
# calculate new paging param(s)
has_next = results and query_iter.probably_has_next()
new_after = (
before if before
else results[0].updated if has_next and after
else None)
if new_after:
new_after = new_after.isoformat()
new_before = (
after if after else
results[-1].updated if has_next
else None)
if new_before:
new_before = new_before.isoformat()
return results, new_before, new_after
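# Example (sketch): paging through followers with the query params this
# expects, eg GET /user/example.com/followers?before=2023-03-08T00:00:00
#
#   followers, new_before, new_after = Follower.fetch_page('example.com',
#                                                          'followers')
#   # link the previous/next pages with ?before=new_before / ?after=new_after
#   # whenever those values are non-None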

pages.py

@@ -16,8 +16,8 @@ from oauth_dropins.webutil.util import json_dumps, json_loads
from app import app, cache
import common
from common import DOMAIN_RE, PAGE_SIZE
from models import Follower, Object, User
from common import DOMAIN_RE
from models import fetch_page, Follower, Object, PAGE_SIZE, User
FOLLOWERS_UI_LIMIT = 999
@@ -111,7 +111,7 @@ def followers_or_following(domain, collection):
if not (user := User.get_by_id(domain)): # user var is used in template
return USER_NOT_FOUND_HTML, 404
followers, before, after = common.fetch_followers(domain, collection)
followers, before, after = Follower.fetch_page(domain, collection)
for f in followers:
f.url = f.src if collection == 'followers' else f.dest
@@ -182,7 +182,7 @@ def fetch_objects(query, user):
new_before, new_after: str query param values for `before` and `after`
to fetch the previous and next pages, respectively
"""
objects, new_before, new_after = common.fetch_page(query, Object)
objects, new_before, new_after = fetch_page(query, Object)
seen = set()
# synthesize human-friendly content for objects

354
protocol.py 100644

@@ -0,0 +1,354 @@
"""Base protocol class and common code."""
import logging
import threading
from cachetools import cached, LRUCache
from google.cloud import ndb
from google.cloud.ndb import OR
from granary import as1, as2
import common
from common import error
# import module instead of individual classes to avoid circular import
import models
from oauth_dropins.webutil import util, webmention
SUPPORTED_TYPES = (
'accept',
'article',
'audio',
'comment',
'create',
'delete',
'follow',
'image',
'like',
'note',
'post',
'share',
'stop-following',
'undo',
'update',
'video',
)
# activity ids that we've already handled and can now ignore.
# used in Protocol.receive
seen_ids = LRUCache(100000)
seen_ids_lock = threading.Lock()
logger = logging.getLogger(__name__)
class Protocol:
"""Base protocol class. Not to be instantiated; classmethods only.
Attributes:
LABEL: str, label used in `Object.source_protocol`
"""
LABEL = None
def __init__(self):
assert False
@classmethod
def send(cls, activity, url):
"""Sends an outgoing activity.
To be implemented by subclasses.
Args:
activity: :class:`Object` with incoming activity
url: str, destination URL to send to
Raises:
:class:`werkzeug.HTTPException` if the request fails
"""
raise NotImplementedError()
@classmethod
def fetch(cls, obj, id, *, user=None):
"""Fetches a protocol-specific object and populates it into an :class:`Object`.
To be implemented by subclasses.
Args:
obj: :class:`Object` to populate the fetched object into
id: str, object's URL id
user: optional :class:`User` we're fetching on behalf of
Raises:
:class:`werkzeug.HTTPException` if the fetch fails
"""
raise NotImplementedError()
@classmethod
def receive(cls, id, *, user=None, **props):
"""Handles an incoming activity.
Args:
id: str, activity id
user: :class:`User`, optional receiving Bridgy Fed user
props: property values to populate into the :class:`Object`
Returns:
(response body, HTTP status code) tuple for Flask response
Raises:
:class:`werkzeug.HTTPException` if the request is invalid
"""
if not id:
error('Activity has no id')
# short circuit if we've already seen this activity id
with seen_ids_lock:
already_seen = id in seen_ids
seen_ids[id] = True
if already_seen or models.Object.get_by_id(id):
msg = f'Already handled this activity {id}'
logger.info(msg)
return msg, 200
obj = models.Object.get_or_insert(id)
obj.clear()
obj.populate(source_protocol=cls.LABEL, **props)
obj.put()
if obj.type not in SUPPORTED_TYPES:
error(f'Sorry, {obj.type} activities are not supported yet.', status=501)
# store inner object
inner_obj = as1.get_object(obj.as1)
inner_obj_id = inner_obj.get('id')
if obj.type in ('post', 'create', 'update') and inner_obj.keys() > set(['id']):
to_update = (models.Object.get_by_id(inner_obj_id)
or models.Object(id=inner_obj_id))
to_update.populate(as2=obj.as2['object'], source_protocol=cls.LABEL)
to_update.put()
actor = as1.get_object(obj.as1, 'actor')
actor_id = actor.get('id')
# handle activity!
if obj.type == 'accept': # eg in response to a Follow
return 'OK' # noop
elif obj.type == 'stop-following':
if not actor_id or not inner_obj_id:
error(f'Undo of Follow requires actor id and object id. Got: {actor_id} {inner_obj_id} {obj.as1}')
# deactivate Follower
followee_domain = util.domain_from_link(inner_obj_id, minimize=False)
follower = models.Follower.get_by_id(
models.Follower._id(dest=followee_domain, src=actor_id))
if follower:
logger.info(f'Marking {follower} inactive')
follower.status = 'inactive'
follower.put()
else:
logger.warning(f'No Follower found for {followee_domain} {actor_id}')
# TODO send webmention with 410 of u-follow
obj.status = 'complete'
obj.put()
return 'OK'
elif obj.type == 'update':
if not inner_obj_id:
error("Couldn't find id of object to update")
obj.status = 'complete'
obj.put()
return 'OK'
elif obj.type == 'delete':
if not inner_obj_id:
error("Couldn't find id of object to delete")
to_delete = models.Object.get_by_id(inner_obj_id)
if to_delete:
logger.info(f'Marking Object {inner_obj_id} deleted')
to_delete.deleted = True
to_delete.put()
# assume this is an actor
# https://github.com/snarfed/bridgy-fed/issues/63
logger.info(f'Deactivating Followers with src or dest = {inner_obj_id}')
followers = models.Follower.query(OR(models.Follower.src == inner_obj_id,
models.Follower.dest == inner_obj_id)
).fetch()
for f in followers:
f.status = 'inactive'
obj.status = 'complete'
ndb.put_multi(followers + [obj])
return 'OK'
# fetch actor if necessary so we have name, profile photo, etc
if actor and actor.keys() == set(['id']):
actor = obj.as2['actor'] = cls.get_object(actor['id'], user=user).as2
# fetch object if necessary so we can render it in feeds
if obj.type == 'share' and inner_obj.keys() == set(['id']):
inner_obj = obj.as2['object'] = cls.get_object(inner_obj_id, user=user).as2
if obj.type == 'follow':
resp = cls.accept_follow(obj, user)
# send webmentions to each target
send_webmentions(obj, proxy=True)
# deliver original posts and reposts to followers
if obj.type in ('share', 'create', 'post') and actor and actor_id:
logger.info(f'Delivering to followers of {actor_id}')
for f in models.Follower.query(models.Follower.dest == actor_id,
models.Follower.status == 'active',
projection=[models.Follower.src]):
if f.src not in obj.domains:
obj.domains.append(f.src)
if obj.domains and 'feed' not in obj.labels:
obj.labels.append('feed')
if obj.as1.get('objectType') == 'activity' and 'activity' not in obj.labels:
obj.labels.append('activity')
obj.put()
return 'OK'
@classmethod
@cached(LRUCache(1000), key=lambda cls, id, user=None: util.fragmentless(id),
lock=threading.Lock())
def get_object(cls, id, *, user=None):
"""Loads and returns an Object from memory cache, datastore, or HTTP fetch.
Assumes id is a URL. Any fragment at the end is stripped before loading.
This is currently underspecified and somewhat inconsistent across AP
implementations:
https://socialhub.activitypub.rocks/t/problems-posting-to-mastodon-inbox/801/11
https://socialhub.activitypub.rocks/t/problems-posting-to-mastodon-inbox/801/23
https://socialhub.activitypub.rocks/t/s2s-create-activity/1647/5
https://github.com/mastodon/mastodon/issues/13879 (open!)
https://github.com/w3c/activitypub/issues/224
Note that :meth:`Object._post_put_hook` updates the cache.
Args:
id: str
user: optional, :class:`User` used to sign HTTP request, if necessary
Returns: Object
Raises:
:class:`requests.HTTPError`, anything else that :meth:`fetch` raises
"""
id = util.fragmentless(id)
logger.info(f'Loading Object {id}')
obj = models.Object.get_by_id(id)
if obj:
if obj.as1:
logger.info(' got from datastore')
return obj
else:
obj = models.Object(id=id)
logger.info(f'Object not in datastore or has no data: {id}')
obj.clear()
cls.fetch(id, obj, user=user)
obj.source_protocol = cls.LABEL
obj.put()
return obj
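# Illustrative usage only (hypothetical caller and ids, not part of this
# module): the layers above are consulted as memory cache -> datastore ->
# cls.fetch() over HTTP, keyed on the fragmentless id, so:
#
#   obj = ActivityPub.get_object('http://inst/note')        # datastore/HTTP
#   obj = ActivityPub.get_object('http://inst/note#frag')   # memory cache hit
#
# both calls share one cache entry because the fragment is stripped first.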
def send_webmentions(obj, proxy=None):
"""Sends webmentions for an incoming ActivityPub inbox delivery.
Args:
obj: :class:`Object`
proxy: boolean, whether to use our proxy URL as the webmention source
Returns: boolean, True if any webmentions were sent, False otherwise
"""
# extract source and targets
source = obj.as1.get('url') or obj.as1.get('id')
inner_obj = obj.as1.get('object')
obj_url = util.get_url(inner_obj)
targets = util.get_list(obj.as1, 'inReplyTo')
if isinstance(inner_obj, dict):
if not source or obj.type in ('create', 'post', 'update'):
source = obj_url or inner_obj.get('id')
targets.extend(util.get_list(inner_obj, 'inReplyTo'))
if not source:
error("Couldn't find original post URL")
for tag in (util.get_list(obj.as1, 'tags') +
util.get_list(as1.get_object(obj.as1), 'tags')):
if tag.get('objectType') == 'mention':
url = tag.get('url')
if url:
targets.append(url)
if obj.type in ('follow', 'like', 'share'):
targets.append(obj_url)
targets = util.dedupe_urls(util.get_url(t) for t in targets)
targets = common.remove_blocklisted(t.lower() for t in targets)
if not targets:
logger.info("Couldn't find any IndieWeb target URLs in inReplyTo, object, or mention tags")
return False
logger.info(f'targets: {targets}')
# send webmentions and update Object
errors = [] # stores (code, body) tuples
targets = [models.Target(uri=uri, protocol='activitypub') for uri in targets]
obj.populate(
undelivered=targets,
status='in progress',
)
if obj.undelivered and 'notification' not in obj.labels:
obj.labels.append('notification')
while obj.undelivered:
target = obj.undelivered.pop()
domain = util.domain_from_link(target.uri, minimize=False)
if domain == util.domain_from_link(source, minimize=False):
logger.info(f'Skipping same-domain webmention from {source} to {target.uri}')
continue
if domain not in obj.domains:
obj.domains.append(domain)
wm_source = (obj.proxy_url()
if obj.type in ('follow', 'like', 'share') or proxy
else source)
logger.info(f'Sending webmention from {wm_source} to {target.uri}')
try:
endpoint = webmention.discover(target.uri).endpoint
if endpoint:
webmention.send(endpoint, wm_source, target.uri)
logger.info('Success!')
obj.delivered.append(target)
else:
logger.info('No webmention endpoint')
except BaseException as e:
code, body = util.interpret_http_exception(e)
if not code and not body:
raise
errors.append((code, body))
obj.failed.append(target)
obj.put()
obj.status = ('complete' if obj.delivered or obj.domains
else 'failed' if obj.failed
else 'ignored')
if errors:
msg = 'Errors: ' + ', '.join(f'{code} {body}' for code, body in errors)
error(msg, status=int(errors[0][0] or 502))
return True
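(The class above leaves send() and fetch() abstract, and the testutil changes below leave a TODO for a FakeProtocol to exercise them generically. A minimal sketch of such a subclass, under the signatures above; FakeProtocol, its in-memory stores, and the sample ids are assumptions, and actually running it needs the ndb context the tests set up.)

    from protocol import Protocol

    class FakeProtocol(Protocol):
        """In-memory Protocol subclass; hypothetical, not part of this commit."""
        LABEL = 'fake'

        remote = {}  # id => AS2 object, simulating a remote server
        sent = []    # (url, activity) pairs we've "delivered"

        @classmethod
        def send(cls, url, activity, *, user=None):
            cls.sent.append((url, activity))

        @classmethod
        def fetch(cls, id, obj, *, user=None):
            obj.as2 = cls.remote[id]

    # Usage: seed a "remote" object; get_object() loads it via fetch() and
    # caches it, so a second call hits the LRU cache instead of fetch().
    FakeProtocol.remote['http://x/actor'] = {'type': 'Person', 'id': 'http://x/actor'}
    actor = FakeProtocol.get_object('http://x/actor')
    assert actor.as2['type'] == 'Person'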


@@ -21,12 +21,10 @@ from oauth_dropins.webutil import flask_util, util
from oauth_dropins.webutil.flask_util import error
from oauth_dropins.webutil.util import json_dumps, json_loads
# import module instead of individual functions to avoid circular import
import activitypub
from app import app, cache
from common import (
CACHE_TIME,
CONTENT_TYPE_HTML,
postprocess_as2,
)
from common import CACHE_TIME, CONTENT_TYPE_HTML
from models import Object, User
logger = logging.getLogger(__name__)
@@ -82,8 +80,8 @@ def redir(to):
obj = Object.get_by_id(to)
if not obj or obj.deleted:
return f'Object not found: {to}', 404
ret = postprocess_as2(as2.from_as1(obj.as1),
user, create=False)
ret = activitypub.postprocess_as2(as2.from_as1(obj.as1),
user, create=False)
logger.info(f'Returning: {json_dumps(ret, indent=2)}')
return ret, {
'Content-Type': type,


@@ -16,11 +16,16 @@ from oauth_dropins.webutil.testutil import requests_response
from oauth_dropins.webutil.util import json_dumps, json_loads
import requests
from urllib3.exceptions import ReadTimeoutError
from werkzeug.exceptions import BadGateway
import activitypub
from activitypub import ActivityPub
from app import app
import common
import models
from models import Follower, Object, User
import protocol
from protocol import Protocol
from . import testutil
ACTOR = {
@@ -194,6 +199,22 @@ UPDATE_NOTE = {
WEBMENTION_DISCOVERY = requests_response(
'<html><head><link rel="webmention" href="/webmention"></html>')
HTML = requests_response('<html></html>', headers={
'Content-Type': common.CONTENT_TYPE_HTML,
})
HTML_WITH_AS2 = requests_response("""\
<html><meta>
<link href='http://as2' rel='alternate' type='application/activity+json'>
</meta></html>
""", headers={
'Content-Type': common.CONTENT_TYPE_HTML,
})
AS2_OBJ = {'foo': ['bar']}
AS2 = requests_response(AS2_OBJ, headers={
'Content-Type': as2.CONTENT_TYPE,
})
NOT_ACCEPTABLE = requests_response(status=406)
@patch('requests.post')
@patch('requests.get')
@@ -489,10 +510,7 @@ class ActivityPubTest(testutil.TestCase):
got = self.post('/foo.com/inbox', json=not_public)
self.assertEqual(200, got.status_code, got.get_data(as_text=True))
obj = Object.get_by_id(not_public['id'])
self.assertEqual([], obj.labels)
self.assertEqual([], obj.domains)
self.assertIsNone(Object.get_by_id(not_public['id']))
self.assertIsNone(Object.get_by_id(not_public['object']['id']))
def test_inbox_mention_object(self, *mocks):
@@ -525,8 +543,10 @@
type='note')
def _test_inbox_mention(self, mention, expected_props, mock_head, mock_get, mock_post):
mock_get.return_value = requests_response(
'<html><head><link rel="webmention" href="/webmention"></html>')
mock_get.side_effect = [
WEBMENTION_DISCOVERY,
HTML,
]
mock_post.return_value = requests_response()
got = self.post('/foo.com/inbox', json=mention)
@@ -546,7 +566,7 @@
expected_as2 = common.redirect_unwrap(mention)
self.assert_object(mention['id'],
domains=['tar.get'],
domains=['tar.get', 'masto.foo'],
source_protocol='activitypub',
status='complete',
as2=expected_as2,
@@ -798,10 +818,12 @@ class ActivityPubTest(testutil.TestCase):
self.assertIsNone(Object.get_by_id(bad_url))
@patch('activitypub.logger.info', side_effect=logging.info)
def test_inbox_verify_http_signature(self, mock_info, _, mock_get, ___):
@patch('common.logger.info', side_effect=logging.info)
def test_inbox_verify_http_signature(self, mock_common_log, mock_activitypub_log,
_, mock_get, ___):
# actor with a public key
self.key_id_obj.delete()
common.get_object.cache.clear()
Protocol.get_object.cache.clear()
mock_get.return_value = self.as2_resp({
**ACTOR,
'publicKey': {
@@ -819,10 +841,10 @@
mock_get.assert_has_calls((
self.as2_req('http://my/key/id'),
))
mock_info.assert_any_call('HTTP Signature verified!')
mock_activitypub_log.assert_any_call('HTTP Signature verified!')
# invalid signature, missing keyId
activitypub.seen_ids.clear()
protocol.seen_ids.clear()
obj_key = ndb.Key(Object, NOTE['id'])
obj_key.delete()
@@ -833,10 +855,10 @@
})
self.assertEqual(401, resp.status_code)
self.assertEqual({'error': 'HTTP Signature missing keyId'}, resp.json)
mock_info.assert_any_call('Returning 401: HTTP Signature missing keyId')
mock_common_log.assert_any_call('Returning 401: HTTP Signature missing keyId')
# invalid signature, content changed
activitypub.seen_ids.clear()
protocol.seen_ids.clear()
obj_key = ndb.Key(Object, NOTE['id'])
obj_key.delete()
@@ -844,25 +866,25 @@
self.assertEqual(401, resp.status_code)
self.assertEqual({'error': 'Invalid Digest header, required for HTTP Signature'},
resp.json)
mock_info.assert_any_call('Returning 401: Invalid Digest header, required for HTTP Signature')
mock_common_log.assert_any_call('Returning 401: Invalid Digest header, required for HTTP Signature')
# invalid signature, header changed
activitypub.seen_ids.clear()
protocol.seen_ids.clear()
obj_key.delete()
orig_date = headers['Date']
resp = self.client.post('/inbox', data=body, headers={**headers, 'Date': 'X'})
self.assertEqual(401, resp.status_code)
self.assertEqual({'error': 'HTTP Signature verification failed'}, resp.json)
mock_info.assert_any_call('Returning 401: HTTP Signature verification failed')
mock_common_log.assert_any_call('Returning 401: HTTP Signature verification failed')
# no signature
activitypub.seen_ids.clear()
protocol.seen_ids.clear()
obj_key.delete()
resp = self.client.post('/inbox', json=NOTE)
self.assertEqual(401, resp.status_code, resp.get_data(as_text=True))
self.assertEqual({'error': 'No HTTP Signature'}, resp.json)
mock_info.assert_any_call('Returning 401: No HTTP Signature')
mock_common_log.assert_any_call('Returning 401: No HTTP Signature')
def test_delete_actor(self, _, mock_get, ___):
follower = Follower.get_or_create('foo.com', DELETE['actor'])
@@ -901,7 +923,7 @@
status='complete')
obj.deleted = True
self.assert_entities_equal(obj, common.get_object.cache['http://an/obj'])
self.assert_entities_equal(obj, Protocol.get_object.cache['http://an/obj'])
def test_update_note(self, *mocks):
Object(id='https://a/note', as2={}).put()
@@ -925,7 +947,7 @@
type='update', status='complete', as2=UPDATE_NOTE)
self.assert_entities_equal(Object.get_by_id('https://a/note'),
common.get_object.cache['https://a/note'])
Protocol.get_object.cache['https://a/note'])
def test_inbox_webmention_discovery_connection_fails(self, mock_head,
mock_get, mock_post):
@@ -1021,7 +1043,7 @@
},
}, resp.json)
@patch('common.PAGE_SIZE', 1)
@patch('models.PAGE_SIZE', 1)
def test_followers_collection_page(self, *args):
User.get_or_create('foo.com')
self.store_followers()
@@ -1089,7 +1111,7 @@
},
}, resp.json)
@patch('common.PAGE_SIZE', 1)
@patch('models.PAGE_SIZE', 1)
def test_following_collection_page(self, *args):
User.get_or_create('foo.com')
self.store_following()
@@ -1123,3 +1145,246 @@
'items': [],
},
}, resp.json)
class ActivityPubUtilsTest(testutil.TestCase):
def setUp(self):
super().setUp()
self.user = User.get_or_create('foo.com', has_hcard=True, actor_as2=ACTOR)
self.app_context = app.test_request_context('/')
self.app_context.__enter__()
def tearDown(self):
self.app_context.__exit__(None, None, None)
super().tearDown()
def test_postprocess_as2_multiple_in_reply_tos(self):
self.assert_equals({
'id': 'http://localhost/r/xyz',
'inReplyTo': 'foo',
'to': [as2.PUBLIC_AUDIENCE],
}, activitypub.postprocess_as2({
'id': 'xyz',
'inReplyTo': ['foo', 'bar'],
}, user=User(id='site')))
def test_postprocess_as2_multiple_url(self):
self.assert_equals({
'id': 'http://localhost/r/xyz',
'url': ['http://localhost/r/foo', 'http://localhost/r/bar'],
'to': [as2.PUBLIC_AUDIENCE],
}, activitypub.postprocess_as2({
'id': 'xyz',
'url': ['foo', 'bar'],
}, user=User(id='site')))
def test_postprocess_as2_multiple_image(self):
self.assert_equals({
'id': 'http://localhost/r/xyz',
'attachment': [{'url': 'http://r/foo'}, {'url': 'http://r/bar'}],
'image': [{'url': 'http://r/foo'}, {'url': 'http://r/bar'}],
'to': [as2.PUBLIC_AUDIENCE],
}, activitypub.postprocess_as2({
'id': 'xyz',
'image': [{'url': 'http://r/foo'}, {'url': 'http://r/bar'}],
}, user=User(id='site')))
def test_postprocess_as2_actor_attributedTo(self):
self.assert_equals({
'actor': {
'id': 'baj',
'preferredUsername': 'site',
'url': 'http://localhost/r/https://site/',
},
'attributedTo': [{
'id': 'bar',
'preferredUsername': 'site',
'url': 'http://localhost/r/https://site/',
}, {
'id': 'baz',
'preferredUsername': 'site',
'url': 'http://localhost/r/https://site/',
}],
'to': [as2.PUBLIC_AUDIENCE],
}, activitypub.postprocess_as2({
'attributedTo': [{'id': 'bar'}, {'id': 'baz'}],
'actor': {'id': 'baj'},
}, user=User(id='site')))
def test_postprocess_as2_note(self):
self.assert_equals({
'@context': 'https://www.w3.org/ns/activitystreams',
'id': 'http://localhost/r/xyz#bridgy-fed-create',
'type': 'Create',
'actor': {
'id': 'http://localhost/site',
'url': 'http://localhost/r/https://site/',
'preferredUsername': 'site'
},
'object': {
'id': 'http://localhost/r/xyz',
'type': 'Note',
'to': [as2.PUBLIC_AUDIENCE],
},
}, activitypub.postprocess_as2({
'id': 'xyz',
'type': 'Note',
}, user=User(id='site')))
# TODO: make these generic and use FakeProtocol
@patch('requests.get')
def test_get_object_http(self, mock_get):
mock_get.return_value = AS2
id = 'http://the/id'
self.assertIsNone(Object.get_by_id(id))
# first time fetches over HTTP
got = ActivityPub.get_object(id)
self.assert_equals(id, got.key.id())
self.assert_equals(AS2_OBJ, got.as2)
mock_get.assert_has_calls([self.as2_req(id)])
# second time is in cache
got.key.delete()
mock_get.reset_mock()
got = ActivityPub.get_object(id)
self.assert_equals(id, got.key.id())
self.assert_equals(AS2_OBJ, got.as2)
mock_get.assert_not_called()
@patch('requests.get')
def test_get_object_datastore(self, mock_get):
id = 'http://the/id'
stored = Object(id=id, as2=AS2_OBJ)
stored.put()
Protocol.get_object.cache.clear()
# first time loads from datastore
got = ActivityPub.get_object(id)
self.assert_entities_equal(stored, got)
mock_get.assert_not_called()
# second time is in cache
stored.key.delete()
got = ActivityPub.get_object(id)
self.assert_entities_equal(stored, got)
mock_get.assert_not_called()
@patch('requests.get')
def test_get_object_strips_fragment(self, mock_get):
stored = Object(id='http://the/id', as2=AS2_OBJ)
stored.put()
Protocol.get_object.cache.clear()
got = ActivityPub.get_object('http://the/id#ignore')
self.assert_entities_equal(stored, got)
mock_get.assert_not_called()
@patch('requests.get')
def test_get_object_datastore_no_as2(self, mock_get):
"""If the stored Object has no as2, we should fall back to HTTP."""
id = 'http://the/id'
stored = Object(id=id, as2={}, status='in progress')
stored.put()
Protocol.get_object.cache.clear()
mock_get.return_value = AS2
got = ActivityPub.get_object(id)
mock_get.assert_has_calls([self.as2_req(id)])
self.assert_equals(id, got.key.id())
self.assert_equals(AS2_OBJ, got.as2)
mock_get.assert_has_calls([self.as2_req(id)])
self.assert_object(id, as2=AS2_OBJ, as1=AS2_OBJ,
source_protocol='activitypub',
# check that it reused our original Object
status='in progress')
@patch('requests.get')
def test_signed_get_redirects_manually_with_new_sig_headers(self, mock_get):
mock_get.side_effect = [
requests_response(status=302, redirected_url='http://second',
allow_redirects=False),
requests_response(status=200, allow_redirects=False),
]
resp = activitypub.signed_get('https://first', user=self.user)
first = mock_get.call_args_list[0][1]
second = mock_get.call_args_list[1][1]
self.assertNotEqual(first['headers'], second['headers'])
self.assertNotEqual(
first['auth'].header_signer.sign(first['headers'], method='GET', path='/'),
second['auth'].header_signer.sign(second['headers'], method='GET', path='/'))
@patch('requests.post')
def test_signed_post_ignores_redirect(self, mock_post):
mock_post.side_effect = [
requests_response(status=302, redirected_url='http://second',
allow_redirects=False),
]
resp = activitypub.signed_post('https://first', user=self.user)
mock_post.assert_called_once()
self.assertEqual(302, resp.status_code)
@patch('requests.get')
def test_fetch_direct(self, mock_get):
mock_get.return_value = AS2
obj = Object()
ActivityPub.fetch('http://orig', obj, user=self.user)
self.assertEqual(AS2_OBJ, obj.as2)
mock_get.assert_has_calls((
self.as2_req('http://orig'),
))
@patch('requests.get')
def test_fetch_via_html(self, mock_get):
mock_get.side_effect = [HTML_WITH_AS2, AS2]
obj = Object()
ActivityPub.fetch('http://orig', obj, user=self.user)
self.assertEqual(AS2_OBJ, obj.as2)
mock_get.assert_has_calls((
self.as2_req('http://orig'),
self.as2_req('http://as2', headers=common.as2.CONNEG_HEADERS),
))
@patch('requests.get')
def test_fetch_only_html(self, mock_get):
mock_get.return_value = HTML
with self.assertRaises(BadGateway):
ActivityPub.fetch('http://orig', Object(), user=self.user)
@patch('requests.get')
def test_fetch_not_acceptable(self, mock_get):
mock_get.return_value = NOT_ACCEPTABLE
with self.assertRaises(BadGateway):
ActivityPub.fetch('http://orig', Object(), user=self.user)
@patch('requests.get')
def test_fetch_ssl_error(self, mock_get):
mock_get.side_effect = requests.exceptions.SSLError
with self.assertRaises(BadGateway):
ActivityPub.fetch('http://orig', Object(), user=self.user)
@patch('requests.get')
def test_fetch_datastore_no_content(self, mock_get):
mock_get.return_value = self.as2_resp('')
with self.assertRaises(BadGateway):
got = ActivityPub.fetch('http://the/id', Object())
mock_get.assert_has_calls([self.as2_req('http://the/id')])
@patch('requests.get')
def test_fetch_datastore_not_json(self, mock_get):
mock_get.return_value = self.as2_resp('XYZ not JSON')
with self.assertRaises(BadGateway):
got = ActivityPub.fetch('http://the/id', Object())
mock_get.assert_has_calls([self.as2_req('http://the/id')])
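(The fixtures above, HTML, HTML_WITH_AS2, and AS2, exercise fetch()'s content negotiation: an HTML response is only useful if it carries a rel-alternate link to an AS2 representation. A rough, self-contained sketch of that flow; fetch_as2 and its parsing details are assumptions, not Bridgy Fed's implementation.)

    from html.parser import HTMLParser

    import requests

    AS2_TYPES = ('application/activity+json', 'application/ld+json')
    ACCEPT = f'{AS2_TYPES[0]}; q=0.9, text/html; q=0.7'

    class AlternateLinkFinder(HTMLParser):
        """Finds the first <link rel="alternate"> with an AS2 content type."""
        def __init__(self):
            super().__init__()
            self.href = None

        def handle_starttag(self, tag, attrs):
            attrs = dict(attrs)
            if (tag == 'link' and self.href is None
                    and attrs.get('rel') == 'alternate'
                    and attrs.get('type', '').split(';')[0].strip() in AS2_TYPES):
                self.href = attrs.get('href')

    def fetch_as2(url):
        """Returns AS2 JSON for url, following HTML rel-alternate links."""
        resp = requests.get(url, headers={'Accept': ACCEPT}, timeout=15)
        resp.raise_for_status()
        ctype = resp.headers.get('Content-Type', '').split(';')[0]
        if ctype in AS2_TYPES:
            return resp.json()
        if ctype == 'text/html':
            finder = AlternateLinkFinder()
            finder.feed(resp.text)
            if finder.href:
                return fetch_as2(finder.href)
        raise ValueError(f'no AS2 representation found at {url}')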


@@ -1,4 +1,3 @@
# coding=utf-8
"""Unit tests for common.py."""
from unittest import mock
@@ -6,29 +5,13 @@ from granary import as2
from oauth_dropins.webutil import appengine_config, util
from oauth_dropins.webutil.testutil import requests_response
import requests
from werkzeug.exceptions import BadGateway
from app import app
import common
from models import Object, User
import protocol
from . import testutil
HTML = requests_response('<html></html>', headers={
'Content-Type': common.CONTENT_TYPE_HTML,
})
HTML_WITH_AS2 = requests_response("""\
<html><meta>
<link href='http://as2' rel='alternate' type='application/activity+json'>
</meta></html>
""", headers={
'Content-Type': common.CONTENT_TYPE_HTML,
})
AS2_OBJ = {'foo': ['bar']}
AS2 = requests_response(AS2_OBJ, headers={
'Content-Type': as2.CONTENT_TYPE,
})
NOT_ACCEPTABLE = requests_response(status=406)
class CommonTest(testutil.TestCase):
@classmethod
@@ -61,56 +44,6 @@ class CommonTest(testutil.TestCase):
'<a href="/user/site"><img src="" class="profile"> site</a>',
common.pretty_link('https://site/', user=self.user))
@mock.patch('requests.get', return_value=AS2)
def test_get_as2_direct(self, mock_get):
resp = common.get_as2('http://orig', user=self.user)
self.assertEqual(AS2_OBJ, resp)
mock_get.assert_has_calls((
self.as2_req('http://orig'),
))
@mock.patch('requests.get', side_effect=[HTML_WITH_AS2, AS2])
def test_get_as2_via_html(self, mock_get):
resp = common.get_as2('http://orig', user=self.user)
self.assertEqual(AS2_OBJ, resp)
mock_get.assert_has_calls((
self.as2_req('http://orig'),
self.as2_req('http://as2', headers=common.as2.CONNEG_HEADERS),
))
@mock.patch('requests.get', return_value=HTML)
def test_get_as2_only_html(self, mock_get):
with self.assertRaises(BadGateway):
resp = common.get_as2('http://orig', user=self.user)
@mock.patch('requests.get', return_value=NOT_ACCEPTABLE)
def test_get_as2_not_acceptable(self, mock_get):
with self.assertRaises(BadGateway):
resp = common.get_as2('http://orig', user=self.user)
@mock.patch('requests.get', side_effect=requests.exceptions.SSLError)
def test_get_as2_ssl_error(self, mock_get):
with self.assertRaises(BadGateway):
resp = common.get_as2('http://orig', user=self.user)
@mock.patch('requests.get')
def test_get_as2_datastore_no_content(self, mock_get):
mock_get.return_value = self.as2_resp('')
with self.assertRaises(BadGateway):
got = common.get_as2('http://the/id')
mock_get.assert_has_calls([self.as2_req('http://the/id')])
@mock.patch('requests.get')
def test_get_as2_datastore_not_json(self, mock_get):
mock_get.return_value = self.as2_resp('XYZ not JSON')
with self.assertRaises(BadGateway):
got = common.get_as2('http://the/id')
mock_get.assert_has_calls([self.as2_req('http://the/id')])
def test_redirect_wrap_empty(self):
self.assertIsNone(common.redirect_wrap(None))
self.assertEqual('', common.redirect_wrap(''))
@@ -136,84 +69,6 @@
{'object': {'id': 'https://foo.com/'}},
common.redirect_unwrap({'object': {'id': 'http://localhost/foo.com'}}))
def test_postprocess_as2_multiple_in_reply_tos(self):
with app.test_request_context('/'):
self.assert_equals({
'id': 'http://localhost/r/xyz',
'inReplyTo': 'foo',
'to': [as2.PUBLIC_AUDIENCE],
}, common.postprocess_as2({
'id': 'xyz',
'inReplyTo': ['foo', 'bar'],
}, user=User(id='site')))
def test_postprocess_as2_multiple_url(self):
with app.test_request_context('/'):
self.assert_equals({
'id': 'http://localhost/r/xyz',
'url': ['http://localhost/r/foo', 'http://localhost/r/bar'],
'to': [as2.PUBLIC_AUDIENCE],
}, common.postprocess_as2({
'id': 'xyz',
'url': ['foo', 'bar'],
}, user=User(id='site')))
def test_postprocess_as2_multiple_image(self):
with app.test_request_context('/'):
self.assert_equals({
'id': 'http://localhost/r/xyz',
'attachment': [{'url': 'http://r/foo'}, {'url': 'http://r/bar'}],
'image': [{'url': 'http://r/foo'}, {'url': 'http://r/bar'}],
'to': [as2.PUBLIC_AUDIENCE],
}, common.postprocess_as2({
'id': 'xyz',
'image': [{'url': 'http://r/foo'}, {'url': 'http://r/bar'}],
}, user=User(id='site')))
def test_postprocess_as2_actor_attributedTo(self):
with app.test_request_context('/'):
self.assert_equals({
'actor': {
'id': 'baj',
'preferredUsername': 'site',
'url': 'http://localhost/r/https://site/',
},
'attributedTo': [{
'id': 'bar',
'preferredUsername': 'site',
'url': 'http://localhost/r/https://site/',
}, {
'id': 'baz',
'preferredUsername': 'site',
'url': 'http://localhost/r/https://site/',
}],
'to': [as2.PUBLIC_AUDIENCE],
}, common.postprocess_as2({
'attributedTo': [{'id': 'bar'}, {'id': 'baz'}],
'actor': {'id': 'baj'},
}, user=User(id='site')))
def test_postprocess_as2_note(self):
with app.test_request_context('/'):
self.assert_equals({
'@context': 'https://www.w3.org/ns/activitystreams',
'id': 'http://localhost/r/xyz#bridgy-fed-create',
'type': 'Create',
'actor': {
'id': 'http://localhost/site',
'url': 'http://localhost/r/https://site/',
'preferredUsername': 'site'
},
'object': {
'id': 'http://localhost/r/xyz',
'type': 'Note',
'to': [as2.PUBLIC_AUDIENCE],
},
}, common.postprocess_as2({
'id': 'xyz',
'type': 'Note',
}, user=User(id='site')))
def test_host_url(self):
with app.test_request_context():
self.assertEqual('http://localhost/', common.host_url())
@@ -227,96 +82,3 @@
with app.test_request_context(base_url='http://bridgy-federated.uc.r.appspot.com'):
self.assertEqual('https://fed.brid.gy/asdf', common.host_url('asdf'))
@mock.patch('requests.get')
def test_signed_get_redirects_manually_with_new_sig_headers(self, mock_get):
mock_get.side_effect = [
requests_response(status=302, redirected_url='http://second',
allow_redirects=False),
requests_response(status=200, allow_redirects=False),
]
resp = common.signed_get('https://first', user=self.user)
first = mock_get.call_args_list[0][1]
second = mock_get.call_args_list[1][1]
self.assertNotEqual(first['headers'], second['headers'])
self.assertNotEqual(
first['auth'].header_signer.sign(first['headers'], method='GET', path='/'),
second['auth'].header_signer.sign(second['headers'], method='GET', path='/'))
@mock.patch('requests.post')
def test_signed_post_ignores_redirect(self, mock_post):
mock_post.side_effect = [
requests_response(status=302, redirected_url='http://second',
allow_redirects=False),
]
resp = common.signed_post('https://first', user=self.user)
mock_post.assert_called_once()
self.assertEqual(302, resp.status_code)
@mock.patch('requests.get', return_value=AS2)
def test_get_object_http(self, mock_get):
self.assertEqual(0, Object.query().count())
# first time fetches over HTTP
id = 'http://the/id'
got = common.get_object(id)
self.assert_equals(id, got.key.id())
self.assert_equals(AS2_OBJ, got.as2)
mock_get.assert_has_calls([self.as2_req(id)])
# second time is in cache
got.key.delete()
mock_get.reset_mock()
got = common.get_object(id)
self.assert_equals(id, got.key.id())
self.assert_equals(AS2_OBJ, got.as2)
mock_get.assert_not_called()
@mock.patch('requests.get')
def test_get_object_datastore(self, mock_get):
id = 'http://the/id'
stored = Object(id=id, as2=AS2_OBJ)
stored.put()
common.get_object.cache.clear()
# first time loads from datastore
got = common.get_object(id)
self.assert_entities_equal(stored, got)
mock_get.assert_not_called()
# second time is in cache
stored.key.delete()
got = common.get_object(id)
self.assert_entities_equal(stored, got)
mock_get.assert_not_called()
@mock.patch('requests.get')
def test_get_object_strips_fragment(self, mock_get):
stored = Object(id='http://the/id', as2=AS2_OBJ)
stored.put()
common.get_object.cache.clear()
got = common.get_object('http://the/id#ignore')
self.assert_entities_equal(stored, got)
mock_get.assert_not_called()
@mock.patch('requests.get', return_value=AS2)
def test_get_object_datastore_no_as2(self, mock_get):
"""If the stored Object has no as2, we should fall back to HTTP."""
id = 'http://the/id'
stored = Object(id=id, as2={}, status='in progress')
stored.put()
common.get_object.cache.clear()
got = common.get_object(id)
mock_get.assert_has_calls([self.as2_req(id)])
self.assert_equals(id, got.key.id())
self.assert_equals(AS2_OBJ, got.as2)
mock_get.assert_has_calls([self.as2_req(id)])
self.assert_object(id, as2=AS2_OBJ, as1=AS2_OBJ,
source_protocol='activitypub',
# check that it reused our original Object
status='in progress')


@@ -9,6 +9,7 @@ from oauth_dropins.webutil.testutil import requests_response
from app import app
import common
from models import Follower, Object, User
from protocol import Protocol
from . import testutil
from .test_activitypub import ACTOR
@@ -284,15 +285,17 @@ class ObjectTest(testutil.TestCase):
def test_put_updates_get_object_cache(self):
obj = Object(id='x', as2={})
obj.put()
key = common.get_object.cache_key('x')
self.assert_entities_equal(obj, common.get_object.cache[key])
key = Protocol.get_object.cache_key(Protocol, 'x')
self.assert_entities_equal(obj, Protocol.get_object.cache[key])
def test_put_fragment_id_doesnt_update_get_object_cache(self):
obj = Object(id='x#y', as2={})
obj.put()
self.assertNotIn(common.get_object.cache_key('x#y'), common.get_object.cache)
self.assertNotIn(common.get_object.cache_key('x'), common.get_object.cache)
self.assertNotIn(Protocol.get_object.cache_key(Protocol, 'x#y'),
Protocol.get_object.cache)
self.assertNotIn(Protocol.get_object.cache_key(Protocol, 'x'),
Protocol.get_object.cache)
def test_computed_properties_without_as1(self):
Object(id='a').put()


@@ -17,7 +17,6 @@ import requests
import activitypub
from app import app
from common import (
CONNEG_HEADERS_AS2_HTML,
CONTENT_TYPE_HTML,
redirect_unwrap,
)
@@ -626,8 +625,13 @@
)
def test_link_rel_alternate_as2(self, mock_get, mock_post):
mock_get.side_effect = [self.reply, self.not_fediverse,
self.orig_html_as2, self.orig_as2, self.actor]
mock_get.side_effect = [
self.reply,
self.not_fediverse,
self.orig_html_as2,
self.orig_as2,
self.actor,
]
mock_post.return_value = requests_response('abc xyz')
got = self.client.post('/webmention', data={


@@ -1,5 +1,4 @@
"""Common test utility code.
"""
"""Common test utility code."""
import copy
import datetime
import unittest
@@ -19,8 +18,11 @@ import requests
from app import app, cache
import activitypub, common
from models import Object, Target
import protocol
# TODO: FakeProtocol class
class TestCase(unittest.TestCase, testutil.Asserts):
maxDiff = None
@@ -28,8 +30,8 @@ class TestCase(unittest.TestCase, testutil.Asserts):
super().setUp()
app.testing = True
cache.clear()
activitypub.seen_ids.clear()
common.get_object.cache.clear()
protocol.seen_ids.clear()
protocol.Protocol.get_object.cache.clear()
self.client = app.test_client()
self.client.__enter__()
@@ -80,7 +82,7 @@
'Host': util.domain_from_link(url, minimize=False),
'Content-Type': 'application/activity+json',
'Digest': ANY,
**common.CONNEG_HEADERS_AS2_HTML,
**activitypub.CONNEG_HEADERS_AS2_HTML,
**kwargs.pop('headers', {}),
}
return self.req(url, data=None, auth=ANY, headers=headers,


@@ -21,6 +21,7 @@ from oauth_dropins.webutil.util import json_dumps, json_loads
import requests
from werkzeug.exceptions import BadGateway, HTTPException
# import module instead of individual classes/functions to avoid circular import
import activitypub
from app import app
import common
@@ -67,7 +68,7 @@ class Webmention(View):
'url': id,
'object': actor_as1,
}
self.source_as2 = common.postprocess_as2({
self.source_as2 = activitypub.postprocess_as2({
'@context': 'https://www.w3.org/ns/activitystreams',
'type': 'Update',
'id': id,
@@ -191,7 +192,7 @@
target_as2 = None
if not self.source_as2:
self.source_as2 = common.postprocess_as2(
self.source_as2 = activitypub.postprocess_as2(
as2.from_as1(self.source_as1), target=target_as2, user=self.user)
if not self.source_as2.get('actor'):
self.source_as2['actor'] = common.host_url(self.user.key.id())
@@ -216,8 +217,9 @@
last_follow=self.source_as2)
try:
last = common.signed_post(inbox, user=self.user, data=self.source_as2,
log_data=log_data)
last = activitypub.signed_post(inbox, user=self.user,
data=self.source_as2,
log_data=log_data)
obj.delivered.append(target)
last_success = last
except BaseException as e:
@@ -307,11 +309,13 @@
for target in targets:
# fetch target page as AS2 object
try:
target_obj = common.get_object(target, user=self.user).as2
# TODO: make this generic across protocols
target_obj = activitypub.ActivityPub.get_object(target, user=self.user).as2
except (requests.HTTPError, BadGateway) as e:
resp = getattr(e, 'requests_response', None)
if resp and resp.ok:
if (common.content_type(resp) or '').startswith('text/html'):
type = common.content_type(resp)
if type and type.startswith('text/html'):
continue # give up
raise
@@ -330,7 +334,8 @@
if not inbox_url:
# fetch actor as AS object
actor = common.get_object(actor, user=self.user).as2
# TODO: make this generic across protocols
actor = activitypub.ActivityPub.get_object(actor, user=self.user).as2
inbox_url = actor.get('inbox')
if not inbox_url:


@@ -8,8 +8,7 @@ import mf2util
from oauth_dropins.webutil import util
from app import xrpc_server
from common import PAGE_SIZE
from models import Object, User
from models import Object, PAGE_SIZE, User
logger = logging.getLogger(__name__)


@@ -29,7 +29,7 @@ def get_followers(query_prop, output_field, user=None, limit=50, before=None):
raise ValueError(f'Unknown user {user}')
collection = 'followers' if output_field == 'followers' else 'following'
followers, before, after = common.fetch_followers(user, collection)
followers, before, after = Follower.fetch_page(user, collection)
actors = []
for follower in followers: