chapeau/kepi/bowler_pub/delivery.py

529 wiersze
15 KiB
Python

# delivery.py
#
# Part of kepi, an ActivityPub daemon.
# Copyright (c) 2018-2019 Marnanel Thurman.
# Licensed under the GNU Public License v2.
"""
This module contains deliver(), which delivers objects
to their audiences.
"""
from __future__ import absolute_import, unicode_literals
from celery import shared_task
from kepi.bowler_pub.find import find, find_local, is_local
import kepi.bowler_pub.models
from httpsig.verify import HeaderVerifier
from urllib.parse import urlparse
from django.http.request import HttpRequest
from django.conf import settings
import django.urls
import django.utils.datastructures
import logging
import requests
import json
import datetime
import pytz
import httpsig
from collections.abc import Iterable
from . import PUBLIC_IDS
logger = logging.getLogger(name='kepi')
def _rfc822_datetime(when=None):
"""
Formats a datetime to the RFC822 standard.
(The standard is silly, because GMT should be UTC,
but we have to use it anyway.)
"""
if when is None:
when = datetime.datetime.utcnow()
else:
when.replace(tzinfo=pytz.UTC)
return datetime.datetime.utcnow().strftime("%a, %d %b %Y %T GMT")
def _find_local_actor(activity_form):
"""
Given an activity, as a dict, return the local AcActor
who apparently created it. If there is no such AcActor,
or if the AcActor is remote, or if there's no Actor
at all, return None.
If the activity has no "actor" field, we use
the "attributedTo" field.
"""
from kepi.bowler_pub.models.acobject import AcObject
parts = None
for fieldname in ['actor', 'attributedTo']:
if fieldname in activity_form:
value = activity_form[fieldname]
if isinstance(value, AcObject):
return value
else:
parts = urlparse(activity_form[fieldname])
break
if parts is None:
return None
if parts.hostname not in settings.ALLOWED_HOSTS:
return None
return find_local(parts.path)
def _recipients_to_inboxes(recipients,
local_actor=None):
"""
Find inbox URLs for a set of recipients.
"recipients" is an iterable of strings, each the ID of
a recipient of a message. (These IDs will be URLs.)
Returns a set of strings of URLs for their inboxes.
Shared inboxes are preferred. This being a set,
there will be no duplicates.
"""
logger.info('Looking up inboxes for: %s',
recipients)
inboxes = set()
recipients = sorted(list(recipients))
original_recipients = recipients.copy()
for recipient in recipients:
if recipient in PUBLIC_IDS:
if local_actor is not None:
logger.debug(' -- treating public as %s\'s outbox',
local_actor)
inboxes.add(local_actor['outbox'])
else:
logger.debug(' -- ignoring public')
continue
discovered = find(recipient)
if discovered is None:
logger.debug(' -- "%s" doesn\'t exist; dropping',
recipient)
continue
logger.debug(' -- "%s" found as %s',
recipient, discovered)
if is_local(recipient):
if hasattr(discovered, 'first'):
logger.debug(' -- %s is a local collection',
recipient)
new_recipients = set([f.follower for f in discovered])
new_recipients = new_recipients.difference(
original_recipients)
logger.debug(' -- we add: %s',
new_recipients)
recipients.extend(new_recipients)
else:
logger.debug(' -- %s is local; use directly',
recipient)
inboxes.add(recipient)
continue
# so, it exists and it's remote
if 'type' not in discovered:
logger.debug(' -- has no type (weird)')
elif discovered['type'] in ['Collection', 'OrderedCollection']:
if recipient not in original_recipients:
logger.debug(' -- is a collection, but we\'re too deep; ignoring')
continue
logger.debug(' -- is a collection')
# XXX add checks to make sure we don't loop forever on duff data
page_url = discovered.get('first', None)
while page_url is not None:
logger.debug(' -- loading page %s', page_url)
page = find(page_url)
page_url = None
items = []
if page is None:
logger.debug(' -- and that\'s missing')
elif page.get('type', None) not in ['CollectionPage', 'OrderedCollectionPage']:
logger.debug(' -- which has a weird type; ignoring')
elif page.get('partOf', None)!=recipient:
logger.debug(' -- which belongs to someone else; ignoring')
elif 'orderedItems' in page:
items = page['orderedItems']
elif 'items' in page:
items = page['items']
if items:
logger.debug(' -- items are %s', items)
for item in items:
if item not in recipients:
logger.debug(' -- adding %s to recipients', item)
recipients.append(item)
if page is not None:
page_url = page.get('next', None)
logger.debug(' -- all loaded')
elif discovered['type'] in ['Actor', 'Person']:
if 'endpoints' in discovered and \
isinstance(discovered['endpoints'], Iterable) and \
'sharedInbox' in discovered['endpoints']:
logger.debug(' -- has a shared inbox at %s',
discovered['endpoints']['sharedInbox'])
inboxes.add(discovered['endpoints']['sharedInbox'])
elif 'inbox' in discovered and discovered['inbox'] is not None:
logger.debug(' -- has a sole inbox at %s',
discovered['inbox'])
inboxes.add(discovered['inbox'])
else:
logger.debug(' -- has no obvious inbox; dropping')
else:
logger.warn(' -- remote object is an unexpected type')
logger.info('Found inboxes: %s', inboxes)
return inboxes
def _activity_form_to_outgoing_string(activity_form):
"""
Formats an activity ready to be sent out as
an HTTP response.
"""
from kepi.bowler_pub import ATSIGN_CONTEXT
from kepi.bowler_pub.utils import as_json
format_for_delivery = activity_form.copy()
for blind_field in ['bto', 'bcc']:
if blind_field in format_for_delivery:
del format_for_delivery[blind_field]
if '@context' not in format_for_delivery:
format_for_delivery['@context'] = ATSIGN_CONTEXT
message = as_json(
format_for_delivery,
)
return message
def _signer_for_local_actor(local_actor):
"""
Given an Actor object representing a local actor,
return an httpsig.HeaderSigner object which can
sign headers for them.
"""
if local_actor is None:
logger.info('not signing outgoing messages because we have no known actor')
return None
if local_actor.privateKey is None:
logger.warn('not signing outgoing messages because local actor %s '+\
'has no private key!', local_actor)
return None
try:
return httpsig.HeaderSigner(
key_id=local_actor.key_name,
secret=local_actor.privateKey,
algorithm='rsa-sha256',
headers=['(request-target)', 'host', 'date', 'content-type'],
sign_header='signature',
)
except httpsig.utils.HttpSigException as hse:
logger.warn('Local private key was not honoured.')
logger.warn('This should never happen!')
logger.warn('Error was: %s', str(hse))
logger.warn('Key was: %s', local_actor.privateKey)
return None
class LocalDeliveryRequest(HttpRequest):
"""
These are fake HttpRequests which we send to the views
as an ACTIVITY_STORE method. For more information,
see the docstring in views/.
"""
def __init__(self, content, activity, path, *args, **kwargs):
super().__init__(*args, **kwargs)
self.method = 'ACTIVITY_STORE'
self.headers = {
'Content-Type': 'application/activity+json',
}
self._content = bytes(content, encoding='UTF-8')
self.activity = activity
self.path = path
@property
def body(self):
return self._content
def _deliver_local(
activity,
inbox,
parsed_target_url,
message,
):
"""
Deliver an activity to a local actor.
Keyword arguments:
activity -- the activity we're delivering.
inbox -- the URL of the inbox, only used in logging
parsed_target_url -- the result of urlparse(url)
message -- the activity as a formatted string
(as it would appear if we were sending this out over HTTP)
"""
logger.debug('%s: %s is local',
activity, inbox)
try:
resolved = django.urls.resolve(parsed_target_url.path)
except django.urls.Resolver404:
logger.debug('%s: -- not found', parsed_target_url.path)
return
logger.debug('%s is handled by %s',
parsed_target_url.path, resolved)
request = LocalDeliveryRequest(
content = message,
activity = activity,
path = parsed_target_url.path,
)
result = resolved.func(request,
**resolved.kwargs,
local = True,
)
if result:
logger.debug('%s: resulting in %s %s', parsed_target_url.path,
result.status_code, result.reason_phrase)
else:
logger.debug('%s: done', parsed_target_url.path)
def _deliver_remote(
activity,
inbox,
parsed_target_url,
message,
signer,
):
"""
Deliver an activity to a remote actor.
Keyword arguments:
activity -- the activity we're delivering.
inbox -- the URL of the inbox
parsed_target_url -- the result of urlparse(url)
message -- the activity as a formatted string
(as it would appear if we were sending this out over HTTP)
signer -- an httpsig.HeaderSigner for the
local actor who sent this activity, or None
if there isn't one.
"""
headers = {
'Date': _rfc822_datetime(),
'Host': parsed_target_url.netloc,
# lowercase is deliberate, to work around
# an infelicity of the signer library
'content-type': "application/activity+json",
}
if signer is not None:
headers = signer.sign(
headers,
method = 'POST',
path = parsed_target_url.path,
)
logger.debug('%s: %s: headers are %s',
activity, inbox, headers)
response = requests.post(
inbox,
data=message,
headers=headers,
)
logger.debug('%s: %s: posted. Server replied: %s %s',
activity, inbox, response.status_code, response.reason)
if response.status_code>=400 and response.status_code<=499 and \
(response.status_code not in [404, 410]):
# The server thinks we made an error. Log the request we made
# so that we can debug it.
logger.debug(" -- for debugging: our signer was %s",
signer.__dict__)
logger.debug(" -- and this is how the message ran:")
logger.debug("%s\n%s", headers, message)
@shared_task()
def deliver(
activity_id,
incoming = False,
):
"""
Deliver an activity to an actor.
Keyword arguments:
activity_id -- the "id" field of an Activity
incoming -- True if we just received this, False otherwise
This function is a shared task; it will be run by Celery behind
the scenes.
"""
try:
activity = kepi.bowler_pub.models.AcActivity.objects.get(id=activity_id)
except kepi.bowler_pub.models.AcActivity.DoesNotExist:
logger.warn("Can't deliver activity %s because it doesn't exist",
activity_id)
return None
logger.info('%s: begin delivery; incoming==%s',
activity, incoming)
activity_form = activity.activity_form
logger.debug('%s: full form is %s',
activity, activity_form)
local_actor = _find_local_actor(activity_form)
logger.debug('%s: local actor is %s',
activity, local_actor)
recipients = set()
for field in ['to', 'bto', 'cc', 'bcc', 'audience']:
if field in activity_form:
recipients.update(activity_form[field])
if local_actor is not None:
if incoming:
# Actors don't get told about their own (incoming) activities
if local_actor.url in recipients:
logger.info(' -- removing actor from recipients')
recipients.remove(local_actor.url)
else:
# but if it originated locally, the status should appear in the
# actor's own inbox too
if local_actor.url not in recipients:
logger.info(' -- adding actor to recipients')
recipients.add(local_actor.url)
if not recipients:
logger.debug('%s: there are no recipients; giving up',
activity)
return
logger.debug('%s: recipients are %s',
activity, recipients)
if incoming:
inboxes = recipients
message = ''
signer = None
else:
# Dereference collections.
inboxes = _recipients_to_inboxes(recipients,
local_actor=local_actor)
if not inboxes:
logger.debug('%s: there are no inboxes to send to; giving up',
activity)
return
logger.debug('%s: inboxes are %s',
activity, inboxes)
message = _activity_form_to_outgoing_string(
activity_form = activity_form,
)
signer = _signer_for_local_actor(
local_actor = local_actor,
)
for inbox in inboxes:
logger.debug('%s: %s: begin delivery',
activity, inbox)
if inbox is None:
logger.warn(' -- attempt to deliver to None (but why?)')
continue
if inbox in PUBLIC_IDS:
logger.debug(" -- mustn't deliver to Public")
continue
parsed_target_url = urlparse(inbox,
allow_fragments = False,
)
is_local = parsed_target_url.hostname in settings.ALLOWED_HOSTS
if is_local:
_deliver_local(
activity,
inbox,
parsed_target_url,
message,
)
else:
if incoming:
logger.debug(' -- target is remote; ignoring')
continue
_deliver_remote(
activity,
inbox,
parsed_target_url,
message,
signer,
)
logger.debug('%s: message posted to all inboxes',
activity)