2017-08-15 14:39:22 +00:00
|
|
|
"""Handles inbound webmentions.
|
2017-08-26 22:20:54 +00:00
|
|
|
|
2017-09-02 03:49:00 +00:00
|
|
|
TODO tests:
|
|
|
|
* actor/attributedTo could be string URL
|
|
|
|
* salmon rel via webfinger via author.name + domain
|
2017-08-15 14:39:22 +00:00
|
|
|
"""
|
2017-09-19 14:15:38 +00:00
|
|
|
import datetime
|
2017-08-15 14:39:22 +00:00
|
|
|
import json
|
|
|
|
import logging
|
2017-08-23 15:14:51 +00:00
|
|
|
import urlparse
|
2017-08-15 14:39:22 +00:00
|
|
|
|
|
|
|
import appengine_config
|
|
|
|
|
2017-08-23 15:14:51 +00:00
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
import django_salmon
|
|
|
|
from django_salmon import magicsigs, utils
|
|
|
|
import feedparser
|
2017-10-16 00:34:21 +00:00
|
|
|
from google.appengine.api import mail
|
2017-10-18 02:39:00 +00:00
|
|
|
from granary import as2, atom, microformats2, source
|
2017-09-19 14:15:38 +00:00
|
|
|
from httpsig.requests_auth import HTTPSignatureAuth
|
2017-08-15 14:39:22 +00:00
|
|
|
import mf2py
|
|
|
|
import mf2util
|
|
|
|
from oauth_dropins.webutil import util
|
2017-08-15 14:42:29 +00:00
|
|
|
import requests
|
2017-08-15 14:39:22 +00:00
|
|
|
import webapp2
|
|
|
|
|
|
|
|
import activitypub
|
|
|
|
import common
|
2017-10-10 00:29:50 +00:00
|
|
|
from models import MagicKey, Response
|
2017-08-15 14:39:22 +00:00
|
|
|
|
|
|
|
|
|
|
|
class WebmentionHandler(webapp2.RequestHandler):
    """Handles inbound webmention, converts to ActivityPub or Salmon.

    The source page is fetched and parsed as microformats2, converted to an
    ActivityStreams object, and then delivered to the target either via
    ActivityPub (POST to the target actor's inbox) or, as a fallback, via an
    OStatus Salmon slap.

    Instance attributes:
      response: Response

    NOTE(review): `response` is also the attribute name webapp2 uses for the
    outgoing HTTP response; post() replaces it with the stored Response
    entity before any code path reads or writes its status/protocol.
    """

    def post(self):
        """Handles a webmention POST.

        Reads the required `source` and `target` request parameters, fetches
        and parses the source page, and delivers the resulting activity to
        the post that the source replies to, likes, or reposts.
        """
        logging.info('Params: %s', self.request.params.items())
        # Named source_param so that it doesn't shadow the granary `source`
        # module imported at the top of the file.
        source_param = util.get_required_param(self, 'source')
        # The target param is required but otherwise unused: the real target
        # is taken from the source page's own markup below.
        util.get_required_param(self, 'target')

        self._send_admin_email(source_param)

        # fetch source page, convert to ActivityStreams
        resp = common.requests_get(source_param)
        mf2 = mf2py.parse(resp.text, url=resp.url)
        # logging.debug('Parsed mf2 for %s: %s', resp.url, json.dumps(mf2, indent=2))
        source_url = resp.url or source_param

        entry = mf2util.find_first_entry(mf2, ['h-entry'])
        logging.info('First entry: %s', json.dumps(entry, indent=2))
        source_obj = microformats2.json_to_object(entry)
        logging.info('Converted to AS: %s', json.dumps(source_obj, indent=2))

        # fetch target page as AS object. target is first in-reply-to,
        # like-of, or repost-of, *not* target query param.
        target = util.get_url(util.get_first(source_obj, 'inReplyTo') or
                              util.get_first(source_obj, 'object'))
        if not target:
            # common.error presumably aborts the request; the return makes
            # sure we never continue with an empty target either way.
            return common.error(
                self, 'No u-in-reply-to, u-like-of, or u-repost-of '
                'found in %s' % source_url)

        try:
            resp = common.requests_get(
                target, headers=activitypub.CONNEG_HEADER, log=True)
        except requests.HTTPError as e:
            if e.response is not None and e.response.status_code // 100 == 4:
                # The fetch failed, so there is no post-redirect URL; fall
                # back to the URL we asked for. The Response entity has to
                # exist before send_salmon() records the delivery on it.
                self._store_response(source_url, target, mf2)
                return self.send_salmon(source_obj, target_url=target)
            raise

        target_url = resp.url or target
        self._store_response(source_url, target_url, mf2)

        # A missing Content-Type header is treated as "not HTML".
        content_type = resp.headers.get('Content-Type') or ''
        if content_type.startswith('text/html'):
            return self.send_salmon(source_obj, target_resp=resp)

        # find actor's inbox
        inbox_url = self._find_inbox(resp.json())
        if not inbox_url:
            # TODO: probably need a way to save errors like this so that we
            # can return them if ostatus fails too.
            # common.error(self, 'Target actor has no inbox')
            return self.send_salmon(source_obj, target_url=target_url)

        # convert to AS2
        source_domain = urlparse.urlparse(source_url).netloc
        key = MagicKey.get_or_create(source_domain)
        source_activity = common.postprocess_as2(
            as2.from_as1(source_obj), key=key)

        # A response already marked complete means this source was delivered
        # before, so send the activity as an Update.
        if self.response.status == 'complete':
            source_activity['type'] = 'Update'

        # prepare HTTP Signature (required by Mastodon)
        # https://w3c.github.io/activitypub/#authorization-lds
        # https://tools.ietf.org/html/draft-cavage-http-signatures-07
        # https://github.com/tootsuite/mastodon/issues/4906#issuecomment-328844846
        acct = 'acct:me@%s' % source_domain
        auth = HTTPSignatureAuth(secret=key.private_pem(), key_id=acct,
                                 algorithm='rsa-sha256')

        # deliver source object to target actor's inbox.
        headers = {
            'Content-Type': activitypub.CONTENT_TYPE_AS,
            # required for HTTP Signature
            # https://tools.ietf.org/html/draft-cavage-http-signatures-07#section-2.1.3
            'Date': datetime.datetime.utcnow().strftime(
                '%a, %d %b %Y %H:%M:%S GMT'),
        }
        common.requests_post(
            urlparse.urljoin(target_url, inbox_url), json=source_activity,
            auth=auth, headers=headers, log=True)

        self.response.status = 'complete'
        self.response.protocol = 'activitypub'
        self.response.put()

    def _send_admin_email(self, source_url):
        """Emails a best-effort notification about a new webmention.

        Failures are logged and never propagate to the caller.

        Args:
          source_url: string, the webmention's source parameter
        """
        try:
            msg = 'Bridgy Fed: new webmention from %s' % source_url
            mail.send_mail(
                sender='admin@bridgy-federated.appspotmail.com',
                to='bridgy-fed@ryanb.org',
                subject=msg, body=msg)
        # NOTE(review): BaseException also swallows KeyboardInterrupt and
        # SystemExit; presumably intentional so that notification trouble
        # can never fail the webmention itself -- confirm.
        except BaseException:
            logging.warning('Error sending email', exc_info=True)

    def _store_response(self, source_url, target_url, mf2):
        """Loads or creates the outbound Response and stores it on self.

        Args:
          source_url: string, URL of the source page
          target_url: string, URL of the target post
          mf2: dict, parsed microformats2 of the source page
        """
        self.response = Response.get_or_create(
            source=source_url, target=target_url, direction='out',
            source_mf2=json.dumps(mf2))

    def _find_inbox(self, target_obj):
        """Finds the ActivityPub inbox URL for a target object.

        Looks at the object's own `inbox` first, then at an embedded actor
        (`actor` or `attributedTo`), and finally fetches the actor if it is
        only referenced by URL.

        Args:
          target_obj: dict, decoded JSON of the target post

        Returns:
          string inbox URL (possibly relative), or None if none was found
        """
        inbox_url = target_obj.get('inbox')
        if inbox_url:
            return inbox_url

        # TODO: test actor/attributedTo and not, with/without inbox
        actor = target_obj.get('actor') or target_obj.get('attributedTo')
        if isinstance(actor, (list, tuple)):
            # multiple actors: use the first one
            actor = actor[0] if actor else None
        if isinstance(actor, dict):
            inbox_url = actor.get('inbox')
            actor = actor.get('url')
        # otherwise actor is expected to be a plain string URL, or None

        if not inbox_url and not actor:
            # common.error presumably aborts the request here.
            common.error(self, 'Target object has no actor or attributedTo URL')

        if not inbox_url:
            # fetch actor as AS object
            actor = common.requests_get(actor, parse_json=True,
                                        headers=activitypub.CONNEG_HEADER)
            inbox_url = actor.get('inbox')

        return inbox_url

    def send_salmon(self, source_obj, target_url=None, target_resp=None):
        """Delivers the source object to the target as a Salmon slap.

        Finds the target page's Atom feed, injects the target entry's id
        into the source object, discovers the Salmon endpoint (from the feed
        or via webfinger), then signs the Atom reply and POSTs it.

        Exactly one of target_url and target_resp must be provided.
        self.response must already hold the stored Response entity; it is
        marked complete and saved once the slap has been sent.

        Args:
          source_obj: dict, ActivityStreams object for the source post
          target_url: string, URL of the target post, fetched here
          target_resp: an already fetched response for the target page
        """
        # fetch target HTML page, extract Atom rel-alternate link
        if target_url:
            assert not target_resp
            target_resp = common.requests_get(target_url)
        else:
            assert target_resp
            # TODO: this could be different due to redirects
            target_url = target_resp.url

        html = BeautifulSoup(target_resp.content,
                             from_encoding=target_resp.encoding)
        atom_link = html.find('link', rel='alternate',
                              type=common.ATOM_CONTENT_TYPE)
        # .get() so that a <link> without an href attribute results in the
        # 400 below instead of a KeyError.
        atom_href = atom_link.get('href') if atom_link is not None else None
        if not atom_href:
            return common.error(
                self, 'Target post %s has no Atom link' % target_resp.url,
                status=400)

        # fetch Atom target post, extract and inject id into source object
        feed = common.requests_get(atom_href).text
        parsed_feed = feedparser.parse(feed)
        logging.info('Parsed: %s', json.dumps(parsed_feed, indent=2,
                                              default=lambda key: '-'))
        if not parsed_feed.entries:
            return common.error(
                self, 'Atom feed %s has no entries' % atom_href, status=400)
        feed_entry = parsed_feed.entries[0]
        target_id = feed_entry.get('id')
        if not target_id:
            return common.error(
                self, 'First entry in Atom feed %s has no id' % atom_href,
                status=400)

        in_reply_to = source_obj.get('inReplyTo')
        source_obj_obj = source_obj.get('object')
        if in_reply_to:
            in_reply_to[0]['id'] = target_id
        elif isinstance(source_obj_obj, dict):
            source_obj_obj['id'] = target_id

        # Mastodon (and maybe others?) require a rel-mentioned link to the
        # original post's author to make it show up as a reply:
        #   app/services/process_interaction_service.rb
        # ...so add them as a tag, which atom renders as a rel-mention link.
        authors = feed_entry.get('authors', None)
        if authors:
            url = authors[0].get('href')
            if url:
                source_obj.setdefault('tags', []).append({'url': url})

        # extract and discover salmon endpoint
        logging.info('Discovering Salmon endpoint in %s', atom_href)
        endpoint = django_salmon.discover_salmon_endpoint(feed)
        if not endpoint:
            endpoint = self._webfinger_salmon_endpoint(feed_entry, target_url)
        if not endpoint:
            return common.error(self, 'No salmon endpoint found!', status=400)
        logging.info('Discovered Salmon endpoint %s', endpoint)

        # construct reply Atom object
        source_url = self.request.get('source')
        activity = (source_obj
                    if source_obj.get('verb') in source.VERBS_WITH_OBJECT
                    else {'object': source_obj})
        atom_entry = atom.activity_to_atom(activity, xml_base=source_url)
        logging.info('Converted %s to Atom:\n%s', source_url, atom_entry)

        # sign reply and wrap in magic envelope
        domain = urlparse.urlparse(source_url).netloc
        key = MagicKey.get_or_create(domain)
        # NOTE(review): this logs the key object itself; if its string form
        # includes private key material, that ends up in the logs -- confirm.
        logging.info('Using key for %s: %s', domain, key)
        magic_envelope = magicsigs.magic_envelope(
            atom_entry, common.ATOM_CONTENT_TYPE, key)

        logging.info('Sending Salmon slap to %s', endpoint)
        common.requests_post(
            endpoint, data=common.XML_UTF8 + magic_envelope, log=True,
            headers={'Content-Type': common.MAGIC_ENVELOPE_CONTENT_TYPE})

        self.response.status = 'complete'
        self.response.protocol = 'ostatus'
        self.response.put()

    def _webfinger_salmon_endpoint(self, feed_entry, target_url):
        """Tries to discover the Salmon endpoint via webfinger.

        The account is the feed entry author's email if present, otherwise
        the author's name at the target URL's host.

        Args:
          feed_entry: parsed feedparser entry for the target post
          target_url: string, URL of the target post

        Returns:
          string endpoint URL, or None if it could not be discovered
        """
        parsed = urlparse.urlparse(target_url)

        # .get() throughout so that a feed without author details simply
        # skips webfinger instead of raising AttributeError.
        author = feed_entry.get('author_detail') or {}
        email = author.get('email')
        if not email:
            name = author.get('name')
            if not name:
                logging.info('No author email or name in Atom entry; '
                             'skipping webfinger')
                return None
            email = '@'.join((name, parsed.netloc))

        try:
            # TODO: always https?
            # NOTE(review): verify=False is passed through to the request
            # helper, presumably disabling TLS certificate checks -- confirm
            # that this is intended.
            resp = common.requests_get(
                '%s://%s/.well-known/webfinger?resource=acct:%s' %
                (parsed.scheme, parsed.netloc, email),
                log=True, verify=False)
            return django_salmon.get_salmon_replies_link(resp.json())
        except (requests.HTTPError, ValueError):
            # ValueError covers a webfinger response that isn't valid JSON.
            # Either way webfinger is best effort, so log it and move on.
            logging.warning('Webfinger lookup for %s failed', email,
                            exc_info=True)
            return None
|
|
|
|
|
2017-08-15 14:39:22 +00:00
|
|
|
|
|
|
|
# WSGI application object: maps the /webmention path to WebmentionHandler.
app = webapp2.WSGIApplication(
    [('/webmention', WebmentionHandler)],
    debug=appengine_config.DEBUG)
|