"""Handles inbound webmentions.

TODO tests:
* actor/attributedTo could be string URL
* salmon rel via webfinger via author.name + domain
"""
|
2017-09-19 14:15:38 +00:00
|
|
|
import datetime
|
2017-08-15 14:39:22 +00:00
|
|
|
import json
|
|
|
|
import logging
|
2018-11-27 15:27:00 +00:00
|
|
|
import urllib
|
2017-08-23 15:14:51 +00:00
|
|
|
import urlparse
|
2017-08-15 14:39:22 +00:00
|
|
|
|
|
|
|
import appengine_config
|
|
|
|
|
2017-08-23 15:14:51 +00:00
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
import django_salmon
|
|
|
|
from django_salmon import magicsigs, utils
|
|
|
|
import feedparser
|
2017-10-16 00:34:21 +00:00
|
|
|
from google.appengine.api import mail
|
2018-11-13 15:26:50 +00:00
|
|
|
from google.appengine.ext.ndb import Key
|
2017-10-18 02:39:00 +00:00
|
|
|
from granary import as2, atom, microformats2, source
|
2017-08-15 14:39:22 +00:00
|
|
|
import mf2py
|
|
|
|
import mf2util
|
|
|
|
from oauth_dropins.webutil import util
|
2017-08-15 14:42:29 +00:00
|
|
|
import requests
|
2017-08-15 14:39:22 +00:00
|
|
|
import webapp2
|
2017-10-20 14:49:25 +00:00
|
|
|
from webob import exc
|
2017-08-15 14:39:22 +00:00
|
|
|
|
|
|
|
import activitypub
|
|
|
|
import common
|
2018-11-13 15:26:50 +00:00
|
|
|
from models import Follower, MagicKey, Response
|
2017-08-15 14:39:22 +00:00
|
|
|
|
2018-02-02 15:41:37 +00:00
|
|
|
# Source domains that should not trigger the (currently commented-out)
# new-webmention notification email in WebmentionHandler.post().
SKIP_EMAIL_DOMAINS = frozenset(('localhost', 'snarfed.org'))
|
|
|
|
|
2017-08-15 14:39:22 +00:00
|
|
|
|
|
|
|
class WebmentionHandler(webapp2.RequestHandler):
    """Handles inbound webmention, converts to ActivityPub or Salmon."""
    # Per-request state, populated by post() and read by the helper methods.
    source_url = None     # string; final source URL after following redirects
    source_domain = None  # string; domain of source_url, port stripped
    source_mf2 = None     # parsed mf2 dict
    source_obj = None     # parsed AS1 dict
    target_resp = None    # requests.Response from fetching the target, if any
|
2017-08-15 14:39:22 +00:00
|
|
|
|
|
|
|
    def post(self):
        """Handles an inbound webmention.

        Fetches and parses the source page, verifies it links back to this
        service, converts the first h-entry to AS1, then attempts ActivityPub
        delivery, falling back to Salmon.

        Expects a 'source' request parameter. Errors (missing backlink, no
        microformats2) are reported via common.error().
        """
        logging.info('(Params: %s )', self.request.params.items())

        # fetch source page
        source = util.get_required_param(self, 'source')
        source_resp = common.requests_get(source)
        self.source_url = source_resp.url or source
        self.source_domain = urlparse.urlparse(self.source_url).netloc.split(':')[0]
        self.source_mf2 = mf2py.parse(source_resp.text, url=self.source_url, img_with_alt=True)
        # logging.debug('Parsed mf2 for %s: %s', source_resp.url, json.dumps(self.source_mf2, indent=2))

        # check for backlink to bridgy fed (for webmention spec and to confirm
        # source's intent to federate to mastodon). Accept either the literal
        # host URL or its URL-encoded form.
        if (self.request.host_url not in source_resp.text and
                urllib.quote(self.request.host_url, safe='') not in source_resp.text):
            common.error(self, "Couldn't find link to %s" % self.request.host_url)

        # convert source page to ActivityStreams
        entry = mf2util.find_first_entry(self.source_mf2, ['h-entry'])
        if not entry:
            common.error(self, 'No microformats2 found on %s' % self.source_url)

        logging.info('First entry: %s', json.dumps(entry, indent=2))
        # make sure it has url, since we use that for AS2 id, which is required
        # for ActivityPub.
        props = entry.setdefault('properties', {})
        if not props.get('url'):
            props['url'] = [self.source_url]

        self.source_obj = microformats2.json_to_object(entry, fetch_mf2=True)
        logging.info('Converted to AS1: %s', json.dumps(self.source_obj, indent=2))

        # deliver: ActivityPub first; Salmon only if there were no AP targets
        self.try_activitypub() or self.try_salmon()

        # (disabled) notification email for new webmentions
        # if self.source_domain not in SKIP_EMAIL_DOMAINS:
        #   try:
        #     msg = 'Bridgy Fed: new webmention from %s' % source
        #     mail.send_mail(
        #       sender='admin@bridgy-federated.appspotmail.com',
        #       to='bridgy-fed@ryanb.org',
        #       subject=msg, body=msg)
        #   except BaseException:
        #     logging.warning('Error sending email', exc_info=True)
|
2018-04-02 14:35:47 +00:00
|
|
|
|
2017-10-26 14:30:52 +00:00
|
|
|
def try_activitypub(self):
|
2018-11-13 15:26:50 +00:00
|
|
|
"""Returns True if we attempted ActivityPub delivery, False otherwise."""
|
|
|
|
targets = self._activitypub_targets()
|
|
|
|
if not targets:
|
|
|
|
return False
|
2017-10-26 14:30:52 +00:00
|
|
|
|
2018-11-13 15:26:50 +00:00
|
|
|
key = MagicKey.get_or_create(self.source_domain)
|
2018-12-11 16:00:38 +00:00
|
|
|
error = None
|
2018-11-13 15:26:50 +00:00
|
|
|
delivered = set() # inboxes we've delivered to
|
2017-08-15 14:39:22 +00:00
|
|
|
|
2018-11-13 15:26:50 +00:00
|
|
|
# TODO: collect by inbox, add 'to' fields, de-dupe inboxes and recipients
|
2018-03-27 14:04:33 +00:00
|
|
|
|
2018-11-13 15:26:50 +00:00
|
|
|
for resp, inbox in targets:
|
2018-11-20 16:22:26 +00:00
|
|
|
target_obj = json.loads(resp.target_as2) if resp.target_as2 else None
|
2018-11-13 15:26:50 +00:00
|
|
|
source_activity = common.postprocess_as2(
|
2018-11-20 16:22:26 +00:00
|
|
|
as2.from_as1(self.source_obj), target=target_obj, key=key)
|
2018-11-13 15:26:50 +00:00
|
|
|
|
|
|
|
if resp.status == 'complete':
|
|
|
|
source_activity['type'] = 'Update'
|
|
|
|
|
|
|
|
try:
|
|
|
|
last = activitypub.send(source_activity, inbox, self.source_domain)
|
|
|
|
resp.status = 'complete'
|
2018-12-11 16:00:38 +00:00
|
|
|
except BaseException as e:
|
|
|
|
error = e
|
2018-11-13 15:26:50 +00:00
|
|
|
resp.status = 'error'
|
|
|
|
|
|
|
|
resp.put()
|
|
|
|
|
|
|
|
# Pass the AP response status code and body through as our response
|
2018-12-11 16:00:38 +00:00
|
|
|
if not error:
|
|
|
|
self.response.status_int = last.status_code
|
|
|
|
self.response.write(last.text)
|
|
|
|
elif isinstance(error, requests.HTTPError):
|
|
|
|
self.response.status_int = error.status_code
|
|
|
|
self.response.write(error.text)
|
|
|
|
else:
|
|
|
|
self.response.write(unicode(error))
|
2018-11-13 15:26:50 +00:00
|
|
|
|
|
|
|
return not error
|
|
|
|
|
2018-11-19 00:58:52 +00:00
|
|
|
def _single_target(self):
|
|
|
|
"""
|
|
|
|
Returns: string URL, the source's inReplyTo or object (if appropriate)
|
|
|
|
"""
|
|
|
|
target = util.get_first(self.source_obj, 'inReplyTo')
|
|
|
|
if target:
|
|
|
|
return util.get_url(target)
|
|
|
|
|
|
|
|
if self.source_obj.get('verb') in source.VERBS_WITH_OBJECT:
|
|
|
|
return util.get_url(util.get_first(self.source_obj, 'object'))
|
2018-11-13 15:26:50 +00:00
|
|
|
|
|
|
|
    def _activitypub_targets(self):
        """Collects the ActivityPub delivery targets for the source object.

        Returns: list of (Response, string inbox URL)
        """
        # if there's an in-reply-to, like-of, or repost-of, that's the target.
        # otherwise, it's all followers' inboxes.
        target = self._single_target()

        if not target:
            # interpret this as a Create or Update, deliver it to followers
            inboxes = []
            # ndb key-range scan: selects all Follower entities whose key name
            # starts with 'source_domain '. chr(ord(' ') + 1) is the character
            # after the space separator, i.e. the exclusive upper bound.
            for follower in Follower.query().filter(
                    Follower.key > Key('Follower', self.source_domain + ' '),
                    Follower.key < Key('Follower', self.source_domain + chr(ord(' ') + 1))):
                if follower.last_follow:
                    actor = json.loads(follower.last_follow).get('actor')
                    if actor and isinstance(actor, dict):
                        # prefer the shared inbox when the actor advertises one
                        inboxes.append(actor.get('endpoints', {}).get('sharedInbox') or
                                       actor.get('publicInbox') or
                                       actor.get('inbox'))
            return [(Response.get_or_create(
                         source=self.source_url, target=inbox, direction='out',
                         protocol='activitypub', source_mf2=json.dumps(self.source_mf2)),
                     inbox)
                    for inbox in inboxes if inbox]

        # NOTE(review): unreachable — the branch above already returns whenever
        # target is falsy. Kept as-is.
        if not target:
            return []  # give up

        # fetch target page as AS2 object
        try:
            self.target_resp = common.get_as2(target)
        except (requests.HTTPError, exc.HTTPBadGateway) as e:
            self.target_resp = e.response
            if (e.response.status_code // 100 == 2 and
                    common.content_type(e.response).startswith('text/html')):
                # target is HTML, not AS2, so it's not an ActivityPub target.
                # NOTE(review): returns False rather than [] — both are falsy
                # to callers, but inconsistent with the documented return type.
                # TODO: pass e.response to try_salmon()'s target_resp
                return False  # make post() try Salmon
            else:
                raise

        target_url = self.target_resp.url or target

        resp = Response.get_or_create(
            source=self.source_url, target=target_url, direction='out',
            protocol='activitypub', source_mf2=json.dumps(self.source_mf2))

        # find target's inbox
        target_obj = self.target_resp.json()
        resp.target_as2 = json.dumps(target_obj)
        inbox_url = target_obj.get('inbox')

        if not inbox_url:
            # no top-level inbox; look for one on the actor or attributedTo
            # TODO: test actor/attributedTo and not, with/without inbox
            actor = (util.get_first(target_obj, 'actor') or
                     util.get_first(target_obj, 'attributedTo'))
            if isinstance(actor, dict):
                inbox_url = actor.get('inbox')
                actor = actor.get('url') or actor.get('id')
            if not inbox_url and not actor:
                common.error(self, 'Target object has no actor or attributedTo with URL or id.')
            elif not isinstance(actor, basestring):
                common.error(self, 'Target actor or attributedTo has unexpected url or id object: %r' % actor)

        if not inbox_url:
            # fetch actor as AS object
            actor = common.get_as2(actor).json()
            inbox_url = actor.get('inbox')

        if not inbox_url:
            # TODO: probably need a way to save errors like this so that we can
            # return them if ostatus fails too.
            # common.error(self, 'Target actor has no inbox')
            return []

        # inbox may be relative to the target page
        inbox_url = urlparse.urljoin(target_url, inbox_url)
        return [(resp, inbox_url)]
|
2017-08-23 15:14:51 +00:00
|
|
|
|
2018-11-19 00:58:52 +00:00
|
|
|
def try_salmon(self):
|
2018-11-13 15:26:50 +00:00
|
|
|
"""Returns True if we attempted OStatus delivery. Raises otherwise."""
|
2018-11-19 00:58:52 +00:00
|
|
|
target = self.target_resp.url if self.target_resp else self._single_target()
|
2018-11-27 15:44:27 +00:00
|
|
|
if not target:
|
|
|
|
logging.warning("No targets or followers. Ignoring.")
|
|
|
|
return False
|
|
|
|
|
2018-11-13 15:26:50 +00:00
|
|
|
resp = Response.get_or_create(
|
2018-11-19 00:58:52 +00:00
|
|
|
source=self.source_url, target=target, direction='out',
|
|
|
|
source_mf2=json.dumps(self.source_mf2))
|
2018-11-13 15:26:50 +00:00
|
|
|
resp.protocol = 'ostatus'
|
2017-10-10 00:29:50 +00:00
|
|
|
|
2018-11-13 15:26:50 +00:00
|
|
|
try:
|
2018-11-19 00:58:52 +00:00
|
|
|
ret = self._try_salmon(resp)
|
2018-11-13 15:26:50 +00:00
|
|
|
resp.status = 'complete'
|
2018-11-19 00:58:52 +00:00
|
|
|
return ret
|
2018-11-13 15:26:50 +00:00
|
|
|
except:
|
|
|
|
resp.status = 'error'
|
|
|
|
raise
|
|
|
|
finally:
|
|
|
|
resp.put()
|
|
|
|
|
2018-11-19 00:58:52 +00:00
|
|
|
    def _try_salmon(self, resp):
        """Performs a Salmon delivery: discovers the endpoint, signs, slaps.

        Args:
          resp: Response

        Returns:
          True on success. Errors are reported via common.error().
        """
        # fetch target HTML page, extract Atom rel-alternate link
        if not self.target_resp:
            self.target_resp = common.requests_get(resp.target())

        parsed = common.beautifulsoup_parse(self.target_resp.content,
                                            from_encoding=self.target_resp.encoding)
        atom_url = parsed.find('link', rel='alternate', type=common.CONTENT_TYPE_ATOM)
        if not atom_url or not atom_url.get('href'):
            common.error(self, 'Target post %s has no Atom link' % resp.target(),
                         status=400)

        # fetch Atom target post, extract and inject id into source object
        feed = common.requests_get(atom_url['href']).text
        parsed = feedparser.parse(feed)
        # default= keeps json.dumps from choking on non-serializable values
        logging.info('Parsed: %s', json.dumps(parsed, indent=2,
                                              default=lambda key: '-'))
        entry = parsed.entries[0]
        target_id = entry.id
        in_reply_to = self.source_obj.get('inReplyTo')
        source_obj_obj = self.source_obj.get('object')
        if in_reply_to:
            in_reply_to[0]['id'] = target_id
        elif isinstance(source_obj_obj, dict):
            source_obj_obj['id'] = target_id

        # Mastodon (and maybe others?) require a rel-mentioned link to the
        # original post's author to make it show up as a reply:
        # app/services/process_interaction_service.rb
        # ...so add them as a tag, which atom renders as a rel-mention link.
        authors = entry.get('authors', None)
        if authors:
            url = entry.authors[0].get('href')
            if url:
                self.source_obj.setdefault('tags', []).append({'url': url})

        # extract and discover salmon endpoint
        logging.info('Discovering Salmon endpoint in %s', atom_url['href'])
        endpoint = django_salmon.discover_salmon_endpoint(feed)

        if not endpoint:
            # try webfinger, synthesizing an acct: from the feed author
            parsed = urlparse.urlparse(resp.target())
            # TODO: test missing email
            email = entry.author_detail.get('email') or '@'.join(
                (entry.author_detail.name, parsed.netloc))
            try:
                # TODO: always https?
                profile = common.requests_get(
                    '%s://%s/.well-known/webfinger?resource=acct:%s' %
                    (parsed.scheme, parsed.netloc, email), verify=False)
                endpoint = django_salmon.get_salmon_replies_link(profile.json())
            except requests.HTTPError as e:
                # best-effort: fall through to the "no endpoint" error below
                pass

        if not endpoint:
            common.error(self, 'No salmon endpoint found!', status=400)
        logging.info('Discovered Salmon endpoint %s', endpoint)

        # construct reply Atom object
        self.source_url = resp.source()
        activity = self.source_obj
        if self.source_obj.get('verb') not in source.VERBS_WITH_OBJECT:
            activity = {'object': self.source_obj}
        entry = atom.activity_to_atom(activity, xml_base=self.source_url)
        logging.info('Converted %s to Atom:\n%s', self.source_url, entry)

        # sign reply and wrap in magic envelope
        domain = urlparse.urlparse(self.source_url).netloc
        key = MagicKey.get_or_create(domain)
        logging.info('Using key for %s: %s', domain, key)
        magic_envelope = magicsigs.magic_envelope(
            entry, common.CONTENT_TYPE_ATOM, key)

        logging.info('Sending Salmon slap to %s', endpoint)
        common.requests_post(
            endpoint, data=common.XML_UTF8 + magic_envelope,
            headers={'Content-Type': common.CONTENT_TYPE_MAGIC_ENVELOPE})
        return True
|
2017-08-15 14:39:22 +00:00
|
|
|
|
|
|
|
|
|
|
|
# WSGI application: routes inbound webmentions to the handler above.
app = webapp2.WSGIApplication([
    ('/webmention', WebmentionHandler),
], debug=appengine_config.DEBUG)
|