translate incoming webmentions to outgoing salmon slaps

mastodon
Ryan Barrett 2017-08-23 08:14:51 -07:00
rodzic 0fe3e7c3d4
commit f6deddeb96
8 zmienionych plików z 229 dodań i 39 usunięć

Wyświetl plik

@ -1,8 +1,8 @@
"""Misc common utilities.
"""
import json
import logging
from oauth_dropins.webutil import util
import requests
from webob import exc
@ -10,28 +10,36 @@ DOMAIN_RE = r'([^/]+\.[^/]+)'
# Default headers that _requests_fn merges into every outgoing request.
HEADERS = {
'User-Agent': 'Bridgy Fed (https://fed.brid.gy/)',
}
# MIME types used when looking up Atom links and when posting Salmon magic
# envelopes.
ATOM_CONTENT_TYPE = 'application/atom+xml'
MAGIC_ENVELOPE_CONTENT_TYPE = 'application/magic-envelope+xml'
# GET wrapper: forwards url and kwargs to _requests_fn.
# NOTE(review): two return lines are shown here without diff markers.
# Presumably the requests.get line is the pre-commit version and the
# util.requests_get line replaces it (the updated tests expect
# timeout=util.HTTP_TIMEOUT in the call) -- confirm which one is live.
def requests_get(url, **kwargs):
return _requests_fn(requests.get, url, **kwargs)
return _requests_fn(util.requests_get, url, **kwargs)
# POST wrapper: forwards url and kwargs to _requests_fn.
# NOTE(review): two return lines are shown here without diff markers.
# Presumably the requests.post line is the pre-commit version and the
# util.requests_post line replaces it -- confirm which one is live.
def requests_post(url, **kwargs):
return _requests_fn(requests.post, url, **kwargs)
return _requests_fn(util.requests_post, url, **kwargs)
# NOTE(review): the def, if and raise lines below each appear twice with no
# diff markers. Presumably the json=False / `if json:` / HTTPBadRequest(400, msg)
# lines are the pre-commit versions and the parse_json lines replace them
# (callers in this commit pass json= through to the request itself) -- confirm.
def _requests_fn(fn, url, json=False, **kwargs):
def _requests_fn(fn, url, parse_json=False, **kwargs):
"""Wraps requests.* and adds raise_for_status() and User-Agent.

Args:
fn: callable invoked as fn(url, **kwargs) to perform the request
url: passed through to fn
parse_json: if true, return the decoded JSON body instead of the response
kwargs: passed through to fn; its 'headers' entry is updated with HEADERS

Returns: resp.json() if parse_json is set, otherwise the response from fn

Raises: exc.HTTPBadRequest if parse_json is set and the body isn't valid
JSON, plus whatever resp.raise_for_status() raises
"""
# setdefault() returns the caller's own dict when headers= was passed, so
# update() modifies that dict in place and HEADERS wins on key conflicts.
# NOTE(review): callers pass module-level constants such as
# activitypub.CONNEG_HEADER here -- confirm the in-place mutation is intended.
kwargs.setdefault('headers', {}).update(HEADERS)
resp = fn(url, **kwargs)
resp.raise_for_status()
if json:
if parse_json:
try:
return resp.json()
except ValueError:
# log the traceback, then surface the parse failure as an HTTP 400
msg = "Couldn't parse response as JSON"
logging.error(msg, exc_info=True)
raise exc.HTTPBadRequest(400, msg)
raise exc.HTTPBadRequest(msg)
return resp
def error(handler, msg, status=400):
    """Log msg at info level, then abort the request via handler.abort().

    Args:
      handler: object exposing abort(status, msg), e.g. a request handler
      msg: string, logged and passed on to abort()
      status: integer HTTP status code passed to abort(); defaults to 400
    """
    logging.info(msg)
    handler.abort(status, msg)

Wyświetl plik

@ -1,5 +1,7 @@
-e git+https://github.com/snarfed/django-salmon.git#egg=django_salmon
-e git+https://github.com/snarfed/webmention-tools.git#egg=webmentiontools
bs4
feedparser
granary
mf2py>=1.0.4
mf2util>=0.5.0

Wyświetl plik

@ -36,17 +36,17 @@ class SlapHandler(webapp2.RequestHandler):
if ':' not in author:
author = 'acct:%s' % author
elif not author.startswith('acct:'):
self.error('Author URI %s has unsupported scheme; expected acct:' % author)
common.error(self, 'Author URI %s has unsupported scheme; expected acct:' % author)
logging.info('Fetching Salmon key for %s' % author)
if not magicsigs.verify(author, data, parsed['sig']):
self.error('Could not verify magic signature.')
common.error(self, 'Could not verify magic signature.')
logging.info('Verified magic signature.')
# verify that the timestamp is recent (required by spec)
updated = utils.parse_updated_from_atom(data)
if not utils.verify_timestamp(updated):
self.error('Timestamp is more than 1h old.')
common.error(self, 'Timestamp is more than 1h old.')
# find webmention source and target
source = None
@ -60,7 +60,7 @@ class SlapHandler(webapp2.RequestHandler):
targets.append(target.strip())
if not source:
self.error("Couldn't find post URL (link element)")
common.error(self, "Couldn't find post URL (link element)")
if not targets:
self.error("Couldn't find target URL (thr:in-reply-to or TODO)")
@ -79,10 +79,6 @@ class SlapHandler(webapp2.RequestHandler):
self.abort(errors[0].get('http_status') or 400,
'Errors:\n' + '\n'.join(json.dumps(e, indent=2) for e in errors))
# Logs msg at info level, then aborts the request with HTTP 400.
# NOTE(review): the hunk header (-79,10 +79,6) suggests this method is deleted
# by the commit, with callers switched to common.error(self, ...) -- confirm
# before relying on it.
def error(self, msg):
logging.info(msg)
self.abort(400, msg)
app = webapp2.WSGIApplication([
(r'/(?:acct)?@%s/salmon' % common.DOMAIN_RE, SlapHandler),

Wyświetl plik

@ -4,11 +4,13 @@
TODO: test error handling
"""
from __future__ import unicode_literals
import copy
import json
import unittest
import urllib
import mock
from oauth_dropins.webutil import util
import requests
import activitypub
@ -35,7 +37,8 @@ class ActivityPubTest(unittest.TestCase):
mock_get.return_value = resp
got = app.get_response('/foo.com')
mock_get.assert_called_once_with('http://foo.com/', headers=common.HEADERS)
mock_get.assert_called_once_with('http://foo.com/', headers=common.HEADERS,
timeout=util.HTTP_TIMEOUT)
self.assertEquals(200, got.status_int)
self.assertEquals(activitypub.CONTENT_TYPE_AS2, got.headers['Content-Type'])
self.assertEquals({
@ -61,7 +64,8 @@ class ActivityPubTest(unittest.TestCase):
mock_get.return_value = resp
got = app.get_response('/foo.com')
mock_get.assert_called_once_with('http://foo.com/', headers=common.HEADERS)
mock_get.assert_called_once_with('http://foo.com/', headers=common.HEADERS,
timeout=util.HTTP_TIMEOUT)
self.assertEquals(400, got.status_int)
self.assertIn('representative h-card', got.body)
# TODO
@ -92,6 +96,8 @@ class ActivityPubTest(unittest.TestCase):
'http://orig/post', headers=common.HEADERS, verify=False)
self.assertEquals(200, got.status_int)
expected_headers = copy.deepcopy(common.HEADERS)
expected_headers['Accept'] = '*/*'
mock_post.assert_called_once_with(
'http://orig/webmention',
data={
@ -99,5 +105,5 @@ class ActivityPubTest(unittest.TestCase):
'target': 'http://orig/post',
},
allow_redirects=False,
headers=common.HEADERS,
headers=expected_headers,
verify=False)

Wyświetl plik

@ -4,6 +4,7 @@
TODO: test error handling
"""
from __future__ import unicode_literals
import copy
import unittest
import urllib
@ -93,6 +94,8 @@ class SalmonTest(unittest.TestCase):
))
# check webmention discovery and post
expected_headers = copy.deepcopy(common.HEADERS)
expected_headers['Accept'] = '*/*'
mock_get.assert_called_once_with(
'http://orig/post', headers=common.HEADERS, verify=False)
mock_post.assert_called_once_with(
@ -102,5 +105,5 @@ class SalmonTest(unittest.TestCase):
'target': 'http://orig/post',
},
allow_redirects=False,
headers=common.HEADERS,
headers=expected_headers,
verify=False)

Wyświetl plik

@ -11,6 +11,7 @@ from google.appengine.datastore import datastore_stub_util
from google.appengine.ext import testbed
import mock
from oauth_dropins.webutil import util
import requests
import common
@ -73,7 +74,8 @@ class WebFingerTest(unittest.TestCase):
mock_get.return_value = resp
got = app.get_response('/@foo.com', headers={'Accept': 'application/json'})
mock_get.assert_called_once_with('http://foo.com/', headers=common.HEADERS)
mock_get.assert_called_once_with('http://foo.com/', headers=common.HEADERS,
timeout=util.HTTP_TIMEOUT)
self.assertEquals(200, got.status_int)
self.assertEquals('application/json; charset=utf-8',
got.headers['Content-Type'])
@ -144,7 +146,8 @@ class WebFingerTest(unittest.TestCase):
mock_get.return_value = resp
got = app.get_response('/@foo.com')
mock_get.assert_called_once_with('http://foo.com/', headers=common.HEADERS)
mock_get.assert_called_once_with('http://foo.com/', headers=common.HEADERS,
timeout=util.HTTP_TIMEOUT)
self.assertEquals(400, got.status_int)
self.assertIn('representative h-card', got.body)
# TODO

Wyświetl plik

@ -8,12 +8,20 @@ import copy
import json
import unittest
import urllib
import urllib2
import feedparser
from google.appengine.datastore import datastore_stub_util
from google.appengine.ext import testbed
import mock
from mock import call
from oauth_dropins.webutil import util
import requests
import activitypub
import common
from django_salmon import magicsigs, utils
import models
import webmention
from webmention import app
@ -22,22 +30,50 @@ from webmention import app
@mock.patch('requests.get')
class WebmentionTest(unittest.TestCase):
def test_webmention(self, mock_get, mock_post):
reply_html = u"""
<html><body>
maxDiff = None
def setUp(self):
self.testbed = testbed.Testbed()
self.testbed.activate()
hrd_policy = datastore_stub_util.PseudoRandomHRConsistencyPolicy(probability=.5)
self.testbed.init_datastore_v3_stub(consistency_policy=hrd_policy)
self.testbed.init_memcache_stub()
self.reply_html = u"""\
<html>
<body>
<div class="h-entry">
<a class="u-url" href="http://a/reply"></a>
<p class="e-content">
<a class="u-in-reply-to" href="http://orig/post">foo bar</a>
</p>
</div>
</body></html>
</body>
</html>
"""
reply = requests.Response()
reply.status_code = 200
reply._text = reply_html
reply._content = reply._text.encode('utf-8')
reply.encoding = 'utf-8'
self.reply = requests.Response()
self.reply.status_code = 200
self.reply._text = self.reply_html
self.reply._content = self.reply_html.encode('utf-8')
self.reply.encoding = 'utf-8'
self.reply_atom = u"""\
<?xml version="1.0" encoding="UTF-8"?>
<entry xmlns="http://www.w3.org/2005/Atom"
xmlns:thr="http://purl.org/syndication/thread/1.0">
<id>http://a/reply</id>
<thr:in-reply-to ref="tag:fed.brid.gy,2017-08-22:orig-post">
tag:fed.brid.gy,2017-08-22:orig-post
</thr:in-reply-to>
<content>foo bar</content>
<title></title>
</entry>
"""
def tearDown(self):
    """Deactivate the App Engine testbed held in self.testbed."""
    self.testbed.deactivate()
def test_webmention_activitypub(self, mock_get, mock_post):
article_as = {
'@context': ['https://www.w3.org/ns/activitystreams'],
'type': 'Article',
@ -62,7 +98,7 @@ class WebmentionTest(unittest.TestCase):
actor._content = actor._text.encode('utf-8')
actor.encoding = 'utf-8'
mock_get.side_effect = [reply, article, actor]
mock_get.side_effect = [self.reply, article, actor]
got = app.get_response(
'/webmention', method='POST', body=urllib.urlencode({
@ -71,10 +107,18 @@ class WebmentionTest(unittest.TestCase):
}))
self.assertEquals(200, got.status_int)
mock_get.assert_has_calls((
call('http://a/reply', headers=common.HEADERS, timeout=util.HTTP_TIMEOUT),
call('http://orig/post', headers=activitypub.CONNEG_HEADER,
timeout=util.HTTP_TIMEOUT),
call('http://orig/author', headers=activitypub.CONNEG_HEADER,
timeout=util.HTTP_TIMEOUT),))
args, kwargs = mock_post.call_args
self.assertEqual(('https://foo.com/inbox',), args)
self.assertEqual({
'objectType': 'comment',
'url': 'http://a/reply',
'displayName': u'foo ☕ bar',
'content': u' <a class="u-in-reply-to" href="http://orig/post">foo ☕ bar</a> ',
'inReplyTo': [{'url': 'http://orig/post'}],
@ -83,3 +127,69 @@ class WebmentionTest(unittest.TestCase):
expected_headers = copy.copy(common.HEADERS)
expected_headers['Content-Type'] = activitypub.CONTENT_TYPE_AS
self.assertEqual(expected_headers, kwargs['headers'])
def test_webmention_salmon(self, mock_get, mock_post):
"""A webmention whose target page is HTML is delivered as a Salmon slap."""
# target page: HTML that advertises an Atom rel-alternate link
target = requests.Response()
target.status_code = 200
target.headers['Content-Type'] = 'text/html'
target._content = """\
<html>
<meta>
<link href='http://orig/atom' rel='alternate' type='application/atom+xml'>
</meta>
</html>
""".encode('utf-8')
# Atom entry for the target post, carrying its id and Salmon endpoint
atom = requests.Response()
atom.status_code = 200
atom._content = """\
<?xml version="1.0"?>
<entry xmlns="http://www.w3.org/2005/Atom">
<id>tag:fed.brid.gy,2017-08-22:orig-post</id>
<link rel="salmon" href="http://orig/salmon"/>
<content type="html">baz baj</content>
</entry>
""".encode('utf-8')
# responses for successive GETs: source reply, target page, target Atom
mock_get.side_effect = [self.reply, target, atom]
got = app.get_response(
'/webmention', method='POST', body=urllib.urlencode({
'source': 'http://a/reply',
'target': 'http://orig/post',
}))
self.assertEquals(200, got.status_int)
mock_get.assert_has_calls((
call('http://a/reply', headers=common.HEADERS, timeout=util.HTTP_TIMEOUT),
call('http://orig/post', headers=activitypub.CONNEG_HEADER,
timeout=util.HTTP_TIMEOUT),
call('http://orig/atom', headers=common.HEADERS, timeout=util.HTTP_TIMEOUT),
))
# the slap should be POSTed to the endpoint from the Atom entry, as a
# magic envelope
args, kwargs = mock_post.call_args
self.assertEqual(('http://orig/salmon',), args)
self.assertEqual(common.MAGIC_ENVELOPE_CONTENT_TYPE,
kwargs['headers']['Content-Type'])
# unwrap the envelope; only the presence of a signature is checked here
envelope = utils.parse_magic_envelope(kwargs['data'])
assert envelope['sig']
# the enveloped payload should be the reply as Atom, threaded to the target
feed = utils.decode(envelope['data'])
parsed = feedparser.parse(feed)
entry = parsed.entries[0]
self.assertEquals('http://a/reply', entry.id)
self.assertIn({
'rel': 'alternate',
'href': 'http://a/reply',
'type': 'text/html',
}, entry.links)
self.assertEquals({
'type': 'text/html',
'href': 'http://orig/post',
'ref': 'tag:fed.brid.gy,2017-08-22:orig-post'
}, entry['thr_in-reply-to'])
# NOTE(review): the expected content contains a coffee-cup character, but the
# reply fixture shown in setUp reads 'foo bar' -- confirm the fixture text.
self.assertEquals(
u'<a class="u-in-reply-to" href="http://orig/post">foo ☕ bar</a>',
entry.content[0]['value'])

Wyświetl plik

@ -1,12 +1,16 @@
"""Handles inbound webmentions.
"""
import copy
import json
import logging
import urlparse
import appengine_config
from granary import microformats2
from bs4 import BeautifulSoup
import django_salmon
from django_salmon import magicsigs, utils
import feedparser
from granary import atom, microformats2
import mf2py
import mf2util
from oauth_dropins.webutil import util
@ -15,10 +19,11 @@ import webapp2
import activitypub
import common
import models
class WebmentionHandler(webapp2.RequestHandler):
"""Handles inbound webmention, converts to ActivityPub inbox delivery."""
"""Handles inbound webmention, converts to ActivityPub or Salmon."""
def post(self):
logging.info('Params: %s', self.request.params.items())
@ -36,15 +41,24 @@ class WebmentionHandler(webapp2.RequestHandler):
logging.info('Converted to AS: %s', json.dumps(source_obj, indent=2))
# fetch target page as AS object
target_obj = common.requests_get(target, json=True,
headers=activitypub.CONNEG_HEADER)
try:
resp = common.requests_get(target, headers=activitypub.CONNEG_HEADER)
except requests.HTTPError as e:
if e.response.status_code // 100 == 4:
return self.send_salmon(source_obj, target_url=target)
raise
if resp.headers.get('Content-Type') == 'text/html':
return self.send_salmon(source_obj, target_resp=resp)
target_obj = resp.json()
# fetch actor as AS object
actor_url = target_obj.get('actor') or target_obj.get('attributedTo')
if not actor_url:
self.abort(400, 'Target object has no actor or attributedTo')
actor = common.requests_get(actor_url, json=True,
actor = common.requests_get(actor_url, parse_json=True,
headers=activitypub.CONNEG_HEADER)
# deliver source object to target actor's inbox
@ -52,9 +66,57 @@ class WebmentionHandler(webapp2.RequestHandler):
if not inbox_url:
self.abort(400, 'Target actor has no inbox')
headers = copy.copy(common.HEADERS)
headers['Content-Type'] = activitypub.CONTENT_TYPE_AS
requests.post(inbox_url, json=source_obj, headers=headers)
common.requests_post(inbox_url, json=source_obj,
headers={'Content-Type': activitypub.CONTENT_TYPE_AS})
def send_salmon(self, source_obj, target_url=None, target_resp=None):
    """Deliver the source post to the target as a Salmon slap.

    Finds the Atom rel-alternate link in the target page, reads the target
    entry's id and Salmon endpoint from that Atom document, converts
    source_obj to Atom, signs it into a magic envelope with the source
    domain's key, and POSTs the envelope to the endpoint.

    Exactly one of target_url and target_resp must be provided.

    Args:
      source_obj: dict, the source post; its first inReplyTo entry is
        updated in place with the target entry's id
      target_url: string, URL of the target page; fetched here if given
      target_resp: already-fetched response for the target page

    Discovery failures (no Atom link, no usable Atom entry, no Salmon
    endpoint) are reported through common.error() as HTTP 400.
    """
    # fetch target HTML page, extract Atom rel-alternate link
    if target_url:
        assert not target_resp
        target_resp = common.requests_get(target_url)
    else:
        assert target_resp
        # TODO: this could be different due to redirects
        target_url = target_resp.url

    soup = BeautifulSoup(target_resp.content,
                         from_encoding=target_resp.encoding)
    atom_link = soup.find('link', rel='alternate',
                          type=common.ATOM_CONTENT_TYPE)
    # find() returns None when nothing matches, and the tag may lack href
    atom_url = atom_link.get('href') if atom_link else None
    if not atom_url:
        return common.error(
            self, 'Target post %s has no Atom rel-alternate link' % target_url)

    # fetch Atom target post, extract id and salmon endpoint
    target_feed = common.requests_get(atom_url).text
    entries = feedparser.parse(target_feed).entries
    target_id = entries[0].get('id') if entries else None
    if not target_id:
        return common.error(
            self, 'Atom feed %s has no entry with an id' % atom_url)
    source_obj['inReplyTo'][0]['id'] = target_id

    logging.info('Discovering Salmon endpoint in %s', atom_url)
    endpoint = django_salmon.discover_salmon_endpoint(target_feed)
    if not endpoint:
        author = source_obj.get('author') or {}
        return common.error(
            self,
            'No salmon endpoint found for %s' %
            (author.get('id') or author.get('url')),
            status=400)
    logging.info('Discovered Salmon endpoint %s', endpoint)

    # construct reply Atom object
    source_url = self.request.get('source')
    reply_atom = atom.activities_to_atom(
        [{'object': source_obj}], {}, host_url=source_url,
        xml_base=source_url)
    logging.info('Converted %s to Atom:\n%s', source_url, reply_atom)

    # sign reply and wrap in magic envelope
    # TODO: use author h-card's u-url?
    domain = urlparse.urlparse(source_url).netloc.split(':')[0]
    key = models.MagicKey.get_or_create(domain)
    magic_envelope = magicsigs.magic_envelope(
        reply_atom, common.ATOM_CONTENT_TYPE, key)

    logging.info('Sending Salmon slap to %s', endpoint)
    common.requests_post(
        endpoint, data=magic_envelope,
        headers={'Content-Type': common.MAGIC_ENVELOPE_CONTENT_TYPE})
app = webapp2.WSGIApplication([