diff --git a/common.py b/common.py index 14f9c203..3245e10d 100644 --- a/common.py +++ b/common.py @@ -1,8 +1,8 @@ """Misc common utilities. """ -import json import logging +from oauth_dropins.webutil import util import requests from webob import exc @@ -10,28 +10,36 @@ DOMAIN_RE = r'([^/]+\.[^/]+)' HEADERS = { 'User-Agent': 'Bridgy Fed (https://fed.brid.gy/)', } +ATOM_CONTENT_TYPE = 'application/atom+xml' +MAGIC_ENVELOPE_CONTENT_TYPE = 'application/magic-envelope+xml' def requests_get(url, **kwargs): - return _requests_fn(requests.get, url, **kwargs) + return _requests_fn(util.requests_get, url, **kwargs) def requests_post(url, **kwargs): - return _requests_fn(requests.post, url, **kwargs) + return _requests_fn(util.requests_post, url, **kwargs) -def _requests_fn(fn, url, json=False, **kwargs): +def _requests_fn(fn, url, parse_json=False, **kwargs): """Wraps requests.* and adds raise_for_status() and User-Agent.""" kwargs.setdefault('headers', {}).update(HEADERS) + resp = fn(url, **kwargs) resp.raise_for_status() - if json: + if parse_json: try: return resp.json() except ValueError: msg = "Couldn't parse response as JSON" logging.error(msg, exc_info=True) - raise exc.HTTPBadRequest(400, msg) + raise exc.HTTPBadRequest(msg) return resp + + +def error(handler, msg, status=400): + logging.info(msg) + handler.abort(status, msg) diff --git a/requirements.txt b/requirements.txt index 3229db7c..7e877172 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,7 @@ -e git+https://github.com/snarfed/django-salmon.git#egg=django_salmon -e git+https://github.com/snarfed/webmention-tools.git#egg=webmentiontools +bs4 +feedparser granary mf2py>=1.0.4 mf2util>=0.5.0 diff --git a/salmon.py b/salmon.py index a1f55980..0ba109c2 100644 --- a/salmon.py +++ b/salmon.py @@ -36,17 +36,17 @@ class SlapHandler(webapp2.RequestHandler): if ':' not in author: author = 'acct:%s' % author elif not author.startswith('acct:'): - self.error('Author URI %s has unsupported scheme; expected acct:' % author) + common.error(self, 'Author URI %s has unsupported scheme; expected acct:' % author) logging.info('Fetching Salmon key for %s' % author) if not magicsigs.verify(author, data, parsed['sig']): - self.error('Could not verify magic signature.') + common.error(self, 'Could not verify magic signature.') logging.info('Verified magic signature.') # verify that the timestamp is recent (required by spec) updated = utils.parse_updated_from_atom(data) if not utils.verify_timestamp(updated): - self.error('Timestamp is more than 1h old.') + common.error(self, 'Timestamp is more than 1h old.') # find webmention source and target source = None @@ -60,7 +60,7 @@ class SlapHandler(webapp2.RequestHandler): targets.append(target.strip()) if not source: - self.error("Couldn't find post URL (link element)") + common.error(self, "Couldn't find post URL (link element)") if not targets: self.error("Couldn't find target URL (thr:in-reply-to or TODO)") @@ -79,10 +79,6 @@ class SlapHandler(webapp2.RequestHandler): self.abort(errors[0].get('http_status') or 400, 'Errors:\n' + '\n'.join(json.dumps(e, indent=2) for e in errors)) - def error(self, msg): - logging.info(msg) - self.abort(400, msg) - app = webapp2.WSGIApplication([ (r'/(?:acct)?@%s/salmon' % common.DOMAIN_RE, SlapHandler), diff --git a/test/test_activitypub.py b/test/test_activitypub.py index c2d7baa3..6003494a 100644 --- a/test/test_activitypub.py +++ b/test/test_activitypub.py @@ -4,11 +4,13 @@ TODO: test error handling """ from __future__ import unicode_literals +import copy import json import unittest import urllib import mock +from oauth_dropins.webutil import util import requests import activitypub @@ -35,7 +37,8 @@ class ActivityPubTest(unittest.TestCase): mock_get.return_value = resp got = app.get_response('/foo.com') - mock_get.assert_called_once_with('http://foo.com/', headers=common.HEADERS) + mock_get.assert_called_once_with('http://foo.com/', headers=common.HEADERS, + timeout=util.HTTP_TIMEOUT) self.assertEquals(200, got.status_int) self.assertEquals(activitypub.CONTENT_TYPE_AS2, got.headers['Content-Type']) self.assertEquals({ @@ -61,7 +64,8 @@ class ActivityPubTest(unittest.TestCase): mock_get.return_value = resp got = app.get_response('/foo.com') - mock_get.assert_called_once_with('http://foo.com/', headers=common.HEADERS) + mock_get.assert_called_once_with('http://foo.com/', headers=common.HEADERS, + timeout=util.HTTP_TIMEOUT) self.assertEquals(400, got.status_int) self.assertIn('representative h-card', got.body) # TODO @@ -92,6 +96,8 @@ class ActivityPubTest(unittest.TestCase): 'http://orig/post', headers=common.HEADERS, verify=False) self.assertEquals(200, got.status_int) + expected_headers = copy.deepcopy(common.HEADERS) + expected_headers['Accept'] = '*/*' mock_post.assert_called_once_with( 'http://orig/webmention', data={ @@ -99,5 +105,5 @@ class ActivityPubTest(unittest.TestCase): 'target': 'http://orig/post', }, allow_redirects=False, - headers=common.HEADERS, + headers=expected_headers, verify=False) diff --git a/test/test_salmon.py b/test/test_salmon.py index 155e1203..72b652d9 100644 --- a/test/test_salmon.py +++ b/test/test_salmon.py @@ -4,6 +4,7 @@ TODO: test error handling """ from __future__ import unicode_literals +import copy import unittest import urllib @@ -93,6 +94,8 @@ class SalmonTest(unittest.TestCase): )) # check webmention discovery and post + expected_headers = copy.deepcopy(common.HEADERS) + expected_headers['Accept'] = '*/*' mock_get.assert_called_once_with( 'http://orig/post', headers=common.HEADERS, verify=False) mock_post.assert_called_once_with( @@ -102,5 +105,5 @@ class SalmonTest(unittest.TestCase): 'target': 'http://orig/post', }, allow_redirects=False, - headers=common.HEADERS, + headers=expected_headers, verify=False) diff --git a/test/test_webfinger.py b/test/test_webfinger.py index 67cd6a10..ca8871ee 100644 --- a/test/test_webfinger.py +++ b/test/test_webfinger.py @@ -11,6 +11,7 @@ from google.appengine.datastore import datastore_stub_util from google.appengine.ext import testbed import mock +from oauth_dropins.webutil import util import requests import common @@ -73,7 +74,8 @@ class WebFingerTest(unittest.TestCase): mock_get.return_value = resp got = app.get_response('/@foo.com', headers={'Accept': 'application/json'}) - mock_get.assert_called_once_with('http://foo.com/', headers=common.HEADERS) + mock_get.assert_called_once_with('http://foo.com/', headers=common.HEADERS, + timeout=util.HTTP_TIMEOUT) self.assertEquals(200, got.status_int) self.assertEquals('application/json; charset=utf-8', got.headers['Content-Type']) @@ -144,7 +146,8 @@ class WebFingerTest(unittest.TestCase): mock_get.return_value = resp got = app.get_response('/@foo.com') - mock_get.assert_called_once_with('http://foo.com/', headers=common.HEADERS) + mock_get.assert_called_once_with('http://foo.com/', headers=common.HEADERS, + timeout=util.HTTP_TIMEOUT) self.assertEquals(400, got.status_int) self.assertIn('representative h-card', got.body) # TODO diff --git a/test/test_webmention.py b/test/test_webmention.py index b23178ad..bc5bf192 100644 --- a/test/test_webmention.py +++ b/test/test_webmention.py @@ -8,12 +8,20 @@ import copy import json import unittest import urllib +import urllib2 +import feedparser +from google.appengine.datastore import datastore_stub_util +from google.appengine.ext import testbed import mock +from mock import call +from oauth_dropins.webutil import util import requests import activitypub import common +from django_salmon import magicsigs, utils +import models import webmention from webmention import app @@ -22,22 +30,50 @@ from webmention import app @mock.patch('requests.get') class WebmentionTest(unittest.TestCase): - def test_webmention(self, mock_get, mock_post): - reply_html = u""" - + maxDiff = None + + def setUp(self): + self.testbed = testbed.Testbed() + self.testbed.activate() + hrd_policy = datastore_stub_util.PseudoRandomHRConsistencyPolicy(probability=.5) + self.testbed.init_datastore_v3_stub(consistency_policy=hrd_policy) + self.testbed.init_memcache_stub() + + self.reply_html = u"""\ + +
+

foo ☕ bar

- + + """ - reply = requests.Response() - reply.status_code = 200 - reply._text = reply_html - reply._content = reply._text.encode('utf-8') - reply.encoding = 'utf-8' + self.reply = requests.Response() + self.reply.status_code = 200 + self.reply._text = self.reply_html + self.reply._content = self.reply_html.encode('utf-8') + self.reply.encoding = 'utf-8' + self.reply_atom = u"""\ + + + http://a/reply + + tag:fed.brid.gy,2017-08-22:orig-post + + foo ☕ bar + + +""" + + def tearDown(self): + self.testbed.deactivate() + + def test_webmention_activitypub(self, mock_get, mock_post): article_as = { '@context': ['https://www.w3.org/ns/activitystreams'], 'type': 'Article', @@ -62,7 +98,7 @@ class WebmentionTest(unittest.TestCase): actor._content = actor._text.encode('utf-8') actor.encoding = 'utf-8' - mock_get.side_effect = [reply, article, actor] + mock_get.side_effect = [self.reply, article, actor] got = app.get_response( '/webmention', method='POST', body=urllib.urlencode({ @@ -71,10 +107,18 @@ class WebmentionTest(unittest.TestCase): })) self.assertEquals(200, got.status_int) + mock_get.assert_has_calls(( + call('http://a/reply', headers=common.HEADERS, timeout=util.HTTP_TIMEOUT), + call('http://orig/post', headers=activitypub.CONNEG_HEADER, + timeout=util.HTTP_TIMEOUT), + call('http://orig/author', headers=activitypub.CONNEG_HEADER, + timeout=util.HTTP_TIMEOUT),)) + args, kwargs = mock_post.call_args self.assertEqual(('https://foo.com/inbox',), args) self.assertEqual({ 'objectType': 'comment', + 'url': 'http://a/reply', 'displayName': u'foo ☕ bar', 'content': u' foo ☕ bar ', 'inReplyTo': [{'url': 'http://orig/post'}], @@ -83,3 +127,69 @@ class WebmentionTest(unittest.TestCase): expected_headers = copy.copy(common.HEADERS) expected_headers['Content-Type'] = activitypub.CONTENT_TYPE_AS self.assertEqual(expected_headers, kwargs['headers']) + + def test_webmention_salmon(self, mock_get, mock_post): + target = requests.Response() + target.status_code = 200 + target.headers['Content-Type'] = 'text/html' + target._content = """\ + + + + + +""".encode('utf-8') + + atom = requests.Response() + atom.status_code = 200 + atom._content = """\ + + + tag:fed.brid.gy,2017-08-22:orig-post + + baz ☕ baj + +""".encode('utf-8') + + mock_get.side_effect = [self.reply, target, atom] + + got = app.get_response( + '/webmention', method='POST', body=urllib.urlencode({ + 'source': 'http://a/reply', + 'target': 'http://orig/post', + })) + self.assertEquals(200, got.status_int) + + mock_get.assert_has_calls(( + call('http://a/reply', headers=common.HEADERS, timeout=util.HTTP_TIMEOUT), + call('http://orig/post', headers=activitypub.CONNEG_HEADER, + timeout=util.HTTP_TIMEOUT), + call('http://orig/atom', headers=common.HEADERS, timeout=util.HTTP_TIMEOUT), + )) + + args, kwargs = mock_post.call_args + self.assertEqual(('http://orig/salmon',), args) + self.assertEqual(common.MAGIC_ENVELOPE_CONTENT_TYPE, + kwargs['headers']['Content-Type']) + + envelope = utils.parse_magic_envelope(kwargs['data']) + assert envelope['sig'] + + feed = utils.decode(envelope['data']) + parsed = feedparser.parse(feed) + entry = parsed.entries[0] + + self.assertEquals('http://a/reply', entry.id) + self.assertIn({ + 'rel': 'alternate', + 'href': 'http://a/reply', + 'type': 'text/html', + }, entry.links) + self.assertEquals({ + 'type': 'text/html', + 'href': 'http://orig/post', + 'ref': 'tag:fed.brid.gy,2017-08-22:orig-post' + }, entry['thr_in-reply-to']) + self.assertEquals( + u'foo ☕ bar', + entry.content[0]['value']) diff --git a/webmention.py b/webmention.py index 8e67fa92..5c22454b 100644 --- a/webmention.py +++ b/webmention.py @@ -1,12 +1,16 @@ """Handles inbound webmentions. """ -import copy import json import logging +import urlparse import appengine_config -from granary import microformats2 +from bs4 import BeautifulSoup +import django_salmon +from django_salmon import magicsigs, utils +import feedparser +from granary import atom, microformats2 import mf2py import mf2util from oauth_dropins.webutil import util @@ -15,10 +19,11 @@ import webapp2 import activitypub import common +import models class WebmentionHandler(webapp2.RequestHandler): - """Handles inbound webmention, converts to ActivityPub inbox delivery.""" + """Handles inbound webmention, converts to ActivityPub or Salmon.""" def post(self): logging.info('Params: %s', self.request.params.items()) @@ -36,15 +41,24 @@ class WebmentionHandler(webapp2.RequestHandler): logging.info('Converted to AS: %s', json.dumps(source_obj, indent=2)) # fetch target page as AS object - target_obj = common.requests_get(target, json=True, - headers=activitypub.CONNEG_HEADER) + try: + resp = common.requests_get(target, headers=activitypub.CONNEG_HEADER) + except requests.HTTPError as e: + if e.response.status_code // 100 == 4: + return self.send_salmon(source_obj, target_url=target) + raise + + if resp.headers.get('Content-Type') == 'text/html': + return self.send_salmon(source_obj, target_resp=resp) + + target_obj = resp.json() # fetch actor as AS object actor_url = target_obj.get('actor') or target_obj.get('attributedTo') if not actor_url: self.abort(400, 'Target object has no actor or attributedTo') - actor = common.requests_get(actor_url, json=True, + actor = common.requests_get(actor_url, parse_json=True, headers=activitypub.CONNEG_HEADER) # deliver source object to target actor's inbox @@ -52,9 +66,57 @@ class WebmentionHandler(webapp2.RequestHandler): if not inbox_url: self.abort(400, 'Target actor has no inbox') - headers = copy.copy(common.HEADERS) - headers['Content-Type'] = activitypub.CONTENT_TYPE_AS - requests.post(inbox_url, json=source_obj, headers=headers) + common.requests_post(inbox_url, json=source_obj, + headers={'Content-Type': activitypub.CONTENT_TYPE_AS}) + + def send_salmon(self, source_obj, target_url=None, target_resp=None): + # fetch target HTML page, extract Atom rel-alternate link + if target_url: + assert not target_resp + target_resp = common.requests_get(target_url) + else: + assert target_resp + # TODO: this could be different due to redirects + target_url = target_resp.url + + parsed = BeautifulSoup(target_resp.content, from_encoding=target_resp.encoding) + atom_url = parsed.find('link', rel='alternate', type=common.ATOM_CONTENT_TYPE) + assert atom_url['href'] # TODO + + # fetch Atom target post, extract id and salmon endpoint + feed = common.requests_get(atom_url['href']).text + parsed = feedparser.parse(feed) + target_id = parsed.entries[0].id + source_obj['inReplyTo'][0]['id'] = target_id + + logging.info('Discovering Salmon endpoint in %s', atom_url['href']) + endpoint = django_salmon.discover_salmon_endpoint(feed) + if not endpoint: + author = source_obj.get('author') or {} + common.error(self, + 'No salmon endpoint found for %s' % + (author.get('id') or author.get('url')), + status=400) + logging.info('Discovered Salmon endpoint %s', endpoint) + + # construct reply Atom object + source_url = self.request.get('source') + feed = atom.activities_to_atom( + [{'object': source_obj}], {}, host_url=source_url, + xml_base=source_url) + logging.info('Converted %s to Atom:\n%s', source_url, feed) + + # sign reply and wrap in magic envelope + # TODO: use author h-card's u-url? + domain = urlparse.urlparse(source_url).netloc.split(':')[0] + key = models.MagicKey.get_or_create(domain) + magic_envelope = magicsigs.magic_envelope( + feed, common.ATOM_CONTENT_TYPE, key) + + logging.info('Sending Salmon slap to %s', endpoint) + common.requests_post( + endpoint, data=magic_envelope, + headers={'Content-Type': common.MAGIC_ENVELOPE_CONTENT_TYPE}) app = webapp2.WSGIApplication([