diff --git a/README.md b/README.md index fe0c595..b7350a7 100644 --- a/README.md +++ b/README.md @@ -106,4 +106,4 @@ Here are in progress notes on how I'm testing interoperability with various fede * [Mastodon](https://joinmastodon.org/) * [snarfed@mastodon.technology](https://mastodon.technology/@snarfed) * Example post: [HTML](https://mastodon.technology/@snarfed/2604611), [Atom](https://mastodon.technology/users/snarfed/updates/73978.atom) - * Atom has Salmon link rel, `author.email` is snarfed@mastodon.technology + * Profile HTML/Atom have Salmon link rel. Individual post HTML/Atom don't. `author.email` is snarfed@mastodon.technology diff --git a/app.yaml b/app.yaml index 6e256ab..d1b8b5b 100644 --- a/app.yaml +++ b/app.yaml @@ -78,7 +78,7 @@ handlers: script: webfinger.app secure: always -- url: /.well-known/webfinger +- url: /.well-known/.* script: webfinger.app secure: always diff --git a/appengine_config.py b/appengine_config.py index ed6c6ca..430cf46 100644 --- a/appengine_config.py +++ b/appengine_config.py @@ -18,3 +18,20 @@ from granary.appengine_config import * # http://stackoverflow.com/questions/34574740 from requests_toolbelt.adapters import appengine appengine.monkeypatch() + + +# suppresses these INFO logs: +# Sandbox prevented access to file "/usr/local/Caskroom/google-cloud-sdk" +# If it is a static file, check that `application_readable: true` is set in your app.yaml + +import logging + +class StubsFilter(logging.Filter): + def filter(self, record): + msg = record.getMessage() + if (msg.startswith('Sandbox prevented access to file') or + msg.startswith('If it is a static file, check that')): + return 0 + return 1 + +logging.getLogger().addFilter(StubsFilter()) diff --git a/common.py b/common.py index 3245e10..f0961eb 100644 --- a/common.py +++ b/common.py @@ -12,7 +12,7 @@ HEADERS = { } ATOM_CONTENT_TYPE = 'application/atom+xml' MAGIC_ENVELOPE_CONTENT_TYPE = 'application/magic-envelope+xml' - +XML_UTF8 = "\n" def requests_get(url, **kwargs): 
return _requests_fn(util.requests_get, url, **kwargs) @@ -22,11 +22,14 @@ def requests_post(url, **kwargs): return _requests_fn(util.requests_post, url, **kwargs) -def _requests_fn(fn, url, parse_json=False, **kwargs): +def _requests_fn(fn, url, parse_json=False, log=False, **kwargs): """Wraps requests.* and adds raise_for_status() and User-Agent.""" kwargs.setdefault('headers', {}).update(HEADERS) resp = fn(url, **kwargs) + if log: + logging.info('Got %s\n headers:%s\n%s', resp.status_code, resp.headers, + resp.text) resp.raise_for_status() if parse_json: diff --git a/requirements.txt b/requirements.txt index 7e87717..5bd1f0b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ -e git+https://github.com/snarfed/webmention-tools.git#egg=webmentiontools bs4 feedparser -granary +granary>=1.8 mf2py>=1.0.4 mf2util>=0.5.0 mock diff --git a/salmon.py b/salmon.py index 0ac4e88..dbdf0a5 100644 --- a/salmon.py +++ b/salmon.py @@ -39,14 +39,15 @@ class SlapHandler(webapp2.RequestHandler): common.error(self, 'Author URI %s has unsupported scheme; expected acct:' % author) logging.info('Fetching Salmon key for %s' % author) - if not magicsigs.verify(author, data, parsed['sig']): + if not magicsigs.verify(data, parsed['sig'], author_uri=author): common.error(self, 'Could not verify magic signature.') logging.info('Verified magic signature.') - # verify that the timestamp is recent (required by spec) - updated = utils.parse_updated_from_atom(data) - if not utils.verify_timestamp(updated): - common.error(self, 'Timestamp is more than 1h old.') + # Mastodon doesn't do this! so screw it. 
+ # # verify that the timestamp is recent (required by spec) + # updated = utils.parse_updated_from_atom(data) + # if not utils.verify_timestamp(updated): + # common.error(self, 'Timestamp is more than 1h old.') # find webmention source and target source = None diff --git a/test/test_webmention.py b/test/test_webmention.py index 50b5308..3aeecf5 100644 --- a/test/test_webmention.py +++ b/test/test_webmention.py @@ -31,6 +31,14 @@ class WebmentionTest(testutil.TestCase): def setUp(self): super(WebmentionTest, self).setUp() + self.orig = requests_response("""\ + + + + + +""", content_type='text/html; charset=utf-8') + self.reply = requests_response("""\ @@ -44,7 +52,7 @@ class WebmentionTest(testutil.TestCase): """, content_type='text/html; charset=utf-8') - def test_webmention_activitypub(self, mock_get, mock_post): + def test_activitypub(self, mock_get, mock_post): article = requests_response({ '@context': ['https://www.w3.org/ns/activitystreams'], 'type': 'Article', @@ -94,14 +102,7 @@ class WebmentionTest(testutil.TestCase): expected_headers['Content-Type'] = activitypub.CONTENT_TYPE_AS self.assertEqual(expected_headers, kwargs['headers']) - def test_webmention_salmon(self, mock_get, mock_post): - target = requests_response("""\ - - - - - -""", content_type='text/html; charset=utf-8') + def test_salmon(self, mock_get, mock_post): atom = requests_response("""\ @@ -110,7 +111,7 @@ class WebmentionTest(testutil.TestCase): baz ☕ baj """) - mock_get.side_effect = [self.reply, target, atom] + mock_get.side_effect = [self.reply, self.orig, atom] got = app.get_response( '/webmention', method='POST', body=urllib.urlencode({ @@ -152,3 +153,34 @@ class WebmentionTest(testutil.TestCase): self.assertEquals( u'foo ☕ bar', entry.content[0]['value']) + + def test_salmon_get_salmon_from_webfinger(self, mock_get, mock_post): + atom = requests_response("""\ + + + + ryan + ryan@orig + + tag:fed.brid.gy,2017-08-22:orig-post + +""") + webfinger = requests_response({ + 'subject': 
'acct:ryan@orig', + 'links': [{ + 'rel': 'salmon', + 'href': 'http://orig/@ryan/salmon', + }], + }) + mock_get.side_effect = [self.reply, self.orig, atom, webfinger] + + got = app.get_response('/webmention', method='POST', body=urllib.urlencode({ + 'source': 'http://a/reply', + 'target': 'http://orig/post', + })) + self.assertEquals(200, got.status_int) + + mock_get.assert_any_call( + 'http://orig/.well-known/webfinger?resource=ryan@orig', + headers=common.HEADERS, timeout=util.HTTP_TIMEOUT) + self.assertEqual(('http://orig/@ryan/salmon',), mock_post.call_args[0]) diff --git a/webfinger.py b/webfinger.py index 5e814a9..2d842e0 100644 --- a/webfinger.py +++ b/webfinger.py @@ -33,13 +33,14 @@ class UserHandler(handlers.XrdOrJrdHandler): def template_prefix(self): return 'templates/webfinger_user' - def template_vars(self, domain): + def template_vars(self, acct): + username, domain = util.parse_acct_uri(acct) url = 'http://%s/' % domain # TODO: unify with activitypub resp = common.requests_get(url) mf2 = mf2py.parse(resp.text, url=resp.url) - logging.info('Parsed mf2 for %s: %s', resp.url, json.dumps(mf2, indent=2)) + # logging.debug('Parsed mf2 for %s: %s', resp.url, json.dumps(mf2, indent=2)) hcard = mf2util.representative_hcard(mf2, resp.url) logging.info('Representative h-card: %s', json.dumps(hcard, indent=2)) @@ -48,23 +49,30 @@ class UserHandler(handlers.XrdOrJrdHandler): Couldn't find a \ representative h-card on %s""" % resp.url) - uri = '@%s' % domain + uri = '%s@%s' % (username, domain) key = models.MagicKey.get_or_create(uri) props = hcard.get('properties', {}) urls = util.dedupe_urls(props.get('url', []) + [resp.url]) - return util.trim_nulls({ + data = util.trim_nulls({ 'subject': 'acct:' + uri, 'aliases': urls, 'magic_keys': [{'value': key.href()}], - 'links': [{ + 'links': sum(([{ 'rel': 'http://webfinger.net/rel/profile-page', 'type': 'text/html', 'href': url, - } for url in urls] + [{ + }, { + 'rel': 'canonical_uri', + 'type': 'text/html', + 'href': 
url, + }] for url in urls), []) + [{ 'rel': 'http://webfinger.net/rel/avatar', 'href': url, } for url in props.get('photo', [])] + [{ + 'rel': 'http://schemas.google.com/g/2010#updates-from', + 'href': 'https://granary-demo.appspot.com/url?input=html&output=atom&url=https://snarfed.org/&hub=https://snarfed.org/', + }, { 'rel': 'magic-public-key', 'href': key.href(), }, { @@ -72,6 +80,8 @@ representative h-card on %s""" % resp.url) 'href': '%s/@%s/salmon' % (self.request.host_url, domain), }] }) + logging.info('Returning WebFinger data: %s', json.dumps(data, indent=2)) + return data class WebfingerHandler(UserHandler): @@ -81,16 +91,16 @@ class WebfingerHandler(UserHandler): def template_vars(self): resource = util.get_required_param(self, 'resource') - try: - username, domain = util.parse_acct_uri(resource) - url = 'http://%s/' % domain - except ValueError: - url = resource - domain = urlparse.urlparse(url).netloc - if not domain: - common.error(self, 'No domain found in resource %s' % url) + # try: + # username, domain = util.parse_acct_uri(resource) + # url = 'http://%s/' % domain + # except ValueError: + # url = resource + # domain = urlparse.urlparse(url).netloc + # if not domain: + # common.error(self, 'No domain found in resource %s' % url) - return super(WebfingerHandler, self).template_vars(domain) + return super(WebfingerHandler, self).template_vars(resource) app = webapp2.WSGIApplication([ diff --git a/webmention.py b/webmention.py index cd05c95..0a83fa8 100644 --- a/webmention.py +++ b/webmention.py @@ -2,6 +2,10 @@ TODO: mastodon doesn't advertise salmon endpoint in their individual post atom?! 
https://mastodon.technology/users/snarfed/updates/73978.atom + +TODO tests: +* actor/attributedTo could be string URL +* salmon rel via webfinger via author.name + domain """ import json import logging @@ -36,16 +40,20 @@ class WebmentionHandler(webapp2.RequestHandler): # fetch source page, convert to ActivityStreams resp = common.requests_get(source) mf2 = mf2py.parse(resp.text, url=resp.url) - logging.info('Parsed mf2 for %s: %s', resp.url, json.dumps(mf2, indent=2)) + # logging.debug('Parsed mf2 for %s: %s', resp.url, json.dumps(mf2, indent=2)) entry = mf2util.find_first_entry(mf2, ['h-entry']) logging.info('First entry: %s', json.dumps(entry, indent=2)) source_obj = microformats2.json_to_object(entry) logging.info('Converted to AS: %s', json.dumps(source_obj, indent=2)) + return self.send_salmon(source_obj, target_url=target) + # fetch target page as AS object try: - resp = common.requests_get(target, headers=activitypub.CONNEG_HEADER) + resp = common.requests_get(target, headers=activitypub.CONNEG_HEADER, + log=True) + target_obj = resp.json() except requests.HTTPError as e: if e.response.status_code // 100 == 4: return self.send_salmon(source_obj, target_url=target) @@ -54,10 +62,6 @@ class WebmentionHandler(webapp2.RequestHandler): if resp.headers.get('Content-Type').startswith('text/html'): return self.send_salmon(source_obj, target_resp=resp) - logging.info('Got %s', resp.headers.get('Content-Type')) - target_obj = resp.json() - logging.info(json.dumps(target_obj, indent=2)) - # post-process AS1 to look enough like AS2 to work in_reply_tos = util.get_list(source_obj, 'inReplyTo') if in_reply_tos: @@ -75,8 +79,9 @@ class WebmentionHandler(webapp2.RequestHandler): if not inbox_url: # fetch actor as AS object - actor = target_obj.get('actor') or target_obj.get('attributedTo') or {} - actor_url = actor.get('url') + actor_url = target_obj.get('actor') or target_obj.get('attributedTo') + if isinstance(actor_url, dict): + actor_url = actor_url.get('url') if not
actor_url: self.abort(400, 'Target object has no actor or attributedTo URL') @@ -93,8 +98,7 @@ class WebmentionHandler(webapp2.RequestHandler): # deliver source object to target actor's inbox resp = common.requests_post( urlparse.urljoin(target, inbox_url), json=source_obj, - headers={'Content-Type': activitypub.CONTENT_TYPE_AS}) - logging.info('Got: %s\n%s', resp.headers, resp.text) + headers={'Content-Type': activitypub.CONTENT_TYPE_AS}, log=True) def send_salmon(self, source_obj, target_url=None, target_resp=None): # fetch target HTML page, extract Atom rel-alternate link @@ -114,11 +118,38 @@ class WebmentionHandler(webapp2.RequestHandler): # fetch Atom target post, extract id and salmon endpoint feed = common.requests_get(atom_url['href']).text parsed = feedparser.parse(feed) - target_id = parsed.entries[0].id + logging.info('Parsed: %s', json.dumps(parsed, indent=2, + default=lambda key: '-')) + entry = parsed.entries[0] + target_id = entry.id source_obj['inReplyTo'][0]['id'] = target_id + # Mastodon (and maybe others?) require a rel-mentioned link to the + # original post's author to make it show up as a reply: + # app/services/process_interaction_service.rb + # ...so add them as a tag, which atom renders as a rel-mention link. 
+ if entry.authors: + url = entry.authors[0].get('href') + if url: + source_obj.setdefault('tags', []).append({'url': url}) + logging.info('Discovering Salmon endpoint in %s', atom_url['href']) endpoint = django_salmon.discover_salmon_endpoint(feed) + + if not endpoint: + # no Salmon link in the Atom feed; fall back to the author's WebFinger + parsed = urlparse.urlparse(target_url) + acct = entry.author_detail.get('email') or '@'.join( + (entry.author_detail.name, parsed.netloc)) + try: + resp = common.requests_get( + '%s://%s/.well-known/webfinger?resource=%s' % + (parsed.scheme, parsed.netloc, acct), + log=True) + endpoint = django_salmon.get_salmon_replies_link(resp.json()) + except requests.HTTPError as e: + logging.warning('WebFinger fetch for %s failed: %s', acct, e) + if not endpoint: common.error(self, 'No salmon endpoint found!', status=400) logging.info('Discovered Salmon endpoint %s', endpoint) @@ -137,7 +168,7 @@ class WebmentionHandler(webapp2.RequestHandler): logging.info('Sending Salmon slap to %s', endpoint) common.requests_post( - endpoint, data=magic_envelope, + endpoint, data=common.XML_UTF8 + magic_envelope, log=True, headers={'Content-Type': common.MAGIC_ENVELOPE_CONTENT_TYPE})