From f0a5f1745edef093e571be71f92dab251822d695 Mon Sep 17 00:00:00 2001 From: Ryan Barrett Date: Wed, 25 Jan 2023 20:41:29 -0800 Subject: [PATCH] webfinger: return data even if site has no representative h-card fixes #384 --- tests/test_webfinger.py | 69 ++++++++++++++++++++++++------------ webfinger.py | 78 ++++++++++++++++++++++++----------------- 2 files changed, 92 insertions(+), 55 deletions(-) diff --git a/tests/test_webfinger.py b/tests/test_webfinger.py index ef75ca4..8309f43 100644 --- a/tests/test_webfinger.py +++ b/tests/test_webfinger.py @@ -67,6 +67,12 @@ class WebfingerTest(testutil.TestCase): 'rel': 'sharedInbox', 'type': 'application/activity+json', 'href': 'http://localhost/inbox' + }, { + 'rel': 'magic-public-key', + 'href': self.key.href(), + }, { + 'rel': 'http://ostatus.org/schema/1.0/subscribe', + 'template': 'http://localhost/user/foo.com?url={uri}', }, { 'rel': 'http://schemas.google.com/g/2010#updates-from', 'type': 'application/atom+xml', @@ -74,12 +80,6 @@ class WebfingerTest(testutil.TestCase): }, { 'rel': 'hub', 'href': 'https://bridgy-fed.superfeedr.com/' - }, { - 'rel': 'magic-public-key', - 'href': self.key.href(), - }, { - 'rel': 'http://ostatus.org/schema/1.0/subscribe', - 'template': 'http://localhost/user/foo.com?url={uri}', }] } @@ -168,11 +168,49 @@ class WebfingerTest(testutil.TestCase):

foo bar

-""") +""", url='https://foo.com/') got = self.client.get('/acct:foo.com') self.assert_req(mock_get, 'https://foo.com/') - self.assertEqual(400, got.status_code) - self.assertIn('representative h-card', got.get_data(as_text=True)) + self.assertEqual(200, got.status_code) + self.assert_equals({ + 'subject': 'acct:foo.com@foo.com', + 'aliases': ['https://foo.com/'], + 'magic_keys': [{'value': self.key.href()}], + 'links': [{ + 'rel': 'http://webfinger.net/rel/profile-page', + 'type': 'text/html', + 'href': 'https://foo.com/' + }, { + 'rel': 'canonical_uri', + 'type': 'text/html', + 'href': 'https://foo.com/' + }, { + 'rel': 'self', + 'type': 'application/activity+json', + 'href': 'http://localhost/foo.com' + }, { + 'rel': 'inbox', + 'type': 'application/activity+json', + 'href': 'http://localhost/foo.com/inbox' + }, { + 'rel': 'sharedInbox', + 'type': 'application/activity+json', + 'href': 'http://localhost/inbox' + }, { + 'rel': 'http://schemas.google.com/g/2010#updates-from', + 'type': 'application/atom+xml', + 'href': 'https://granary.io/url?input=html&output=atom&url=https%3A%2F%2Ffoo.com%2F&hub=https%3A%2F%2Ffoo.com%2F', + }, { + 'rel': 'hub', + 'href': 'https://bridgy-fed.superfeedr.com/' + }, { + 'rel': 'magic-public-key', + 'href': self.key.href(), + }, { + 'rel': 'http://ostatus.org/schema/1.0/subscribe', + 'template': 'http://localhost/user/foo.com?url={uri}', + }] + }, got.json) def test_user_bad_tld(self): got = self.client.get('/acct:foo.json') @@ -248,16 +286,3 @@ class WebfingerTest(testutil.TestCase): got = self.client.get('/.well-known/webfinger?resource=acct%3A%40localhost') self.assertEqual(400, got.status_code, got.get_data(as_text=True)) - - def test_webfinger_bad_resources(self): - # TODO: remove now that we check the User exists first? we won't create - # users with keys like this, right? - models.User.get_or_create('acct:k') - for resource in ( - # https://console.cloud.google.com/errors/detail/CKGv-b6impW3Jg;time=P30D?project=bridgy-federated - 'acct:k', - ): - with self.subTest(resource=resource): - url = f'/.well-known/webfinger?resource={resource}' - got = self.client.get(url, headers={'Accept': 'application/json'}) - self.assertEqual(400, got.status_code, got.get_data(as_text=True)) diff --git a/webfinger.py b/webfinger.py index 43bc9ae..b22d8db 100644 --- a/webfinger.py +++ b/webfinger.py @@ -51,6 +51,7 @@ class Actor(flask_util.XrdOrJrd): if url: urls = [url, urllib.parse.urljoin(url, '/')] + urls + resp = None for candidate in urls: try: resp = util.requests_get(candidate) @@ -63,39 +64,22 @@ class Actor(flask_util.XrdOrJrd): hcard = mf2util.representative_hcard(mf2, resp.url) if hcard: logger.info(f'Representative h-card: {json_dumps(hcard, indent=2)}') + user.actor_as2 = json_dumps(common.postprocess_as2( + as2.from_as1(microformats2.json_to_object(hcard)), user=user)) + user.put() break else: - error(f"didn't find a representative h-card (http://microformats.org/wiki/representative-hcard-parsing) in any of {urls}") + logger.info(f"didn't find a representative h-card (http://microformats.org/wiki/representative-hcard-parsing) in any of {urls}") + hcard = {'properties': { + 'url': [f'https://{domain}/'], + }} logger.info(f'Generating WebFinger data for {domain}') props = hcard.get('properties', {}) - urls = util.dedupe_urls(props.get('url', []) + [resp.url]) + urls = util.dedupe_urls(props.get('url', []) + + ([resp.url] if resp else [])) canonical_url = urls[0] - user.actor_as2 = json_dumps(common.postprocess_as2( - as2.from_as1(microformats2.json_to_object(hcard)), user=user)) - user.put() - - # discover atom feed, if any - feed = parsed.find('link', rel='alternate', type=atom.CONTENT_TYPE) - if feed and feed['href']: - feed = urllib.parse.urljoin(resp.url, feed['href']) - else: - feed = 'https://granary.io/url?' + urllib.parse.urlencode({ - 'input': 'html', - 'output': 'atom', - 'url': resp.url, - 'hub': resp.url, - }) - - # discover PuSH, if any - for link in resp.headers.get('Link', '').split(','): - match = common.LINK_HEADER_RE.match(link) - if match and match.group(2) == 'hub': - hub = match.group(1) - else: - hub = 'https://bridgy-fed.superfeedr.com/' - # generate webfinger content data = util.trim_nulls({ 'subject': 'acct:' + user.address().lstrip('@'), @@ -136,14 +120,8 @@ class Actor(flask_util.XrdOrJrd): }, # OStatus + # TODO: remove? { - 'rel': 'http://schemas.google.com/g/2010#updates-from', - 'type': atom.CONTENT_TYPE, - 'href': feed, - }, { - 'rel': 'hub', - 'href': hub, - }, { 'rel': 'magic-public-key', 'href': user.href(), }, @@ -156,6 +134,40 @@ class Actor(flask_util.XrdOrJrd): 'template': common.host_url(f'user/{domain}?url={{uri}}'), }] }) + + # OStatus: discover atom feed, if any + # TODO: remove? + if resp: + feed = parsed.find('link', rel='alternate', type=atom.CONTENT_TYPE) + if feed and feed['href']: + feed = urllib.parse.urljoin(resp.url, feed['href']) + else: + feed = 'https://granary.io/url?' + urllib.parse.urlencode({ + 'input': 'html', + 'output': 'atom', + 'url': resp.url, + 'hub': resp.url, + }) + data['links'].append({ + 'rel': 'http://schemas.google.com/g/2010#updates-from', + 'type': atom.CONTENT_TYPE, + 'href': feed, + }) + + # OStatus: discover PuSH, if any + # TODO: remove? + if resp: + for link in resp.headers.get('Link', '').split(','): + match = common.LINK_HEADER_RE.match(link) + if match and match.group(2) == 'hub': + hub = match.group(1) + else: + hub = 'https://bridgy-fed.superfeedr.com/' + data['links'].append({ + 'rel': 'hub', + 'href': hub, + }) + logger.info(f'Returning WebFinger data: {json_dumps(data, indent=2)}') return data