flask: port XrdOrJrdHandler, finish porting webfinger

flask
Ryan Barrett 2021-07-11 16:30:14 -07:00
rodzic 007f8f16fd
commit 371a92a5db
Nie znaleziono w bazie danych klucza dla tego podpisu
ID klucza GPG: 6BE31FDF4776E9D4
13 zmienionych plików z 322 dodań i 146 usunięć

Wyświetl plik

@ -79,7 +79,7 @@ def send(activity, inbox_url, user_domain):
headers=headers)
@app.route('/<string:domain>')
@app.get('/<domain>')
@cache.cached(CACHE_TIME.total_seconds())
def actor(domain):
"""Serves /[DOMAIN], fetches its mf2, converts to AS Actor, and serves it."""
@ -117,7 +117,7 @@ Coul find a representative h-card (http://microformats.org/wiki/representative-h
})
@app.route('/<string:domain>/inbox', methods=['POST'])
@app.post('/<domain>/inbox')
def inbox(domain):
"""Accepts POSTs to /[DOMAIN]/inbox and converts to outbound webmentions."""
body = request.get_data(as_text=True)

Wyświetl plik

@ -15,7 +15,7 @@ LINK_HEADER = '<%s>; rel="webmention"'
CACHE_TIME = datetime.timedelta(seconds=15)
@app.route(r'/wm/<path:url>')
@app.get(r'/wm/<path:url>')
@cache.cached(timeout=CACHE_TIME.total_seconds(), query_string=True,
response_filter=common.not_5xx)
def add_wm(url=None):

103
common.py
Wyświetl plik

@ -3,10 +3,12 @@
"""
import itertools
import logging
import os
import re
import urllib.parse
from flask import request
from flask import render_template, request
from flask.views import View
from granary import as2
from oauth_dropins.webutil import util, webmention
import requests
@ -67,6 +69,29 @@ OTHER_DOMAINS = (
DOMAINS = (PRIMARY_DOMAIN,) + OTHER_DOMAINS
# TODO: add to all handlers:
# self.response.headers.update({
# 'Access-Control-Allow-Headers': '*',
# 'Access-Control-Allow-Methods': '*',
# 'Access-Control-Allow-Origin': '*',
# # see https://content-security-policy.com/
# 'Content-Security-Policy':
# "script-src https: localhost:8080 my.dev.com:8080 'unsafe-inline'; "
# "frame-ancestors 'self'; "
# "report-uri /csp-report; ",
# # 16070400 seconds is 6 months
# 'Strict-Transport-Security': 'max-age=16070400; preload',
# 'X-Content-Type-Options': 'nosniff',
# # https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Frame-Options
# 'X-Frame-Options': 'SAMEORIGIN',
# 'X-XSS-Protection': '1; mode=block',
# })
# def options(self, *args, **kwargs):
# """Respond to CORS pre-flight OPTIONS requests."""
# pass
def not_5xx(resp):
    """Returns True if resp is a response tuple whose status is not 5xx.

    Non-tuple responses (and tuples too short to carry a second element)
    return False, as before.

    Flask view functions may return ``(body, status)``,
    ``(body, status, headers)`` or ``(body, headers)``. The last form has no
    explicit status, so it is treated as an implicit 200 instead of raising
    TypeError when the headers mapping is floor-divided by 100.

    Args:
      resp: the value returned by a view function

    Returns:
      bool
    """
    if not isinstance(resp, tuple) or len(resp) < 2:
        return False

    status = resp[1]
    if isinstance(status, int):
        return status // 100 != 5
    if isinstance(status, str):
        # Flask also accepts status lines such as '503 Service Unavailable'.
        return not status.lstrip().startswith('5')

    # (body, headers): no explicit status, so the response defaults to 200.
    return True
@ -423,3 +448,79 @@ def redirect_unwrap(val):
return util.follow_redirects(domain).url
return val
class XrdOrJrd(View):
    """Renders and serves an XRD or JRD file.

    JRD is served if the request path ends in .jrd or .json, or the format
    query parameter is 'jrd' or 'json', or the request's Accept header
    includes 'jrd' or 'json'.

    XRD is served if the request path ends in .xrd or .xml, or the format
    query parameter is 'xml' or 'xrd', or the request's Accept header
    includes 'xml' or 'xrd'.

    The path extension, format parameter and Accept header are all matched
    case-insensitively. Otherwise, defaults to DEFAULT_TYPE.

    Subclasses must override :meth:`template_prefix()` and
    :meth:`template_vars()`. URL route variables are passed through to
    :meth:`template_vars()` as keyword args.

    Class members:
      DEFAULT_TYPE: either JRD or XRD, which type to return by default if the
        request doesn't ask for one explicitly with the Accept header.
    """
    JRD = 'jrd'
    XRD = 'xrd'
    DEFAULT_TYPE = JRD  # either JRD or XRD

    def template_prefix(self):
        """Returns template filename, without extension."""
        raise NotImplementedError()

    def template_vars(self, **kwargs):
        """Returns a dict with template variables.

        URL route variables are passed through as kwargs.

        A non-dict return value is passed back from
        :meth:`dispatch_request` unchanged instead of being rendered.
        """
        raise NotImplementedError()

    def _type(self):
        """Returns XRD or JRD.

        Precedence: path extension or ``format`` query parameter first, then
        the Accept header, then DEFAULT_TYPE.
        """
        fmt = request.args.get('format', '').lower()
        # Lower-cased so that e.g. /x.JSON is handled like ?format=JSON.
        ext = os.path.splitext(request.path)[1].lower()
        if ext in ('.jrd', '.json') or fmt in ('jrd', 'json'):
            return self.JRD
        if ext in ('.xrd', '.xml') or fmt in ('xrd', 'xml'):
            return self.XRD

        # We don't do full content negotiation (Accept header parsing); we
        # just check whether jrd/json and xrd/xml are in the header, and if
        # they both are, which one comes first. :/
        # https://developer.mozilla.org/en-US/docs/Web/HTTP/Content_negotiation
        accept = request.headers.get('Accept', '').lower()
        jrd = re.search(r'jrd|json', accept)
        xrd = re.search(r'xrd|xml', accept)
        if jrd and (not xrd or jrd.start() < xrd.start()):
            return self.JRD
        if xrd and (not jrd or xrd.start() < jrd.start()):
            return self.XRD

        assert self.DEFAULT_TYPE in (self.JRD, self.XRD)
        return self.DEFAULT_TYPE

    def dispatch_request(self, **kwargs):
        """Serves the JRD or XRD response for the current request.

        Args:
          kwargs: URL route variables, passed through to
            :meth:`template_vars()`

        Returns:
          whatever :meth:`template_vars()` returned if it isn't a dict;
          otherwise a ``(body, headers)`` tuple, where body is the dict
          itself for JRD or the rendered template for XRD
        """
        data = self.template_vars(**kwargs)
        if not isinstance(data, dict):
            return data

        # Negotiate once so the branch and the template name always agree.
        kind = self._type()

        # Content-Types are from https://tools.ietf.org/html/rfc7033#section-10.2
        if kind == self.JRD:
            return data, {'Content-Type': 'application/jrd+json'}

        template = f'{self.template_prefix()}.{kind}'
        return (render_template(template, **data),
                {'Content-Type': 'application/xrd+xml; charset=utf-8'})

Wyświetl plik

@ -14,7 +14,7 @@ class LogHandler(logs.LogHandler):
VERSION_IDS = ['1']
@app.route('/responses')
@app.get('/responses')
def responses():
"""Renders recent Responses, with links to logs."""
responses = Response.query().order(-Response.updated).fetch(20)

Wyświetl plik

@ -28,7 +28,7 @@ from models import MagicKey
CACHE_TIME = datetime.timedelta(seconds=15)
@app.route(r'/r/<path:to>')
@app.get(r'/r/<path:to>')
@cache.cached(timeout=CACHE_TIME.total_seconds(), query_string=True,
response_filter=common.not_5xx)
def redir(to=None):

Wyświetl plik

@ -13,7 +13,7 @@ from models import Response
CACHE_TIME = datetime.timedelta(minutes=15)
@app.route('/render')
@app.get('/render')
@cache.cached(timeout=CACHE_TIME.total_seconds(), query_string=True,
response_filter=common.not_5xx)
def render():

Wyświetl plik

@ -31,7 +31,7 @@ SUPPORTED_VERBS = (
)
@app.route('/<string:acct>/salmon', methods=['POST'])
@app.post('/<acct>/salmon')
def slap(acct):
"""Accepts POSTs to /[ACCT]/salmon and converts to outbound webmentions."""
# TODO: unify with activitypub

Wyświetl plik

@ -1,8 +1,10 @@
# coding=utf-8
"""Unit tests for common.py."""
import logging
import os
from unittest import mock
from flask import Flask
from oauth_dropins.webutil import util
from oauth_dropins.webutil.testutil import requests_response
import requests
@ -75,3 +77,69 @@ class CommonTest(testutil.TestCase):
'id': 'xyz',
'inReplyTo': ['foo', 'bar'],
}))
class XrdOrJrdTest(testutil.TestCase):
    """Tests :class:`common.XrdOrJrd` through a throwaway Flask app."""

    def setUp(self):
        super().setUp()

        # Minimal concrete subclass; defined per test so that tests which
        # change DEFAULT_TYPE don't leak into each other.
        class View(common.XrdOrJrd):
            def template_prefix(self):
                return 'test_template'

            def template_vars(self, **kwargs):
                return {'foo': 'bar'}

        self.View = View

        self.app = Flask('XrdOrJrdTest')
        # The test_template.* fixtures are looked up next to this file.
        self.app.template_folder = os.path.dirname(__file__)

        view_func = View.as_view('XrdOrJrdTest')
        self.app.add_url_rule('/', view_func=view_func)
        self.app.add_url_rule('/<path>', view_func=view_func)
        self.client = self.app.test_client()

    def assert_jrd(self, resp, expected=None):
        """Asserts that resp is a 200 JRD response with the expected JSON.

        Args:
          resp: test client response
          expected: dict, defaults to ``{'foo': 'bar'}``
        """
        if expected is None:
            # Built per call instead of a shared mutable default argument.
            expected = {'foo': 'bar'}

        self.assertEqual(200, resp.status_code)
        self.assertEqual('application/jrd+json', resp.headers['Content-Type'])
        self.assertEqual(expected, resp.json)

    def assert_xrd(self, resp, expected='<XRD><Foo>bar</Foo></XRD>'):
        """Asserts that resp is a 200 XRD response with the expected body.

        Args:
          resp: test client response
          expected: str, the full response body
        """
        self.assertEqual(200, resp.status_code)
        self.assertEqual('application/xrd+xml; charset=utf-8',
                         resp.headers['Content-Type'])
        self.assertEqual(expected, resp.get_data(as_text=True))

    def test_xrd_or_jrd_handler_default_jrd(self):
        self.assert_jrd(self.client.get('/'))

        for path, headers in (
            ('/x.xrd', None),
            ('/x.xml', None),
            ('/?format=xrd', None),
            ('/?format=xml', None),
            ('/', {'Accept': 'application/xrd+xml'}),
            ('/', {'Accept': 'application/xml'}),
        ):
            # subTest names the failing variant instead of just the loop.
            with self.subTest(path=path, headers=headers):
                self.assert_xrd(self.client.get(path, headers=headers))

    def test_xrd_or_jrd_handler_default_xrd(self):
        self.View.DEFAULT_TYPE = common.XrdOrJrd.XRD

        self.assert_xrd(self.client.get('/'))

        for path, headers in (
            ('/x.jrd', None),
            ('/x.json', None),
            ('/?format=jrd', None),
            ('/?format=json', None),
            ('/', {'Accept': 'application/jrd+json'}),
            ('/', {'Accept': 'application/json'}),
        ):
            with self.subTest(path=path, headers=headers):
                self.assert_jrd(self.client.get(path, headers=headers))

    def test_xrd_or_jrd_handler_accept_header_order(self):
        # When both types are acceptable, whichever is listed first wins.
        self.assert_jrd(self.client.get('/', headers={
            'Accept': 'application/jrd+json,application/xrd+xml',
        }))
        self.assert_xrd(self.client.get('/', headers={
            'Accept': 'application/xrd+xml,application/jrd+json',
        }))

Wyświetl plik

@ -0,0 +1 @@
{"foo": "{{ foo }}" }

Wyświetl plik

@ -0,0 +1 @@
<XRD><Foo>{{ foo }}</Foo></XRD>

Wyświetl plik

@ -88,21 +88,23 @@ class WebfingerTest(testutil.TestCase):
self.assertEqual(200, got.status_code)
self.assertEqual('application/xrd+xml; charset=utf-8',
got.headers['Content-Type'])
self.assertTrue(body.startswith('<?xml'), got.get_data(as_text=True))
body = got.get_data(as_text=True)
self.assertTrue(body.startswith('<?xml'), body)
def test_host_meta_handler_xrds(self):
got = client.get('/.well-known/host-meta.xrds')
self.assertEqual(200, got.status_code)
self.assertEqual('application/xrds+xml; charset=utf-8',
got.headers['Content-Type'])
self.assertTrue(body.startswith('<XRDS'), got.get_data(as_text=True))
body = got.get_data(as_text=True)
self.assertTrue(body.startswith('<XRDS'), body)
def test_host_meta_handler_jrd(self):
got = client.get('/.well-known/host-meta.json')
self.assertEqual(200, got.status_code)
self.assertEqual('application/jrd+json; charset=utf-8',
got.headers['Content-Type'])
self.assertTrue(body.startswith('{'), got.get_data(as_text=True))
self.assertEqual('application/jrd+json', got.headers['Content-Type'])
body = got.get_data(as_text=True)
self.assertTrue(body.startswith('{'), body)
@mock.patch('requests.get')
def test_user_handler(self, mock_get):
@ -110,16 +112,15 @@ class WebfingerTest(testutil.TestCase):
got = client.get('/acct:foo.com', headers={'Accept': 'application/json'})
self.assertEqual(200, got.status_code)
self.assertEqual('application/jrd+json; charset=utf-8',
got.headers['Content-Type'])
self.assertEqual('application/jrd+json', got.headers['Content-Type'])
mock_get.assert_called_once_with('http://foo.com/', headers=common.HEADERS,
stream=True, timeout=util.HTTP_TIMEOUT)
self.assertEqual(self.expected_webfinger, got.json)
# check that magic key is persistent
again = json_loads(client.get(
'/acct:foo.com', headers={'Accept': 'application/json'}).body.decode())
again = client.get('/acct:foo.com',
headers={'Accept': 'application/json'}).json
self.assertEqual(self.key.href(), again['magic_keys'][0]['value'])
links = {l['rel']: l['href'] for l in again['links']}
@ -191,8 +192,7 @@ class WebfingerTest(testutil.TestCase):
{'resource': resource})
got = client.get(url, headers={'Accept': 'application/json'})
self.assertEqual(200, got.status_code, got.get_data(as_text=True))
self.assertEqual('application/jrd+json; charset=utf-8',
got.headers['Content-Type'])
self.assertEqual('application/jrd+json', got.headers['Content-Type'])
self.assertEqual(self.expected_webfinger, got.json)
@mock.patch('requests.get')
@ -235,6 +235,5 @@ class WebfingerTest(testutil.TestCase):
{'resource': resource})
got = client.get(url, headers={'Accept': 'application/json'})
self.assertEqual(200, got.status_code, got.get_data(as_text=True))
self.assertEqual('application/jrd+json; charset=utf-8',
got.headers['Content-Type'])
self.assertEqual(self.expected_webfinger, json_loads(body))
self.assertEqual('application/jrd+json', got.headers['Content-Type'])
self.assertEqual(self.expected_webfinger, got.json)

Wyświetl plik

@ -16,7 +16,7 @@ class TestCase(unittest.TestCase, testutil.Asserts):
maxDiff = None
def setUp(self):
super(TestCase, self).setUp()
super().setUp()
# clear datastore
requests.post('http://%s/reset' % ndb_client.host)
@ -29,7 +29,7 @@ class TestCase(unittest.TestCase, testutil.Asserts):
def tearDown(self):
self.ndb_context.__exit__(None, None, None)
super(TestCase, self).tearDown()
super().tearDown()
def req(self, url, **kwargs):
"""Returns a mock requests call."""

Wyświetl plik

@ -11,6 +11,7 @@ import re
import urllib.parse
from flask import render_template, request
from flask.views import View
from granary.microformats2 import get_text
import mf2util
from oauth_dropins.webutil import handlers, util
@ -26,130 +27,130 @@ CACHE_TIME = datetime.timedelta(seconds=15)
NON_TLDS = frozenset(('html', 'json', 'php', 'xml'))
@app.route('/acct:<string:domain>')
@cache.cached(
CACHE_TIME.total_seconds(),
make_cache_key=lambda domain: f'{request.path} {request.headers.get("Accept")}')
def user(domain):
# TODO
# @cache.cached(
# CACHE_TIME.total_seconds(),
# make_cache_key=lambda domain: f'{request.path} {request.headers.get("Accept")}')
class User(common.XrdOrJrd):
"""Fetches a site's home page, converts its mf2 to WebFinger, and serves."""
return _user(domain, None)
def template_prefix(self):
return 'webfinger_user'
def template_vars(self, domain=None, url=None):
if not re.match(common.DOMAIN_RE, domain):
return error(f'{domain} is not a domain', status=404)
def _user(domain, url):
if not re.match(common.DOMAIN_RE, domain):
return error(f'{domain} is not a domain', status=404)
logging.debug(f'Headers: {list(request.headers.items())}')
logging.debug(f'Headers: {list(request.headers.items())}')
if domain.split('.')[-1] in NON_TLDS:
return error(f"{domain} doesn't look like a domain", status=404)
if domain.split('.')[-1] in NON_TLDS:
return error(f"{domain} doesn't look like a domain", status=404)
# find representative h-card. try url, then url's home page, then domain
urls = [f'http://{domain}/']
if url:
urls = [url, urllib.parse.urljoin(url, '/')] + urls
# find representative h-card. try url, then url's home page, then domain
urls = [f'http://{domain}/']
if url:
urls = [url, urllib.parse.urljoin(url, '/')] + urls
for candidate in urls:
resp = common.requests_get(candidate)
parsed = util.parse_html(resp)
mf2 = util.parse_mf2(parsed, url=resp.url)
# logging.debug('Parsed mf2 for %s: %s', resp.url, json_dumps(mf2, indent=2))
hcard = mf2util.representative_hcard(mf2, resp.url)
if hcard:
logging.info(f'Representative h-card: {json_dumps(hcard, indent=2)}')
break
else:
return error(f"didn't find a representative h-card (http://microformats.org/wiki/representative-hcard-parsing) on {resp.url}")
logging.info(f'Generating WebFinger data for {domain}')
key = models.MagicKey.get_or_create(domain)
props = hcard.get('properties', {})
urls = util.dedupe_urls(props.get('url', []) + [resp.url])
canonical_url = urls[0]
acct = f'{domain}@{domain}'
for url in urls:
if url.startswith('acct:'):
urluser, urldomain = util.parse_acct_uri(url)
if urldomain == domain:
acct = f'{urluser}@{domain}'
logging.info(f'Found custom username: acct:{acct}')
for candidate in urls:
resp = common.requests_get(candidate)
parsed = util.parse_html(resp)
mf2 = util.parse_mf2(parsed, url=resp.url)
# logging.debug('Parsed mf2 for %s: %s', resp.url, json_dumps(mf2, indent=2))
hcard = mf2util.representative_hcard(mf2, resp.url)
if hcard:
logging.info(f'Representative h-card: {json_dumps(hcard, indent=2)}')
break
# discover atom feed, if any
atom = parsed.find('link', rel='alternate', type=common.CONTENT_TYPE_ATOM)
if atom and atom['href']:
atom = urllib.parse.urljoin(resp.url, atom['href'])
else:
atom = 'https://granary.io/url?' + urllib.parse.urlencode({
'input': 'html',
'output': 'atom',
'url': resp.url,
'hub': resp.url,
})
# discover PuSH, if any
for link in resp.headers.get('Link', '').split(','):
match = common.LINK_HEADER_RE.match(link)
if match and match.group(2) == 'hub':
hub = match.group(1)
else:
hub = 'https://bridgy-fed.superfeedr.com/'
return error(f"didn't find a representative h-card (http://microformats.org/wiki/representative-hcard-parsing) on {resp.url}")
# generate webfinger content
data = util.trim_nulls({
'subject': 'acct:' + acct,
'aliases': urls,
'magic_keys': [{'value': key.href()}],
'links': sum(([{
'rel': 'http://webfinger.net/rel/profile-page',
'type': 'text/html',
'href': url,
}] for url in urls if url.startswith("http")), []) + [{
'rel': 'http://webfinger.net/rel/avatar',
'href': get_text(url),
} for url in props.get('photo', [])] + [{
'rel': 'canonical_uri',
'type': 'text/html',
'href': canonical_url,
},
logging.info(f'Generating WebFinger data for {domain}')
key = models.MagicKey.get_or_create(domain)
props = hcard.get('properties', {})
urls = util.dedupe_urls(props.get('url', []) + [resp.url])
canonical_url = urls[0]
# ActivityPub
{
'rel': 'self',
'type': common.CONTENT_TYPE_AS2,
# WARNING: in python 2 sometimes request.host_url lost port,
# http://localhost:8080 would become just http://localhost. no
# clue how or why. pay attention here if that happens again.
'href': f'{request.host_url}{domain}',
}, {
'rel': 'inbox',
'type': common.CONTENT_TYPE_AS2,
'href': f'{request.host_url}{domain}/inbox',
},
acct = f'{domain}@{domain}'
for url in urls:
if url.startswith('acct:'):
urluser, urldomain = util.parse_acct_uri(url)
if urldomain == domain:
acct = f'{urluser}@{domain}'
logging.info(f'Found custom username: acct:{acct}')
break
# OStatus
{
'rel': 'http://schemas.google.com/g/2010#updates-from',
'type': common.CONTENT_TYPE_ATOM,
'href': atom,
}, {
'rel': 'hub',
'href': hub,
}, {
'rel': 'magic-public-key',
'href': key.href(),
}, {
'rel': 'salmon',
'href': f'{request.host_url}{domain}/salmon',
}]
})
logging.info(f'Returning WebFinger data: {json_dumps(data, indent=2)}')
return render_template('webfinger_user.xrd', **data)
# discover atom feed, if any
atom = parsed.find('link', rel='alternate', type=common.CONTENT_TYPE_ATOM)
if atom and atom['href']:
atom = urllib.parse.urljoin(resp.url, atom['href'])
else:
atom = 'https://granary.io/url?' + urllib.parse.urlencode({
'input': 'html',
'output': 'atom',
'url': resp.url,
'hub': resp.url,
})
# discover PuSH, if any
for link in resp.headers.get('Link', '').split(','):
match = common.LINK_HEADER_RE.match(link)
if match and match.group(2) == 'hub':
hub = match.group(1)
else:
hub = 'https://bridgy-fed.superfeedr.com/'
# generate webfinger content
data = util.trim_nulls({
'subject': 'acct:' + acct,
'aliases': urls,
'magic_keys': [{'value': key.href()}],
'links': sum(([{
'rel': 'http://webfinger.net/rel/profile-page',
'type': 'text/html',
'href': url,
}] for url in urls if url.startswith("http")), []) + [{
'rel': 'http://webfinger.net/rel/avatar',
'href': get_text(url),
} for url in props.get('photo', [])] + [{
'rel': 'canonical_uri',
'type': 'text/html',
'href': canonical_url,
},
# ActivityPub
{
'rel': 'self',
'type': common.CONTENT_TYPE_AS2,
# WARNING: in python 2 sometimes request.host_url lost port,
# http://localhost:8080 would become just http://localhost. no
# clue how or why. pay attention here if that happens again.
'href': f'{request.host_url}{domain}',
}, {
'rel': 'inbox',
'type': common.CONTENT_TYPE_AS2,
'href': f'{request.host_url}{domain}/inbox',
},
# OStatus
{
'rel': 'http://schemas.google.com/g/2010#updates-from',
'type': common.CONTENT_TYPE_ATOM,
'href': atom,
}, {
'rel': 'hub',
'href': hub,
}, {
'rel': 'magic-public-key',
'href': key.href(),
}, {
'rel': 'salmon',
'href': f'{request.host_url}{domain}/salmon',
}]
})
logging.info(f'Returning WebFinger data: {json_dumps(data, indent=2)}')
return data
@app.route('/.well-known/webfinger')
def webfinger():
class Webfinger(User):
"""Handles Webfinger requests.
https://webfinger.net/
@ -157,16 +158,21 @@ def webfinger():
Supports both JRD and XRD; defaults to JRD.
https://tools.ietf.org/html/rfc7033#section-4
"""
resource = common.get_required_param('resource')
try:
user, domain = util.parse_acct_uri(resource)
if domain in common.DOMAINS:
domain = user
except ValueError:
domain = urllib.parse.urlparse(resource).netloc or resource
def template_vars(self):
resource = common.get_required_param('resource')
try:
user, domain = util.parse_acct_uri(resource)
if domain in common.DOMAINS:
domain = user
except ValueError:
domain = urllib.parse.urlparse(resource).netloc or resource
url = None
if resource.startswith('http://') or resource.startswith('https://'):
url = resource
url = None
if resource.startswith('http://') or resource.startswith('https://'):
url = resource
return _user(domain, url)
return super().template_vars(domain=domain, url=url)
app.add_url_rule('/acct:<domain>', view_func=User.as_view('user'))
app.add_url_rule('/.well-known/webfinger', view_func=Webfinger.as_view('webfinger'))