bridgy-fed/redirect.py

124 wiersze
4.0 KiB
Python
Czysty Zwykły widok Historia

"""Simple conneg endpoint that serves AS2 or redirects to the original post.
Serves /r/https://foo.com/bar URL paths, where https://foo.com/bar is an
original post. Needed for Mastodon interop, they require that AS2 object ids and
urls are on the same domain that serves them. Background:
https://github.com/snarfed/bridgy-fed/issues/16#issuecomment-424799599
https://github.com/tootsuite/mastodon/pull/6219#issuecomment-429142747
The conneg makes these /r/ URLs searchable in Mastodon:
https://github.com/snarfed/bridgy-fed/issues/352
"""
import datetime
import logging
import re
import urllib.parse
2021-07-08 04:02:13 +00:00
from flask import redirect, request
from granary import as2, microformats2
import mf2util
from negotiator import ContentNegotiator, AcceptParameters, ContentType, Language
from oauth_dropins.webutil import flask_util, util
from oauth_dropins.webutil.flask_util import error
from oauth_dropins.webutil.util import json_dumps
2021-07-08 04:02:13 +00:00
from werkzeug.exceptions import abort
2021-07-08 04:02:13 +00:00
from app import app, cache
from common import (
CONTENT_TYPE_AS2,
CONTENT_TYPE_AS2_LD,
CONTENT_TYPE_HTML,
postprocess_as2,
)
2022-11-16 06:00:28 +00:00
from models import User
logger = logging.getLogger(__name__)

# Cache duration passed to flask_util.cached for the redir() endpoint.
CACHE_TIME = datetime.timedelta(seconds=15)

# Content negotiator for the Accept header. Lists the content types that
# redir() distinguishes between, in the same order as before: HTML, AS2, AS2-LD.
_negotiator = ContentNegotiator(acceptable=[
    AcceptParameters(ContentType(content_type))
    for content_type in (
        CONTENT_TYPE_HTML,
        CONTENT_TYPE_AS2,
        CONTENT_TYPE_AS2_LD,
    )
])
@app.get(r'/r/<path:to>')
@flask_util.cached(cache, CACHE_TIME)
def redir(to):
    """Serve AS2 for, or 301 redirect to, the embedded fully qualified URL.

    e.g. redirects /r/https://foo.com/bar?baz to https://foo.com/bar?baz

    The target URL's domain must match a stored :class:`User`; otherwise the
    request is answered with a 404, so that this endpoint is not an open
    redirect. If the request's Accept header negotiates to one of the AS2
    content types, the target is converted to AS2 and returned with that
    content type instead of redirecting.

    Args:
      to: str, the part of the request path after /r/

    Returns:
      either a (AS2 dict, headers dict) tuple or a 301 redirect response
    """
    # re-attach the query string, which Flask parses out of the path
    query = request.args
    if query:
        to = f'{to}?{urllib.parse.urlencode(query)}'

    # some browsers collapse repeated /s in the path down to a single slash.
    # if that happened to this URL, expand it back to two /s.
    to = re.sub(r'^(https?:/)([^/])', r'\1/\2', to)

    if not util.is_web(to):
        error(f'Expected fully qualified URL; got {to}')

    # check that we've seen this domain before so we're not an open redirect
    domains = {
        util.domain_from_link(to, minimize=True),
        util.domain_from_link(to, minimize=False),
        urllib.parse.urlparse(to).hostname,
    }

    user = None
    for domain in domains:
        if not domain:
            continue
        user = User.get_by_id(domain)
        if user:
            logger.info(f'Found User for domain {domain}')
            break

    if not user:
        logger.info(f'No user found for any of {domains}; returning 404')
        abort(404)

    # check conneg, serve AS2 if requested
    accept = request.headers.get('Accept')
    negotiated = _negotiator.negotiate(accept) if accept else None
    if negotiated:
        content_type = str(negotiated.content_type)
        if content_type in (CONTENT_TYPE_AS2, CONTENT_TYPE_AS2_LD):
            return convert_to_as2(to, user), {
                'Content-Type': content_type,
                'Access-Control-Allow-Origin': '*',
            }

    # no AS2 requested: plain redirect
    logger.info(f'redirecting to {to}')
    return redirect(to, code=301)
# officially supported monkey patch of flask_caching to include the Accept
# header in the cache key:
# https://flask-caching.readthedocs.io/en/latest/api.html#flask_caching.Cache.cached
# requires this pending bug fix:
# https://github.com/pallets-eco/flask-caching/pull/431
orig_cache_key = redir.make_cache_key


def accept_header_cache_key(*args, **kwargs):
    """Return redir's original cache key with the Accept header appended.

    All arguments are passed through unchanged to the original
    ``make_cache_key`` callable.
    """
    base_key = orig_cache_key(*args, **kwargs)
    accept = request.headers.get("Accept")
    return f'{base_key} {accept}'


redir.make_cache_key = accept_header_cache_key
2021-07-08 04:02:13 +00:00
def convert_to_as2(url, domain):
    """Fetch a URL's microformats2, convert its first h-entry to AS2, return it.

    Args:
      url: str, the page to fetch
      domain: :class:`User` (despite the parameter name); passed through to
        :func:`postprocess_as2`

    Returns:
      dict: AS2 object
    """
    parsed = util.fetch_mf2(url)
    entry = mf2util.find_first_entry(parsed, ['h-entry'])
    logger.info(f"Parsed mf2 for {parsed['url']}: {json_dumps(entry, indent=2)}")

    # mf2 entry => AS1 => AS2, then the project's AS2 postprocessing
    as1_obj = microformats2.json_to_object(entry)
    as2_obj = as2.from_as1(as1_obj)
    result = postprocess_as2(as2_obj, domain, create=False)

    logger.info(f'Returning: {json_dumps(result, indent=2)}')
    return result