2023-10-06 06:32:31 +00:00
|
|
|
"""Serves ``/convert/...`` URLs to convert data from one protocol to another.
|
2023-05-24 22:18:31 +00:00
|
|
|
|
2023-10-06 06:32:31 +00:00
|
|
|
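URL pattern is ``/convert/DEST/ID``, where ``DEST`` is one of the ``LABEL``
constants from the :class:`protocol.Protocol` subclasses and the source protocol
is taken from the request's subdomain. The older ``/convert/SOURCE/DEST/ID``
pattern, with ``SOURCE`` also a ``LABEL`` constant, is deprecated.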
|
2023-05-24 22:18:31 +00:00
|
|
|
"""
|
|
|
|
import logging
|
|
|
|
import re
|
2023-07-26 15:34:44 +00:00
|
|
|
from urllib.parse import quote, unquote
|
2023-05-24 22:18:31 +00:00
|
|
|
|
2024-05-08 00:01:01 +00:00
|
|
|
from flask import redirect, request
|
2023-05-24 23:00:41 +00:00
|
|
|
from granary import as1
|
2023-05-24 22:18:31 +00:00
|
|
|
from oauth_dropins.webutil import flask_util, util
|
|
|
|
from oauth_dropins.webutil.flask_util import error
|
|
|
|
|
|
|
|
from activitypub import ActivityPub
|
2024-06-04 21:19:04 +00:00
|
|
|
from common import CACHE_CONTROL, LOCAL_DOMAINS, subdomain_wrap, SUPERDOMAIN
|
2024-06-01 14:07:00 +00:00
|
|
|
from flask_app import app
|
2023-05-26 23:07:36 +00:00
|
|
|
from models import Object, PROTOCOLS
|
2023-06-14 22:02:59 +00:00
|
|
|
from protocol import Protocol
|
2023-05-27 00:40:29 +00:00
|
|
|
from web import Web
|
2023-05-24 22:18:31 +00:00
|
|
|
|
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
|
|
|
|
|
2023-11-07 21:24:54 +00:00
|
|
|
|
|
|
|
@app.get('/convert/<dest>/<path:_>')
@flask_util.headers(CACHE_CONTROL)
def convert(dest, _, src=None):
    """Converts data from one protocol to another and serves it.

    Fetches the source data if it's not already stored.

    Args:
      dest (str): protocol label, looked up in ``PROTOCOLS``
      _ (str): remainder of the URL path. Unused; the object id is parsed
        from ``request.url`` instead so that it matches the original URL
        exactly.
      src (str): protocol, only used when called by
        :func:`convert_source_path_redirect`

    Returns:
      One of:

      * a 301 redirect to the inner object's convert URL, when the loaded
        object is a post/update/delete activity whose inner object is stored
        locally with more than just ``id``/``url``/``objectType``
      * ``('', 410, headers)`` when the object is deleted or is a delete
        activity
      * ``(converted, headers)`` otherwise, where ``converted`` is the result
        of ``dest_cls.convert(obj)``

    Every failure path calls ``error(..., status=404)``, which is expected to
    abort the request.
    """
    if src:
        src_cls = PROTOCOLS.get(src)
        if not src_cls:
            error(f'No protocol found for {src}', status=404)
        logger.info(f'Overriding any domain protocol with {src}')
    else:
        src_cls = Protocol.for_request(fed=Protocol)
    if not src_cls:
        error(f'Unknown protocol {request.host.removesuffix(SUPERDOMAIN)}', status=404)

    dest_cls = PROTOCOLS.get(dest)
    if not dest_cls:
        # this message was previously missing its f prefix, so it served the
        # literal text "{dest}" instead of the requested protocol
        error(f'Unknown protocol {dest}', status=404)

    # don't use urllib.parse.urlencode(request.args) because that doesn't
    # guarantee us the same query param string as in the original URL, and we
    # want exactly the same thing since we're looking up the URL's Object by id
    path_prefix = f'convert/{dest}/'
    src_id = unquote(
        request.url.removeprefix(request.root_url).removeprefix(path_prefix))

    # our redirects evidently collapse :// down to :/ , maybe to prevent URL
    # parsing bugs? if that happened to this URL, expand it back to ://
    src_id = re.sub(r'^(https?:/)([^/])', r'\1/\2', src_id)

    logger.info(f'Converting from {src_cls.LABEL} to {dest}: {src_id}')

    # load, and maybe fetch. if it's a post/update, redirect to inner object.
    obj = src_cls.load(src_id)
    if not obj:
        error(f"Couldn't load {src_id}", status=404)
    elif not obj.as1:
        error(f'Stored object for {src_id} has no data', status=404)

    obj_type = as1.object_type(obj.as1)
    if obj_type in ('post', 'update', 'delete'):
        inner_id = as1.get_object(obj.as1).get('id')
        if inner_id:
            # remote=False: only redirect if we already have the inner object
            inner_obj = src_cls.load(inner_id, remote=False)
            # skip inner objects that are just bare references, ie that have
            # nothing beyond id/url/objectType
            if (inner_obj and inner_obj.as1
                    and not inner_obj.as1.keys() <= {'id', 'url', 'objectType'}):
                logger.info(f'{obj_type} activity, redirecting to Object {inner_id}')
                return redirect(f'/{path_prefix}{inner_id}', code=301)

    headers = {
        'Content-Type': dest_cls.CONTENT_TYPE,
        'Vary': 'Accept',
    }

    # don't serve deletes or deleted objects
    if obj.deleted or obj_type == 'delete':
        return '', 410, headers

    # convert and serve
    return dest_cls.convert(obj), headers
|
2023-05-24 23:09:44 +00:00
|
|
|
|
|
|
|
|
|
|
|
@app.get('/render')
def render_redirect():
    """Permanently redirects the legacy ``/render?id=...`` endpoint.

    Reads the required ``id`` query parameter and sends the client to the
    equivalent ``/convert/web/...`` URL, wrapped with ``subdomain_wrap`` for
    :class:`ActivityPub`.
    """
    obj_id = flask_util.get_required_param('id')
    target = subdomain_wrap(ActivityPub, f'/convert/web/{obj_id}')
    return redirect(target, code=301)
|
2023-06-17 20:48:23 +00:00
|
|
|
|
|
|
|
|
2023-11-07 21:24:54 +00:00
|
|
|
@app.get(f'/convert/<src>/<dest>/<path:_>')
def convert_source_path_redirect(src, dest, _):
    """Handles the legacy route that carried the source protocol in the path.

    DEPRECATED! Only kept to support old webmention source URLs. The source
    protocol now comes from the subdomain instead.

    Args:
      src (str): source protocol label from the URL path
      dest (str): destination protocol label from the URL path
      _ (str): remainder of the URL path, unused

    Returns:
      On a local domain, whatever :func:`convert` returns for the request with
      the ``/SRC/`` path segment removed. Otherwise, a 301 redirect to the same
      path, minus ``/SRC/``, wrapped with ``subdomain_wrap`` for the source
      protocol.
    """
    src_segment = f'/{src}/'

    # no per-protocol subdomains
    # NOTE(review): this compares for_request()'s result against the string
    # 'web'; confirm whether it returns a label or a Protocol class.
    request_proto = Protocol.for_request()
    if request_proto not in (None, 'web'):
        error(f'Try again on fed.brid.gy', status=404)

    if request.host in LOCAL_DOMAINS:
        # serve directly: rewrite the URL so convert() parses the id from it
        # as if the source segment had never been there
        request.url = request.url.replace(src_segment, '/')
        return convert(dest, None, src)

    src_proto = PROTOCOLS.get(src)
    if not src_proto:
        error(f'No protocol found for {src}', status=404)

    # in prod, eg gunicorn, the path somehow gets URL-decoded before we see
    # it, so we need to re-encode.
    stripped_path = request.full_path.rstrip('?').replace(src_segment, '/')
    encoded_path = quote(stripped_path, safe=':/%')
    return redirect(subdomain_wrap(src_proto, encoded_path), code=301)
|