2023-10-06 06:32:31 +00:00
|
|
|
"""Webmention protocol with microformats2 in HTML, aka the IndieWeb stack."""
|
2023-05-31 00:24:49 +00:00
|
|
|
import datetime
|
2023-05-26 23:07:36 +00:00
|
|
|
import difflib
|
2017-08-15 14:39:22 +00:00
|
|
|
import logging
|
2023-06-09 17:58:28 +00:00
|
|
|
import re
|
2023-06-08 18:04:11 +00:00
|
|
|
import urllib.parse
|
2023-11-06 20:18:11 +00:00
|
|
|
from urllib.parse import quote, urlencode, urljoin, urlparse
|
2017-08-15 14:39:22 +00:00
|
|
|
|
2023-05-31 00:24:49 +00:00
|
|
|
from flask import g, redirect, render_template, request
|
2023-06-14 21:35:08 +00:00
|
|
|
from google.cloud import ndb
|
2023-06-20 18:22:54 +00:00
|
|
|
from google.cloud.ndb import ComputedProperty
|
2023-02-14 15:40:37 +00:00
|
|
|
from granary import as1, as2, microformats2
|
2017-08-15 14:39:22 +00:00
|
|
|
import mf2util
|
2021-07-18 04:22:13 +00:00
|
|
|
from oauth_dropins.webutil import flask_util, util
|
2023-01-05 04:48:39 +00:00
|
|
|
from oauth_dropins.webutil.appengine_config import tasks_client
|
2023-09-09 22:47:38 +00:00
|
|
|
from oauth_dropins.webutil import appengine_info
|
2023-10-17 18:33:56 +00:00
|
|
|
from oauth_dropins.webutil.flask_util import cloud_tasks_only, error, flash
|
2019-12-25 07:26:58 +00:00
|
|
|
from oauth_dropins.webutil.util import json_dumps, json_loads
|
2023-05-27 00:40:29 +00:00
|
|
|
from oauth_dropins.webutil import webmention
|
2023-07-14 19:45:47 +00:00
|
|
|
from requests import HTTPError, RequestException
|
2023-05-26 23:07:36 +00:00
|
|
|
from werkzeug.exceptions import BadGateway, BadRequest, HTTPException, NotFound
|
2017-08-15 14:39:22 +00:00
|
|
|
|
|
|
|
import common
|
2023-09-22 19:14:50 +00:00
|
|
|
from common import add, DOMAIN_RE
|
2023-05-31 00:24:49 +00:00
|
|
|
from flask_app import app, cache
|
2023-11-06 20:18:11 +00:00
|
|
|
from ids import translate_object_id
|
2023-05-26 23:07:36 +00:00
|
|
|
from models import Follower, Object, PROTOCOLS, Target, User
|
|
|
|
from protocol import Protocol
|
2017-08-15 14:39:22 +00:00
|
|
|
|
2022-02-12 06:38:56 +00:00
|
|
|
logger = logging.getLogger(__name__)

# Cloud Tasks queue location for this app.
# https://cloud.google.com/appengine/docs/locations
TASKS_LOCATION = 'us-central1'

# '!': the first printable character after space. NOTE(review): presumably used
# as a lower bound for datastore key range queries — not used in this file;
# confirm against callers.
CHAR_AFTER_SPACE = chr(ord(' ') + 1)

# www. subdomains that we should *not* collapse to their root domain, since the
# root doesn't serve (or serves something different).
# https://github.com/snarfed/bridgy-fed/issues/314
WWW_DOMAINS = frozenset((
    'www.jvt.me',
))

# strings that look like TLDs in URLs but are actually file extensions; domains
# ending in these are rejected by is_valid_domain()
NON_TLDS = frozenset(('html', 'json', 'php', 'xml'))
|
2017-08-15 14:39:22 +00:00
|
|
|
|
2023-05-26 23:07:36 +00:00
|
|
|
|
2023-09-22 22:14:15 +00:00
|
|
|
def is_valid_domain(domain):
    """Returns True if this is a valid domain we can use, False otherwise.

    Valid means it matches ``DOMAIN_RE``, isn't blocklisted, and doesn't end
    in a file-extension pseudo-TLD (see ``NON_TLDS``).
    """
    if not re.match(DOMAIN_RE, domain):
        logger.debug(f"{domain} doesn't look like a domain")
        return False

    if Web.is_blocklisted(domain):
        logger.debug(f'{domain} is blocklisted')
        return False

    # reject eg foo.html, which matches DOMAIN_RE but isn't a real domain
    tld = domain.rsplit('.', 1)[-1]
    if tld in NON_TLDS:
        logger.info(f"{domain} looks like a domain but {tld} isn't a TLD")
        return False

    return True
|
|
|
|
|
|
|
|
|
2023-05-27 00:40:29 +00:00
|
|
|
class Web(User, Protocol):
    """Web user and webmention protocol implementation.

    The key name is the domain.
    """
    ABBREV = 'web'
    OTHER_LABELS = ('webmention',)
    LOGO_HTML = '🕸️'
    CONTENT_TYPE = common.CONTENT_TYPE_HTML

    # whether the site's webfinger endpoint redirects to ours; set by verify()
    has_redirects = ndb.BooleanProperty()
    # human-readable HTML describing why the redirect check failed, if it did
    redirects_error = ndb.TextProperty()
    # whether the site's home page loaded with microformats2; set by verify()
    has_hcard = ndb.BooleanProperty()
2023-05-26 23:07:36 +00:00
|
|
|
    @classmethod
    def _get_kind(cls):
        # keep the legacy datastore kind name for backward compatibility with
        # existing stored entities
        return 'MagicKey'
|
|
|
|
|
2023-06-23 19:22:37 +00:00
|
|
|
def _pre_put_hook(self):
|
|
|
|
"""Validate domain id, don't allow upper case or invalid characters."""
|
|
|
|
super()._pre_put_hook()
|
2023-06-09 17:58:28 +00:00
|
|
|
id = self.key.id()
|
2023-09-25 22:08:14 +00:00
|
|
|
assert is_valid_domain(id), id
|
2023-06-16 19:05:41 +00:00
|
|
|
assert id.lower() == id, f'upper case is not allowed in Web key id: {id}'
|
2023-06-09 17:58:28 +00:00
|
|
|
|
|
|
|
@classmethod
|
|
|
|
def get_or_create(cls, id, **kwargs):
|
2023-08-18 20:30:26 +00:00
|
|
|
"""Normalizes domain, then passes through to :meth:`User.get_or_create`.
|
|
|
|
|
|
|
|
Normalizing currently consists of lower casing and removing leading and
|
|
|
|
trailing dots.
|
|
|
|
"""
|
|
|
|
return super().get_or_create(id.lower().strip('.'), **kwargs)
|
2023-06-09 17:58:28 +00:00
|
|
|
|
2023-09-25 22:08:14 +00:00
|
|
|
@ndb.ComputedProperty
|
2023-09-25 17:27:08 +00:00
|
|
|
def handle(self):
|
2023-09-25 19:33:24 +00:00
|
|
|
"""Returns this user's chosen username or domain, eg ``user.com``."""
|
|
|
|
# prettify if domain, noop if username
|
|
|
|
username = self.username()
|
|
|
|
if username != self.key.id():
|
|
|
|
return util.domain_from_link(username, minimize=False)
|
|
|
|
return username
|
2023-09-25 17:27:08 +00:00
|
|
|
|
2023-06-01 01:34:33 +00:00
|
|
|
def web_url(self):
|
2023-09-25 17:27:08 +00:00
|
|
|
"""Returns this user's web URL aka web_url, eg ``https://foo.com/``."""
|
2023-06-01 01:34:33 +00:00
|
|
|
return f'https://{self.key.id()}/'
|
|
|
|
|
2023-09-23 21:55:49 +00:00
|
|
|
profile_id = web_url
|
|
|
|
|
2023-10-19 23:25:04 +00:00
|
|
|
    def is_web_url(self, url):
        # treat www.<domain> and <domain> as the same site
        return super().is_web_url(url, ignore_www=True)
|
|
|
|
|
2023-05-31 17:47:09 +00:00
|
|
|
def ap_address(self):
|
2023-10-06 06:32:31 +00:00
|
|
|
"""Returns this user's ActivityPub address, eg ``@foo.com@foo.com``.
|
2023-05-31 17:47:09 +00:00
|
|
|
|
|
|
|
Uses the user's domain if they're direct, fed.brid.gy if they're not.
|
|
|
|
"""
|
|
|
|
if self.direct:
|
|
|
|
return f'@{self.username()}@{self.key.id()}'
|
2023-09-25 19:45:47 +00:00
|
|
|
|
|
|
|
return super().ap_address()
|
2023-05-31 17:47:09 +00:00
|
|
|
|
|
|
|
def ap_actor(self, rest=None):
|
|
|
|
"""Returns this user's ActivityPub/AS2 actor id.
|
|
|
|
|
2023-09-25 17:27:08 +00:00
|
|
|
Eg ``https://fed.brid.gy/foo.com``
|
2023-05-31 17:47:09 +00:00
|
|
|
|
2023-09-25 17:27:08 +00:00
|
|
|
Web users are special cased to not have an ``/ap/web/`` prefix, for
|
|
|
|
backward compatibility.
|
2023-05-31 17:47:09 +00:00
|
|
|
"""
|
2023-06-02 19:55:07 +00:00
|
|
|
url = common.host_url(self.key.id())
|
|
|
|
if rest:
|
2023-09-25 21:15:24 +00:00
|
|
|
url += f'/{rest.lstrip("/")}'
|
2023-06-02 19:55:07 +00:00
|
|
|
return url
|
2023-05-31 17:47:09 +00:00
|
|
|
|
2023-06-07 18:51:31 +00:00
|
|
|
def user_page_path(self, rest=None):
|
|
|
|
"""Always use domain."""
|
2023-06-11 15:14:17 +00:00
|
|
|
path = f'/{self.ABBREV}/{self.key.id()}'
|
2023-06-07 18:51:31 +00:00
|
|
|
|
|
|
|
if rest:
|
|
|
|
if not rest.startswith('?'):
|
|
|
|
path += '/'
|
2023-09-25 21:15:24 +00:00
|
|
|
path += rest.lstrip('/')
|
2023-06-07 18:51:31 +00:00
|
|
|
|
|
|
|
return path
|
|
|
|
|
2023-06-04 23:10:37 +00:00
|
|
|
def username(self):
|
|
|
|
"""Returns the user's preferred username.
|
|
|
|
|
|
|
|
Uses stored representative h-card if available, falls back to id.
|
|
|
|
|
2023-10-06 06:32:31 +00:00
|
|
|
Returns:
|
|
|
|
str:
|
2023-06-04 23:10:37 +00:00
|
|
|
"""
|
|
|
|
id = self.key.id()
|
|
|
|
|
2023-06-16 04:22:20 +00:00
|
|
|
if self.obj and self.obj.as1 and self.direct:
|
|
|
|
for url in (util.get_list(self.obj.as1, 'url') +
|
|
|
|
util.get_list(self.obj.as1, 'urls')):
|
|
|
|
url = url.get('value') if isinstance(url, dict) else url
|
2023-06-04 23:10:37 +00:00
|
|
|
if url and url.startswith('acct:'):
|
2023-06-14 20:34:29 +00:00
|
|
|
try:
|
|
|
|
urluser, urldomain = util.parse_acct_uri(url)
|
2023-06-27 03:22:06 +00:00
|
|
|
except ValueError as e:
|
2023-06-14 20:34:29 +00:00
|
|
|
continue
|
2023-06-04 23:10:37 +00:00
|
|
|
if urldomain == id:
|
|
|
|
logger.info(f'Found custom username: {urluser}')
|
|
|
|
return urluser
|
|
|
|
|
|
|
|
logger.info(f'Defaulting username to key id {id}')
|
|
|
|
return id
|
|
|
|
|
2023-05-26 23:07:36 +00:00
|
|
|
    def verify(self):
        """Fetches site a couple ways to check for redirects and h-card.

        Returns:
          web.Web: user that was verified. May be different than self! eg if
          self 's domain started with www and we switch to the root domain.
        """
        domain = self.key.id()
        logger.info(f'Verifying {domain}')

        if domain.startswith('www.') and domain not in WWW_DOMAINS:
            # if root domain serves ok, use it instead
            # https://github.com/snarfed/bridgy-fed/issues/314
            root = domain.removeprefix("www.")
            root_site = f'https://{root}/'
            try:
                resp = util.requests_get(root_site, gateway=False)
                if resp.ok and self.is_web_url(resp.url):
                    logger.info(f'{root_site} serves ok ; using {root} instead')
                    root_user = Web.get_or_create(root)
                    # point this (www) user at the root user, then verify that
                    # user instead; callers get the root user back
                    self.use_instead = root_user.key
                    self.put()
                    return root_user.verify()
            except RequestException:
                # root domain unreachable; keep the www domain
                pass

        # check webfinger redirect
        path = f'/.well-known/webfinger?resource=acct:{domain}@{domain}'
        self.has_redirects = False
        self.redirects_error = None
        try:
            url = urljoin(self.web_url(), path)
            resp = util.requests_get(url, gateway=False)
            # the redirect may land on any of our serving domains
            domain_urls = ([f'https://{domain}/' for domain in common.DOMAINS] +
                           [common.host_url()])
            expected = [urljoin(url, path) for url in domain_urls]
            if resp.ok and resp.url:
                got = urllib.parse.unquote(resp.url)
                if got in expected:
                    self.has_redirects = True
                elif got:
                    # show the user a diff of where their redirect actually went
                    diff = '\n'.join(difflib.Differ().compare([got], [expected[0]]))
                    self.redirects_error = f'Current vs expected:<pre>{diff}</pre>'
            else:
                lines = [url, f' returned HTTP {resp.status_code}']
                if resp.url and resp.url != url:
                    lines[1:1] = [' redirected to:', resp.url]
                self.redirects_error = '<pre>' + '\n'.join(lines) + '</pre>'
        except RequestException:
            # network error; leave has_redirects False with no error message
            pass

        # check home page
        self.obj = None
        self.has_hcard = False
        try:
            # remote=True forces a refetch of the home page profile
            self.obj = Web.load(self.web_url(), remote=True, gateway=True)
            if self.obj:
                self.has_hcard = True
        except (BadRequest, NotFound):
            pass

        self.put()
        return self
|
|
|
|
|
2023-06-13 21:30:00 +00:00
|
|
|
@classmethod
|
|
|
|
def key_for(cls, id):
|
|
|
|
"""Returns the :class:`ndb.Key` for a given id.
|
|
|
|
|
|
|
|
If id is a domain, uses it as is. If it's a home page URL or fed.brid.gy
|
|
|
|
or web.brid.gy AP actor URL, extracts the domain and uses that.
|
2023-11-07 22:34:54 +00:00
|
|
|
Otherwise, returns None.
|
2023-06-13 21:30:00 +00:00
|
|
|
|
|
|
|
Args:
|
2023-11-07 22:34:54 +00:00
|
|
|
id (str)
|
2023-06-15 17:52:11 +00:00
|
|
|
|
2023-11-07 22:34:54 +00:00
|
|
|
Returns:
|
|
|
|
ndb.Key or None:
|
2023-06-13 21:30:00 +00:00
|
|
|
"""
|
2023-06-15 17:52:11 +00:00
|
|
|
if not id:
|
2023-09-27 16:42:40 +00:00
|
|
|
return None
|
2023-06-13 21:30:00 +00:00
|
|
|
|
2023-06-14 20:46:13 +00:00
|
|
|
if util.is_web(id):
|
2023-06-13 21:30:00 +00:00
|
|
|
parsed = urlparse(id)
|
|
|
|
if parsed.path in ('', '/'):
|
2023-06-14 20:46:13 +00:00
|
|
|
id = parsed.netloc
|
|
|
|
|
2023-09-22 22:14:15 +00:00
|
|
|
if is_valid_domain(id):
|
2023-10-10 16:57:10 +00:00
|
|
|
return super().key_for(id)
|
2023-06-13 21:30:00 +00:00
|
|
|
|
2023-07-23 06:32:55 +00:00
|
|
|
logger.info(f'{id} is not a domain or usable home page URL')
|
|
|
|
return None
|
2023-06-13 21:30:00 +00:00
|
|
|
|
2023-06-13 20:17:11 +00:00
|
|
|
@classmethod
|
|
|
|
def owns_id(cls, id):
|
2023-06-15 17:52:11 +00:00
|
|
|
"""Returns None if id is a domain or http(s) URL, False otherwise.
|
2023-06-13 20:17:11 +00:00
|
|
|
|
|
|
|
All web pages are http(s) URLs, but not all http(s) URLs are web pages.
|
|
|
|
"""
|
2023-06-14 20:46:13 +00:00
|
|
|
if not id:
|
|
|
|
return False
|
|
|
|
|
2023-07-23 06:32:55 +00:00
|
|
|
key = cls.key_for(id)
|
|
|
|
if key:
|
|
|
|
user = key.get()
|
|
|
|
return True if user and user.has_redirects else None
|
2023-06-14 20:46:13 +00:00
|
|
|
|
2023-06-13 20:17:11 +00:00
|
|
|
return None if util.is_web(id) else False
|
|
|
|
|
2023-09-22 19:14:50 +00:00
|
|
|
@classmethod
|
|
|
|
def owns_handle(cls, handle):
|
2023-09-22 22:14:15 +00:00
|
|
|
if not is_valid_domain(handle):
|
2023-09-22 19:14:50 +00:00
|
|
|
return False
|
|
|
|
|
2023-09-22 20:11:15 +00:00
|
|
|
    @classmethod
    def handle_to_id(cls, handle):
        """Returns the key id for a handle; for web users they're the same."""
        # owns_handle returns False for invalid domains, None for "maybe"
        assert cls.owns_handle(handle) is not False
        return handle
|
|
|
|
|
2023-06-16 20:16:17 +00:00
|
|
|
@classmethod
|
|
|
|
def target_for(cls, obj, shared=False):
|
|
|
|
"""Returns `obj`'s id, as a URL webmention target."""
|
2023-06-21 14:22:03 +00:00
|
|
|
# TODO: we have entities in prod that fail this, eg
|
|
|
|
# https://indieweb.social/users/bismark has source_protocol webmention
|
|
|
|
# assert obj.source_protocol in (cls.LABEL, cls.ABBREV, 'ui', None), str(obj)
|
2023-06-16 20:16:17 +00:00
|
|
|
|
|
|
|
if not util.is_web(obj.key.id()):
|
|
|
|
logger.warning(f"{obj.key} is source_protocol web but id isn't a URL!")
|
|
|
|
return None
|
|
|
|
|
|
|
|
return obj.key.id()
|
|
|
|
|
2023-03-20 04:08:24 +00:00
|
|
|
    @classmethod
    def send(to_cls, obj, url, orig_obj=None, **kwargs):
        """Sends a webmention to a given target URL.

        See :meth:`Protocol.send` for details.

        Returns False if the target URL doesn't advertise a webmention endpoint,
        or if webmention/microformats2 don't support the activity type.
        https://fed.brid.gy/docs#error-handling
        """
        # we only send webmentions for responses. for sending normal posts etc
        # to followers, we just update our stored objects (elsewhere) and web
        # users consume them via feeds.
        verb = obj.as1.get('verb')

        if verb in ('accept', 'undo'):
            logger.info(f'Skipping sending {verb} (not supported in webmention/mf2) to {url}')
            return False
        elif url not in as1.targets(obj.as1):
            # target URL isn't one of the activity's targets; nothing to send
            # logger.info(f'Skipping sending to {url} , not a target in the object')
            return False
        elif to_cls.is_blocklisted(url):
            logger.info(f'Skipping sending to blocklisted {url}')
            return False

        # translate the object id into its web (mf2 HTML) rendering URL, then
        # percent-encode it for use as the webmention source
        source_id = translate_object_id(
            id=obj.key.id(), from_proto=PROTOCOLS[obj.source_protocol], to_proto=Web)
        source_url = quote(source_id, safe=':/%+')
        logger.info(f'Sending webmention from {source_url} to {url}')

        endpoint = common.webmention_discover(url).endpoint
        if not endpoint:
            # target doesn't support webmention
            return False

        webmention.send(endpoint, source_url, url)
        return True
|
|
|
|
|
2023-03-20 04:08:24 +00:00
|
|
|
    @classmethod
    def fetch(cls, obj, gateway=False, check_backlink=False, **kwargs):
        """Fetches a URL over HTTP and extracts its microformats2.

        Follows redirects, but doesn't change the original URL in ``obj``'s id!
        :class:`google.cloud.ndb.model.Model` doesn't allow that anyway, but more
        importantly, we want to preserve that original URL because other objects
        may refer to it instead of the final redirect destination URL.

        See :meth:`Protocol.fetch` for other background.

        Args:
          gateway (bool): passed through to
            :func:`oauth_dropins.webutil.util.fetch_mf2`
          check_backlink (bool): optional, whether to require a link to Bridgy
            Fed. Ignored if the URL is a homepage, ie has no path.
          kwargs: ignored
        """
        url = obj.key.id()
        if not util.is_web(url):
            logger.info(f'{url} is not a URL')
            return False

        is_homepage = urlparse(url).path.strip('/') == ''

        # homepages (profiles) don't need a backlink; posts do, when requested
        require_backlink = (common.host_url().rstrip('/')
                            if check_backlink and not is_homepage
                            else None)

        try:
            parsed = util.fetch_mf2(url, gateway=gateway,
                                    require_backlink=require_backlink)
        except ValueError as e:
            # error() raises, aborting the request
            error(str(e))

        if parsed is None:
            # fetch_mf2 returns None when a requested fragment id isn't found
            error(f'id {urlparse(url).fragment} not found in {url}')
        elif not parsed.get('items'):
            logger.info(f'No microformats2 found in {url}')
            return False

        # find mf2 item
        if is_homepage:
            logger.info(f"{url} is user's web url")
            entry = mf2util.representative_hcard(parsed, parsed['url'])
            if not entry:
                error(f"Couldn't find a representative h-card (http://microformats.org/wiki/representative-hcard-parsing) on {parsed['url']}")
            logger.info(f'Representative h-card: {json_dumps(entry, indent=2)}')
        else:
            entry = mf2util.find_first_entry(parsed, ['h-entry'])
            if not entry:
                error(f'No microformats2 h-entry found in {url}')

        # store final URL in mf2 object, and also default url property to it,
        # since that's the fallback for AS1/AS2 id
        if is_homepage:
            entry.setdefault('rel-urls', {}).update(parsed.get('rel-urls', {}))
            entry.setdefault('type', ['h-card'])
        props = entry.setdefault('properties', {})
        if parsed['url']:
            entry['url'] = parsed['url']
            props.setdefault('url', [parsed['url']])
        logger.info(f'Extracted microformats2 entry: {json_dumps(entry, indent=2)}')

        if not is_homepage:
            # default actor/author to home page URL
            authors = props.setdefault('author', [])
            if not microformats2.get_string_urls(authors):
                homepage = urljoin(parsed.get('url') or url, '/')
                logger.info(f'Defaulting author URL to {homepage}')
                if authors and isinstance(authors[0], dict):
                    authors[0]['properties']['url'] = [homepage]
                else:
                    authors.insert(0, homepage)

            # run full authorship algorithm if necessary:
            # https://indieweb.org/authorship
            # duplicated in microformats2.json_to_object
            author = util.get_first(props, 'author')
            if not isinstance(author, dict):
                logger.info(f'Fetching full authorship for author {author}')
                author = mf2util.find_author({'items': [entry]}, hentry=entry,
                                             fetch_mf2_func=util.fetch_mf2)
                logger.info(f'Got: {author}')
                if author:
                    props['author'] = util.trim_nulls([{
                        "type": ["h-card"],
                        'properties': {
                            field: [author[field]] if author.get(field) else []
                            for field in ('name', 'photo', 'url')
                        },
                    }])

        obj.mf2 = entry
        return True
|
2023-03-20 04:08:24 +00:00
|
|
|
|
2023-05-24 04:30:57 +00:00
|
|
|
    @classmethod
    def convert(cls, obj):
        """Converts a :class:`Object` to HTML.

        Args:
          obj (models.Object)

        Returns:
          str:
        """
        if not obj or not obj.as1:
            return ''

        obj_as1 = obj.as1
        from_proto = PROTOCOLS.get(obj.source_protocol)
        if from_proto:
            # fill in author/actor if available
            for field in 'author', 'actor':
                val = as1.get_object(obj.as1, field)
                # only hydrate bare {'id': ...} stubs; leave full objects alone
                if val.keys() == set(['id']) and val['id']:
                    loaded = from_proto.load(val['id'])
                    if loaded and loaded.as1:
                        obj_as1 = {**obj_as1, field: loaded.as1}
        else:
            logger.debug(f'Not hydrating actor or author due to source_protocol {obj.source_protocol}')

        html = microformats2.activities_to_html([cls.translate_ids(obj_as1)])

        # add HTML meta redirect to source page. should trigger for end users in
        # browsers but not for webmention receivers (hopefully).
        url = util.get_url(obj_as1)
        if url:
            utf8 = '<meta charset="utf-8">'
            refresh = f'<meta http-equiv="refresh" content="0;url={url}">'
            html = html.replace(utf8, utf8 + '\n' + refresh)

        return html
|
2023-05-24 04:30:57 +00:00
|
|
|
|
2023-03-20 04:08:24 +00:00
|
|
|
|
2023-05-31 00:24:49 +00:00
|
|
|
@app.get('/web-site')
@flask_util.cached(cache, datetime.timedelta(days=1))
def enter_web_site():
    """Renders the static form for signing up a web site."""
    return render_template('enter_web_site.html')
|
|
|
|
|
|
|
|
|
|
|
|
@app.post('/web-site')
def check_web_site():
    """Handles the sign-up form: creates and verifies a new web user."""
    url = request.values['url']
    # this normalizes and lower cases domain
    domain = util.domain_from_link(url, minimize=False)
    if not domain or not is_valid_domain(domain):
        flash(f'{url} is not a valid or supported web site')
        return render_template('enter_web_site.html'), 400

    try:
        user = Web.get_or_create(domain, direct=True)
        # verify() may return a different user, eg www collapsed to root domain
        user = user.verify()
    except BaseException as e:
        # deliberately broad: interpret_http_exception recognizes HTTP-ish
        # errors of many types; anything else is re-raised below
        code, body = util.interpret_http_exception(e)
        if code:
            flash(f"Couldn't connect to {url}: {e}")
            return render_template('enter_web_site.html')
        raise

    user.put()
    return redirect(user.user_page_path())
|
2023-05-31 00:24:49 +00:00
|
|
|
|
|
|
|
|
2023-04-06 16:16:25 +00:00
|
|
|
@app.post('/webmention')
def webmention_external():
    """Handles inbound webmention, enqueue task to process.

    Use a task queue to deliver to followers because we send to each inbox in
    serial, which can take a long time with many followers/instances.
    """
    logger.info(f'Params: {list(request.form.items())}')

    source = flask_util.get_required_param('source').strip()
    if not util.is_web(source):
        error(f'Bad URL {source}')

    domain = util.domain_from_link(source, minimize=False)
    if not domain:
        error(f'Bad source URL {source}')

    # only accept webmentions from domains that have signed up
    user = Web.get_by_id(domain)
    if not user:
        error(f'No user found for domain {domain}')

    # hand the original form params off to the task for async processing
    return common.create_task('webmention', **request.form)
|
2023-04-06 16:16:25 +00:00
|
|
|
|
|
|
|
|
2023-04-17 00:37:02 +00:00
|
|
|
@app.post('/webmention-interactive')
def webmention_interactive():
    """Handler that runs interactive webmention-based requests from the web UI.

    ...eg the update profile button on user pages.
    """
    source = flask_util.get_required_param('source').strip()

    try:
        # delegate validation and task creation to the external handler
        webmention_external()
        user = Web(id=util.domain_from_link(source, minimize=False))
        flash(f'Updating fediverse profile from <a href="{user.web_url()}">{user.key.id()}</a>...')
        return redirect(user.user_page_path(), code=302)

    except HTTPException as e:
        # surface the error to the user instead of returning an error status
        flash(util.linkify(str(e.description), pretty=True))
        return redirect('/', code=302)
|
2023-04-17 00:37:02 +00:00
|
|
|
|
|
|
|
|
2023-09-29 20:49:17 +00:00
|
|
|
@app.post('/queue/webmention')
@cloud_tasks_only
def webmention_task():
    """Handles inbound webmention task.

    Loads the source page, normalizes authorship, special cases deletes and
    home page (profile) updates, then delivers via :meth:`Web.receive`.

    Returns 304 for conditions that shouldn't be retried (unknown user,
    unloadable source, etc), per Cloud Tasks retry semantics.
    """
    logger.info(f'Params: {list(request.form.items())}')

    # load user
    source = flask_util.get_required_param('source').strip()
    domain = util.domain_from_link(source, minimize=False)
    logger.info(f'webmention from {domain}')

    user = Web.get_by_id(domain)
    if not user:
        # bug fix: the original logged f'User: {user.key}' *before* this
        # check, raising AttributeError on a missing user instead of the
        # intended 304
        error(f'No user found for domain {domain}', status=304)
    logger.info(f'User: {user.key}')

    # fetch source page
    try:
        # remote=True to force fetch, local=True to populate new/changed attrs
        obj = Web.load(source, local=True, remote=True,
                       check_backlink=not appengine_info.LOCAL_SERVER)
    except BadRequest as e:
        error(str(e.description), status=304)
    except HTTPError as e:
        if e.response.status_code not in (410, 404):
            error(f'{e} ; {e.response.text if e.response else ""}', status=502)

        # page is gone; only interpret as a delete if we previously published it
        create_id = f'{source}#bridgy-fed-create'
        logger.info(f'Interpreting as Delete. Looking for {create_id}')
        create = Object.get_by_id(create_id)
        if not create or create.status != 'complete':
            error(f"Bridgy Fed hasn't successfully published {source}", status=304)

        delete_id = f'{source}#bridgy-fed-delete'
        obj = Object(id=delete_id, status='new', our_as1={
            'id': delete_id,
            'objectType': 'activity',
            'verb': 'delete',
            'actor': user.web_url(),
            'object': source,
        })

    if not obj or (not obj.mf2 and obj.type != 'delete'):
        error(f"Couldn't load {source} as microformats2 HTML", status=304)
    elif obj.mf2 and 'h-entry' in obj.mf2.get('type', []):
        # force the entry's author to be this user's home page
        authors = obj.mf2['properties'].setdefault('author', [])
        author_urls = microformats2.get_string_urls(authors)
        if not author_urls:
            authors.append(user.web_url())
        elif not user.is_web_url(author_urls[0]):
            logger.info(f'Overriding author {author_urls[0]} with {user.web_url()}')
            if isinstance(authors[0], dict):
                authors[0]['properties']['url'] = [user.web_url()]
            else:
                authors[0] = user.web_url()

    # if source is home page, update Web user and send an actor Update to
    # followers' instances
    if user and (user.key.id() == obj.key.id()
                 or user.is_web_url(obj.key.id())):
        logger.info(f'Converted to AS1: {obj.type}: {json_dumps(obj.as1, indent=2)}')
        obj.put()
        user.obj = obj
        user.put()

        logger.info('Wrapping in Update for home page user profile')
        actor_as1 = {
            **obj.as1,
            'id': user.web_url(),
            'updated': util.now().isoformat(),
        }
        update_id = common.host_url(f'{obj.key.id()}#update-{util.now().isoformat()}')
        obj = Object(id=update_id, status='new', our_as1={
            'objectType': 'activity',
            'verb': 'update',
            'id': update_id,
            'actor': user.web_url(),
            'object': actor_as1,
        })

    try:
        return Web.receive(obj)
    except ValueError as e:
        logger.warning(e, exc_info=True)
        error(e, status=304)
|