2023-10-06 06:32:31 +00:00
|
|
|
"""Misc common utilities."""
|
2023-05-27 21:57:29 +00:00
|
|
|
import base64
|
2023-03-08 21:10:41 +00:00
|
|
|
from datetime import timedelta
|
2024-07-30 21:50:33 +00:00
|
|
|
import functools
|
2017-08-15 06:07:24 +00:00
|
|
|
import logging
|
2023-11-29 18:39:13 +00:00
|
|
|
from pathlib import Path
|
2017-09-13 14:48:32 +00:00
|
|
|
import re
|
2023-03-11 06:24:58 +00:00
|
|
|
import threading
|
2019-12-26 06:20:57 +00:00
|
|
|
import urllib.parse
|
2024-04-28 15:14:55 +00:00
|
|
|
from urllib.parse import urljoin, urlparse
|
2017-08-15 06:07:24 +00:00
|
|
|
|
2023-03-11 06:24:58 +00:00
|
|
|
import cachetools
|
2023-05-27 21:57:29 +00:00
|
|
|
from Crypto.Util import number
|
2024-07-05 19:05:08 +00:00
|
|
|
from flask import abort, g, has_request_context, make_response, request
|
2024-06-04 18:27:08 +00:00
|
|
|
from google.cloud.error_reporting.util import build_flask_context
|
2024-06-05 02:10:01 +00:00
|
|
|
from google.cloud.ndb.global_cache import _InProcessGlobalCache, MemcacheCache
|
2024-01-01 22:47:03 +00:00
|
|
|
from google.protobuf.timestamp_pb2 import Timestamp
|
2023-03-11 06:24:58 +00:00
|
|
|
from oauth_dropins.webutil import util, webmention
|
2024-05-09 20:26:24 +00:00
|
|
|
from oauth_dropins.webutil.appengine_config import error_reporting_client, tasks_client
|
2023-09-09 14:51:54 +00:00
|
|
|
from oauth_dropins.webutil import appengine_info
|
2023-09-13 21:36:24 +00:00
|
|
|
from oauth_dropins.webutil.appengine_info import DEBUG
|
2023-10-31 19:49:15 +00:00
|
|
|
from oauth_dropins.webutil import flask_util
|
2024-06-05 02:10:01 +00:00
|
|
|
import pymemcache.client.base
|
2024-06-13 20:54:37 +00:00
|
|
|
from pymemcache.test.utils import MockMemcacheClient
|
2017-10-16 14:13:43 +00:00
|
|
|
|
2022-02-12 06:38:56 +00:00
|
|
|
logger = logging.getLogger(__name__)


# allow hostname chars (a-z, 0-9, -), allow arbitrary unicode (eg ☃.net), don't
# allow specific chars that we'll often see in webfinger, AP handles, etc. (@, :)
# https://stackoverflow.com/questions/10306690/what-is-a-regular-expression-which-will-match-a-valid-domain-name-without-a-subd
#
# TODO: preprocess with domain2idna, then narrow this to just [a-z0-9-]
DOMAIN_RE = r'^([^/:;@?!\']+\.)+[^/:@_?!\']+$'

# Content-Type header value for HTML responses
CONTENT_TYPE_HTML = 'text/html; charset=utf-8'

# the canonical domain; non-canonical requests are mapped here (see host_url)
PRIMARY_DOMAIN = 'fed.brid.gy'
# protocol-specific subdomains are under this "super"domain
SUPERDOMAIN = '.brid.gy'
# TODO: add a Flask route decorator version of util.canonicalize_domain, then
# use it to canonicalize most UI routes from these to fed.brid.gy.
# TODO: unify with models.PROTOCOLS
PROTOCOL_DOMAINS = (
    'ap.brid.gy',
    'atproto.brid.gy',
    'bsky.brid.gy',
    'web.brid.gy',
    'eefake.brid.gy',
    'fa.brid.gy',
    'other.brid.gy',
)
# additional domains we serve on; host_url canonicalizes these to PRIMARY_DOMAIN
OTHER_DOMAINS = (
    'bridgy-federated.appspot.com',
    'bridgy-federated.uc.r.appspot.com',
)
# domains used when running a local dev server
LOCAL_DOMAINS = (
    'localhost',
    'localhost:8080',
    'my.dev.com:8080',
)
# every domain that counts as "ours", eg for redirect_wrap/unwrap
DOMAINS = (PRIMARY_DOMAIN,) + PROTOCOL_DOMAINS + OTHER_DOMAINS + LOCAL_DOMAINS
# TODO: unify with manual_opt_out
# TODO: unify with Bridgy's
DOMAIN_BLOCKLIST = (
    'bsky.social',
    'facebook.com',
    'fb.com',
    'instagram.com',
    'reddit.com',
    't.co',
    'tiktok.com',
    'twitter.com',
    'x.com',
)

SMTP_HOST = 'smtp.gmail.com'
SMTP_PORT = 587

# populated in models.reset_protocol_properties
SUBDOMAIN_BASE_URL_RE = None
# AS fields whose values are ids/URLs; unwrap() treats these specially
ID_FIELDS = ['id', 'object', 'actor', 'author', 'inReplyTo', 'url']

CACHE_CONTROL = {'Cache-Control': 'public, max-age=3600'}  # 1 hour

USER_AGENT = 'Bridgy Fed (https://fed.brid.gy/)'
util.set_user_agent(USER_AGENT)

# https://cloud.google.com/appengine/docs/locations
TASKS_LOCATION = 'us-central1'
RUN_TASKS_INLINE = False  # overridden by unit tests

# for Protocol.REQUIRES_OLD_ACCOUNT, how old is old enough
OLD_ACCOUNT_AGE = timedelta(days=14)

# https://github.com/memcached/memcached/wiki/Commands#standard-protocol
MEMCACHE_KEY_MAX_LEN = 250
|
|
|
|
|
2024-06-19 18:01:46 +00:00
|
|
|
# wire up memcache and ndb's global cache: in-memory mocks when running in
# DEBUG or on a local dev server, otherwise the production Memorystore instance
if appengine_info.DEBUG or appengine_info.LOCAL_SERVER:
    logger.info('Using in memory mock memcache')
    memcache = MockMemcacheClient()
    global_cache = _InProcessGlobalCache()
else:
    logger.info('Using production Memorystore memcache')
    # NOTE(review): hard-coded internal Memorystore IP — confirm it still
    # matches the deployed instance if connections start failing
    memcache = pymemcache.client.base.PooledClient(
        '10.126.144.3', timeout=10, connect_timeout=10,  # seconds
        allow_unicode_keys=True)
    global_cache = MemcacheCache(memcache)
|
2024-06-05 02:10:01 +00:00
|
|
|
|
2017-08-15 06:07:24 +00:00
|
|
|
|
2023-05-27 21:57:29 +00:00
|
|
|
def base64_to_long(x):
    """Decodes a URL-safe base64 value into a long integer.

    Originally from ``django_salmon.magicsigs``. Used in :meth:`User.public_pem`
    and :meth:`User.private_pem`.
    """
    decoded = base64.urlsafe_b64decode(x)
    return number.bytes_to_long(decoded)
|
|
|
|
|
|
|
|
|
|
|
|
def long_to_base64(x):
    """Encodes a long integer as URL-safe base64.

    Originally from ``django_salmon.magicsigs``. Used in :meth:`User.get_or_create`.
    """
    as_bytes = number.long_to_bytes(x)
    return base64.urlsafe_b64encode(as_bytes)
|
|
|
|
|
|
|
|
|
2023-01-05 23:03:21 +00:00
|
|
|
def host_url(path_query=None):
    """Returns an absolute URL for ``path_query`` on the current request host.

    Requests served on our non-canonical domains — and on local domains when
    not in DEBUG — are mapped to the primary domain instead.
    """
    on_other_domain = util.domain_or_parent_in(request.host, OTHER_DOMAINS)
    # when running locally against prod datastore
    local_against_prod = not DEBUG and request.host in LOCAL_DOMAINS

    if on_other_domain or local_against_prod:
        base = f'https://{PRIMARY_DOMAIN}'
    else:
        base = request.host_url

    assert base
    return urljoin(base, path_query)
|
2023-01-05 23:03:21 +00:00
|
|
|
|
|
|
|
|
2024-05-04 20:39:01 +00:00
|
|
|
def error(err, status=400, exc_info=None, **kwargs):
    """Like :func:`oauth_dropins.webutil.flask_util.error`, but wraps body in JSON."""
    msg = str(err)
    logger.info(f'Returning {status}: {msg}', exc_info=exc_info)
    body = make_response({'error': msg}, status)
    abort(status, response=body, **kwargs)
|
2017-08-15 06:07:24 +00:00
|
|
|
|
|
|
|
|
2023-11-26 04:38:28 +00:00
|
|
|
def pretty_link(url, text=None, user=None, **kwargs):
    """Wrapper around :func:`oauth_dropins.webutil.util.pretty_link` that converts Mastodon user URLs to @-@ handles.

    Eg for URLs like https://mastodon.social/@foo and
    https://mastodon.social/users/foo, defaults text to ``@foo@mastodon.social``
    if it's not provided.

    Args:
      url (str)
      text (str)
      user (models.User): current user
      kwargs: passed through to :func:`oauth_dropins.webutil.util.pretty_link`
    """
    # the current user's own URLs get their existing user link
    if user and user.is_web_url(url):
        return user.user_link()

    if text is None:
        if match := re.match(r'https?://([^/]+)/(@|users/)([^/]+)$', url):
            # convert Mastodon-style profile URL to an @user@domain handle
            text = match.expand(r'@\3@\1')

    return util.pretty_link(url, text=text, **kwargs)
|
2023-02-07 05:08:52 +00:00
|
|
|
|
|
|
|
|
2017-10-21 03:35:07 +00:00
|
|
|
def content_type(resp):
    """Returns a :class:`requests.Response`'s Content-Type, without charset suffix.

    Args:
      resp (requests.Response)

    Returns:
      str or None: the media type, eg ``text/html``, or None if the response
      has no Content-Type header
    """
    # renamed from `type` to avoid shadowing the builtin
    ctype = resp.headers.get('Content-Type')
    if ctype:
        return ctype.split(';')[0]
|
|
|
|
|
|
|
|
|
2021-07-08 04:02:13 +00:00
|
|
|
def redirect_wrap(url):
    """Returns a URL on our domain that redirects to this URL.

    ...to satisfy Mastodon's non-standard domain matching requirement. :(

    * https://github.com/snarfed/bridgy-fed/issues/16#issuecomment-424799599
    * https://github.com/tootsuite/mastodon/pull/6219#issuecomment-429142747

    Args:
      url (str)

    Returns:
      str: redirect url, or ``url`` unchanged if it's empty or already on one
      of our own domains
    """
    if not url:
        return url
    if util.domain_from_link(url) in DOMAINS:
        return url

    return host_url('/r/') + url
|
2020-01-31 15:38:58 +00:00
|
|
|
|
|
|
|
|
2023-10-23 22:44:32 +00:00
|
|
|
def subdomain_wrap(proto, path=None):
    """Returns the URL for a given path on this protocol's subdomain.

    Eg for the path ``foo/bar`` on ActivityPub, returns
    ``https://ap.brid.gy/foo/bar``.

    Args:
      proto (subclass of :class:`protocol.Protocol`)

    Returns:
      str: URL
    """
    # fall back to the fed subdomain when the protocol has no abbreviation
    if proto and proto.ABBREV:
        subdomain = proto.ABBREV
    else:
        subdomain = 'fed'

    base = f'https://{subdomain}{SUPERDOMAIN}/'
    return urljoin(base, path)
|
2023-10-23 22:44:32 +00:00
|
|
|
|
|
|
|
|
2023-10-24 17:46:57 +00:00
|
|
|
def unwrap(val, field=None):
    """Removes our subdomain/redirect wrapping from a URL, if it's there.

    ``val`` may be a string, dict, or list. dicts and lists are unwrapped
    recursively.

    Strings that aren't wrapped URLs are left unchanged.

    Args:
      val (str or dict or list)
      field (str): optional field name for this value

    Returns:
      str: unwrapped url
    """
    if isinstance(val, str):
        match = SUBDOMAIN_BASE_URL_RE.match(val)
        if not match:
            return val
        inner = match.group('path')
        # bare domains in id fields become homepage URLs
        if field in ID_FIELDS and re.fullmatch(DOMAIN_RE, inner):
            return f'https://{inner}/'
        return inner

    if isinstance(val, list):
        return [unwrap(item) for item in val]

    if isinstance(val, dict):
        # TODO: clean up. https://github.com/snarfed/bridgy-fed/issues/967
        obj_id = val.get('id')
        is_bot_user = (obj_id
                       and urlparse(obj_id).path.strip('/') in DOMAINS + ('',)
                       and util.domain_from_link(obj_id) in DOMAINS)
        if is_bot_user:
            # protocol bot user, don't touch its URLs
            return {**val, 'id': unwrap(obj_id)}

        return {key: unwrap(value, field=key) for key, value in val.items()}

    return val
|
2022-11-19 02:49:34 +00:00
|
|
|
|
|
|
|
|
2023-03-11 20:14:48 +00:00
|
|
|
def webmention_endpoint_cache_key(url):
    """Returns the cache key for a URL's cached webmention endpoint.

    Usually just the domain. Home pages — path ``/`` or empty — get ``' /'``
    appended, so their webmention endpoints are cached separately from other
    pages. https://github.com/snarfed/bridgy/issues/701

    Example: ``snarfed.org /``

    https://github.com/snarfed/bridgy-fed/issues/423

    Adapted from ``bridgy/util.py``.
    """
    parsed = urllib.parse.urlparse(url)
    suffix = ' /' if parsed.path in ('', '/') else ''
    return parsed.netloc + suffix
|
2023-03-11 20:14:48 +00:00
|
|
|
|
|
|
|
|
2023-03-11 06:24:58 +00:00
|
|
|
# cache webmention endpoint discovery results, keyed per domain (per page for
# home pages), with a 2h expiration
_webmention_discovery_cache = cachetools.TTLCache(50000, 60 * 60 * 2)

@cachetools.cached(_webmention_discovery_cache,
                   key=webmention_endpoint_cache_key,
                   lock=threading.Lock())
def webmention_discover(url, **kwargs):
    """Thin caching wrapper around :func:`oauth_dropins.webutil.webmention.discover`."""
    return webmention.discover(url, **kwargs)
|
2023-06-30 05:15:07 +00:00
|
|
|
|
|
|
|
|
|
|
|
def add(seq, val):
    """Appends ``val`` to ``seq`` unless it's already present.

    Useful for treating repeated ndb properties like sets instead of lists.
    """
    if val in seq:
        return
    seq.append(val)
|
2023-09-06 03:10:11 +00:00
|
|
|
|
|
|
|
|
2024-04-18 22:56:40 +00:00
|
|
|
def remove(seq, val):
    """Removes ``val`` from ``seq`` if seq contains it.

    Useful for treating repeated ndb properties like sets instead of lists.
    """
    if val in seq:
        seq.remove(val)
|
|
|
|
|
|
|
|
|
2024-01-01 22:47:03 +00:00
|
|
|
def create_task(queue, delay=None, **params):
    """Adds a Cloud Tasks task.

    If running in a local server, runs the task handler inline instead of
    creating a task.

    Args:
      queue (str): queue name
      delay (:class:`datetime.timedelta`): optional, used as task ETA (from now)
      params: form-encoded and included in the task request body

    Returns:
      flask.Response or (str, int): response from either running the task
      inline, if running in a local server, or the response from creating the
      task.
    """
    assert queue
    path = f'/queue/{queue}'

    if RUN_TASKS_INLINE or appengine_info.LOCAL_SERVER:
        logger.info(f'Running task inline: {queue} {params}')
        # imported here to avoid a circular import at module load time —
        # TODO confirm
        from router import app
        # POST to the task handler through a test client; the empty queue
        # header makes the handler treat it as a Cloud Tasks request
        return app.test_client().post(
            path, data=params, headers={flask_util.CLOUD_TASKS_QUEUE_HEADER: ''})

        # # alternative: run inline in this request context
        # request.form = params
        # endpoint, args = app.url_map.bind(request.server[0])\
        #     .match(path, method='POST')
        # return app.view_functions[endpoint](**args)

    task = {
        'app_engine_http_request': {
            'http_method': 'POST',
            'relative_uri': path,
            # sorted for a deterministic body — presumably so identical tasks
            # encode identically; confirm
            'body': urllib.parse.urlencode(sorted(params.items())).encode(),
            'headers': {'Content-Type': 'application/x-www-form-urlencoded'},
        },
    }
    if delay:
        # schedule_time is an absolute ETA, so add the delay to now
        eta_seconds = int(util.to_utc_timestamp(util.now()) + delay.total_seconds())
        task['schedule_time'] = Timestamp(seconds=eta_seconds)

    parent = tasks_client.queue_path(appengine_info.APP_ID, TASKS_LOCATION, queue)
    task = tasks_client.create_task(parent=parent, task=task)
    msg = f'Added {queue} task {task.name} : {params}'
    logger.info(msg)
    return msg, 202
|
2024-04-24 18:12:45 +00:00
|
|
|
|
|
|
|
|
|
|
|
def email_me(msg):
    """Emails ``msg`` to the admin address. Currently disabled.

    The leading ``assert False`` intentionally makes this raise
    ``AssertionError`` before doing anything; the code below it is unreachable
    until the SMTP issues are resolved.
    """
    assert False  # not working, SMTP woes :(
    if not DEBUG:
        util.send_email(smtp_host=SMTP_HOST, smtp_port=SMTP_PORT,
                        from_='scufflechuck@gmail.com', to='bridgy-fed@ryanb.org',
                        subject=util.ellipsize(msg), body=msg)
|
2024-05-09 20:26:24 +00:00
|
|
|
|
|
|
|
|
2024-07-05 19:05:08 +00:00
|
|
|
def report_exception(**kwargs):
    """Reports the currently-handled exception to StackDriver Error Reporting.

    Shorthand for :func:`report_error` with ``exception=True`` and no message.
    """
    return report_error(None, exception=True, **kwargs)
|
2024-06-03 21:11:23 +00:00
|
|
|
|
|
|
|
|
2024-07-05 19:05:08 +00:00
|
|
|
def report_error(msg, *, exception=False, **kwargs):
    """Reports an error to StackDriver Error Reporting.

    https://cloud.google.com/python/docs/reference/clouderrorreporting/latest/google.cloud.error_reporting.client.Client

    If ``DEBUG`` and ``exception`` are ``True``, re-raises the exception instead.

    Duplicated in ``bridgy.util``.

    Args:
      msg (str): error message; may be None when reporting an exception
      exception (bool): whether to report the currently-handled exception
      kwargs: passed through to the error reporting client
    """
    if DEBUG:
        if exception:
            # bare raise re-raises the active exception; callers are expected
            # to invoke this from inside an except block
            raise
        else:
            logger.error(msg)
        return

    # include request details when we're inside a Flask request context
    http_context = build_flask_context(request) if has_request_context() else None
    fn = (error_reporting_client.report_exception if exception
          else error_reporting_client.report)

    try:
        fn(msg, http_context=http_context, **kwargs)
    except BaseException:
        # reporting must never take down the caller; log and move on
        kwargs['exception'] = exception
        logger.warning(f'Failed to report error! {kwargs}', exc_info=exception)
|
2024-05-09 20:26:24 +00:00
|
|
|
|
|
|
|
|
2024-06-01 04:15:34 +00:00
|
|
|
# matches Object ids that look like user profiles: /user/... or /users/...
# paths, Bluesky app.bsky.actor.profile/self records, bare DIDs, and bare
# https:// homepage URLs. DOMAIN_RE[1:-1] strips its ^...$ anchors so it can
# be embedded here.
# NOTE(review): the dots in /app.bsky.actor.profile are unescaped regex
# wildcards — presumably harmless, but confirm
PROFILE_ID_RE = re.compile(
    fr"""
    /users?/[^/]+$ |
    /app.bsky.actor.profile/self$ |
    ^did:[a-z0-9:.]+$ |
    ^https://{DOMAIN_RE[1:-1]}/?$
    """, re.VERBOSE)
|
|
|
|
|
2024-06-04 22:13:53 +00:00
|
|
|
def global_cache_timeout_policy(key):
    """Returns the global cache expiration, in seconds, for a datastore key.

    Users and profile objects are cached longer (2h) than everything else (30m).
    """
    long_lived = False
    if key:
        if key.kind in ('ActivityPub', 'ATProto', 'Follower', 'MagicKey'):
            long_lived = True
        elif key.kind == 'Object' and PROFILE_ID_RE.search(key.name):
            long_lived = True

    duration = timedelta(hours=2) if long_lived else timedelta(minutes=30)
    return int(duration.total_seconds())
|
2024-07-12 05:08:36 +00:00
|
|
|
|
|
|
|
|
|
|
|
def memcache_key(key):
    """Preprocesses a memcache key: escapes spaces, truncates to 250 bytes.

    https://pymemcache.readthedocs.io/en/latest/apidoc/pymemcache.client.base.html
    https://github.com/memcached/memcached/wiki/Commands#standard-protocol

    Escaping and encoding happen *before* truncation so that ``%20`` expansion
    and multi-byte UTF-8 characters can't push the final key past memcached's
    250-byte limit. (Truncating may split a multi-byte character or an escape
    sequence; that's fine, since memcached treats keys as opaque bytes.)

    Args:
      key (str)

    Returns:
      bytes: the processed key
    """
    return key.replace(' ', '%20').encode()[:MEMCACHE_KEY_MAX_LEN]
|
2024-07-30 21:50:33 +00:00
|
|
|
|
|
|
|
|
|
|
|
def memcache_memoize(expire=None):
    """Memoize function decorator that stores the cached value in memcache.

    Only caches non-null/empty values.

    Args:
      expire (int): optional, expiration in seconds
    """
    def decorator(fn):
        @functools.wraps(fn)
        def wrapped(*args, **kwargs):
            key = memcache_key(f'{fn.__name__}-{repr(args)}-{repr(kwargs)}')
            if val := memcache.get(key):
                logger.debug(f'cache hit {key}')
                return val

            logger.debug(f'cache miss {key}')
            val = fn(*args, **kwargs)
            # only store truthy values: falsy cached values would be
            # indistinguishable from misses in the walrus check above anyway.
            # pymemcache treats expire=0 as "never expire".
            if val:
                memcache.set(key, val, expire=expire or 0)
            return val

        return wrapped

    return decorator
|