Rework requests caching: set a 2-hour expiration on cache entries, except for JSON-LD term definitions. Try to get remote profiles from the client app before fetching.

jsonld-outbound
Alain St-Denis 2022-12-22 14:18:48 +00:00
rodzic 7b2de4811c
commit dad652a264
6 zmienionych plików z 43 dodań i 40 usunięć

Wyświetl plik

@ -21,28 +21,12 @@ from federation.entities.utils import get_base_attributes, get_profile
from federation.outbound import handle_send
from federation.types import UserType, ReceiverVariant
from federation.utils.activitypub import retrieve_and_parse_document, retrieve_and_parse_profile, get_profile_id_from_webfinger
from federation.utils.django import get_requests_cache_backend
from federation.utils.text import with_slash, validate_handle
import federation.entities.base as base
logger = logging.getLogger("federation")
# Make django federation parameters globally available
# if possible
try:
from federation.utils.django import get_configuration
django_params = get_configuration()
except ImportError:
django_params = {}
# try to obtain redis config from django and use as
# requests_cache backend if available
if django_params.get('redis'):
backend = rc.RedisCache(namespace='fed_cache', **django_params['redis'])
else:
backend = rc.SQLiteCache(db_path='fed_cache')
logger.info('Using %s for requests_cache', type(backend))
# This is required to work around a bug in pyld that lets the Accept header
# accept other content types. From what I understand, precedence handling
# is broken
@ -52,7 +36,7 @@ def get_loader(*args, **kwargs):
def loader(url, options={}):
    """
    Document loader wrapper forcing a JSON-LD Accept header.

    NOTE(review): ``options`` is a mutable default and is mutated below;
    pyld always passes its own options dict, but confirm before relying
    on calling this with no ``options`` argument.
    """
    options['headers']['Accept'] = 'application/ld+json'
    # JSON-LD term definitions are cached in their own namespace with no
    # expiration, unlike the general 'fed_cache' entries.
    with rc.enabled(cache_name='ld_cache', backend=get_requests_cache_backend('ld_cache')):
        return requests_loader(url, options)
return loader
@ -63,8 +47,7 @@ jsonld.set_document_loader(get_loader())
def get_profile_or_entity(fid):
    """
    Return the profile for ``fid`` from the client app if it has one,
    otherwise fetch and parse the remote document.
    """
    obj = get_profile(fid=fid)
    if not obj:
        # Caching is handled inside retrieve_and_parse_document now, so no
        # requests_cache context manager is needed here.
        obj = retrieve_and_parse_document(fid)
    return obj
@ -606,6 +589,7 @@ class Person(Object, base.Profile):
capabilities = CompactedDict(litepub.capabilities)
suspended = fields.Boolean(toot.suspended)
public = True
finger = None
_cached_inboxes = None
_cached_public_key = None
_cached_image_urls = None
@ -624,15 +608,18 @@ class Person(Object, base.Profile):
super().__init__(*args, **kwargs)
self._allowed_children += (PropertyValue, IdentityProof)
# Set handle to username@host if not provided by the platform
# Set finger to username@host if not provided by the platform
def post_receive(self):
    """
    Set ``finger`` to ``username@host`` if not provided by the platform.

    Prefer a finger already stored by the client app; otherwise compute
    one from the username and the id's host, keeping it only when a
    webfinger lookup confirms it resolves back to this profile's id.
    """
    if not self.finger:
        # Ask the client app first to avoid a network round trip.
        profile = get_profile(fid=self.id)
        if getattr(profile, 'finger', None):
            self.finger = profile.finger
        else:
            domain = urlparse(self.id).netloc
            finger = f'{self.username.lower()}@{domain}'
            if get_profile_id_from_webfinger(finger) == self.id:
                self.finger = finger
    # multi-protocol platform
    if self.finger and self.guid and not self.handle:
        self.handle = self.finger
def to_as2(self):
@ -1269,8 +1256,8 @@ def extract_receivers(entity):
profile = None
# don't care about receivers for payloads without an actor_id
if getattr(entity, 'actor_id'):
with rc.enabled(cache_name='fed_cache', backend=backend):
profile = retrieve_and_parse_profile(entity.actor_id)
profile = get_profile(fid=entity.actor_id)
if not profile: profile = retrieve_and_parse_profile(entity.actor_id)
if not profile: return receivers
for attr in ("to", "cc"):

Wyświetl plik

@ -28,7 +28,8 @@ def retrieve_remote_content(
protocol_name = identify_protocol_by_id(id).PROTOCOL_NAME
utils = importlib.import_module("federation.utils.%s" % protocol_name)
return utils.retrieve_and_parse_content(
id=id, guid=guid, handle=handle, entity_type=entity_type, sender_key_fetcher=sender_key_fetcher,
id=id, guid=guid, handle=handle, entity_type=entity_type,
cache=cache, sender_key_fetcher=sender_key_fetcher,
)

Wyświetl plik

@ -34,15 +34,15 @@ def get_profile_id_from_webfinger(handle: str) -> Optional[str]:
def retrieve_and_parse_content(**kwargs) -> Optional[Any]:
    """
    Retrieve and parse a remote ActivityPub document by its ``id``.

    Honors an optional ``cache`` keyword (default True) controlling
    whether the underlying fetch uses the requests cache; all other
    keyword arguments are ignored.
    """
    return retrieve_and_parse_document(kwargs.get("id"), cache=kwargs.get('cache', True))
def retrieve_and_parse_document(fid: str) -> Optional[Any]:
def retrieve_and_parse_document(fid: str, cache: bool=True) -> Optional[Any]:
"""
Retrieve remote document by ID and return the entity.
"""
from federation.entities.activitypub.models import element_to_objects # Circulars
document, status_code, ex = fetch_document(fid, extra_headers={'accept': 'application/activity+json'},
document, status_code, ex = fetch_document(fid, extra_headers={'accept': 'application/activity+json'}, cache=cache,
auth=get_http_authentication(federation_user.rsa_private_key,f'{federation_user.id}#main-key') if federation_user else None)
if document:
try:

Wyświetl plik

@ -162,7 +162,8 @@ def parse_profile_from_hcard(hcard: str, handle: str):
def retrieve_and_parse_content(
id: str, guid: str, handle: str, entity_type: str, sender_key_fetcher: Callable[[str], str]=None):
id: str, guid: str, handle: str, entity_type: str, cache: bool=True,
sender_key_fetcher: Callable[[str], str]=None):
"""Retrieve remote content and return an Entity class instance.
This is basically the inverse of receiving an entity. Instead, we fetch it, then call "handle_receive".
@ -175,7 +176,7 @@ def retrieve_and_parse_content(
return
_username, domain = handle.split("@")
url = get_fetch_content_endpoint(domain, entity_type.lower(), guid)
document, status_code, error = fetch_document(url)
document, status_code, error = fetch_document(url, cache=cache)
if status_code == 200:
request = RequestType(body=document)
_sender, _protocol, entities = handle_receive(request, sender_key_fetcher=sender_key_fetcher)

Wyświetl plik

@ -1,4 +1,5 @@
import importlib
from requests_cache import RedisCache, SQLiteCache
from django.conf import settings
from django.core.exceptions import ImproperlyConfigured
@ -59,3 +60,11 @@ def get_federation_user():
return UserType(id=config['federation_id'], private_key=key)
def get_requests_cache_backend(namespace):
    """
    Return a requests_cache backend for the given ``namespace``.

    Uses RedisCache when a 'redis' section is present in the django
    configuration, otherwise falls back to SQLiteCache.
    """
    config = get_configuration()
    if not config.get('redis'):
        return SQLiteCache()
    return RedisCache(namespace, **config['redis'])

Wyświetl plik

@ -8,30 +8,34 @@ from urllib.parse import quote
from uuid import uuid4
import requests
from requests_cache import CachedSession, DO_NOT_CACHE
from requests.exceptions import RequestException, HTTPError, SSLError
from requests.exceptions import ConnectionError
from requests.structures import CaseInsensitiveDict
from federation import __version__
from federation.utils.django import get_requests_cache_backend
logger = logging.getLogger("federation")
USER_AGENT = "python/federation/%s" % __version__
session = CachedSession('fed_cache', backend=get_requests_cache_backend('fed_cache'))
EXPIRATION = datetime.timedelta(hours=2)
def fetch_content_type(url: str) -> Optional[str]:
    """
    Fetch the HEAD of the remote url to determine the content type.

    Returns the Content-Type header value, or None when the request fails
    (the failure is logged, not raised).
    """
    try:
        # Use the module-level cached session so repeated HEAD requests
        # hit the requests_cache backend instead of the network.
        response = session.head(url, headers={'user-agent': USER_AGENT}, timeout=10)
    except RequestException as ex:
        logger.warning("fetch_content_type - %s when fetching url %s", ex, url)
    else:
        return response.headers.get('Content-Type')
def fetch_document(url=None, host=None, path="/", timeout=10, raise_ssl_errors=True, extra_headers=None, **kwargs):
def fetch_document(url=None, host=None, path="/", timeout=10, raise_ssl_errors=True, extra_headers=None, cache=True, **kwargs):
"""Helper method to fetch remote document.
Must be given either the ``url`` or ``host``.
@ -60,7 +64,8 @@ def fetch_document(url=None, host=None, path="/", timeout=10, raise_ssl_errors=T
# Use url since it was given
logger.debug("fetch_document: trying %s", url)
try:
response = requests.get(url, timeout=timeout, headers=headers, **kwargs)
response = session.get(url, timeout=timeout, headers=headers,
expire_after=EXPIRATION if cache else DO_NOT_CACHE, **kwargs)
logger.debug("fetch_document: found document, code %s", response.status_code)
response.raise_for_status()
return response.text, response.status_code, None
@ -73,7 +78,7 @@ def fetch_document(url=None, host=None, path="/", timeout=10, raise_ssl_errors=T
url = "https://%s%s" % (host_string, path_string)
logger.debug("fetch_document: trying %s", url)
try:
response = requests.get(url, timeout=timeout, headers=headers)
response = session.get(url, timeout=timeout, headers=headers)
logger.debug("fetch_document: found document, code %s", response.status_code)
response.raise_for_status()
return response.text, response.status_code, None
@ -85,7 +90,7 @@ def fetch_document(url=None, host=None, path="/", timeout=10, raise_ssl_errors=T
url = url.replace("https://", "http://")
logger.debug("fetch_document: trying %s", url)
try:
response = requests.get(url, timeout=timeout, headers=headers)
response = session.get(url, timeout=timeout, headers=headers)
logger.debug("fetch_document: found document, code %s", response.status_code)
response.raise_for_status()
return response.text, response.status_code, None
@ -116,7 +121,7 @@ def fetch_file(url: str, timeout: int = 30, extra_headers: Dict = None) -> str:
headers = {'user-agent': USER_AGENT}
if extra_headers:
headers.update(extra_headers)
response = requests.get(url, timeout=timeout, headers=headers, stream=True)
response = session.get(url, timeout=timeout, headers=headers, stream=True)
response.raise_for_status()
name = f"/tmp/{str(uuid4())}"
with open(name, "wb") as f: