Rework requests caching: set a 2-hour expiration on cache entries, except for JSON-LD term definitions. Try to get remote profiles from the client app before fetching them.

jsonld-outbound
Alain St-Denis 2022-12-22 14:18:48 +00:00
parent 7b2de4811c
commit dad652a264
6 changed files with 43 additions and 40 deletions
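In short, remote fetches now go through a shared requests_cache.CachedSession whose entries expire after two hours, callers can bypass the cache per request, and the pyld document loader keeps a separate cache (with requests_cache's default of never expiring) for JSON-LD term definitions. A minimal sketch of that pattern, not the patch itself, assuming an SQLite backend (the patch picks Redis or SQLite via get_requests_cache_backend):

import datetime
from requests_cache import CachedSession, DO_NOT_CACHE

EXPIRATION = datetime.timedelta(hours=2)
# General federation traffic: cached responses expire after two hours.
session = CachedSession('fed_cache', backend='sqlite')

def fetch(url, cache=True):
    # cache=False maps to DO_NOT_CACHE, so the response bypasses the cache.
    return session.get(url, expire_after=EXPIRATION if cache else DO_NOT_CACHE, timeout=10)

# JSON-LD term definitions rarely change, so the jsonld document loader keeps
# its own cache with no expiration (the requests_cache default).
ld_session = CachedSession('ld_cache', backend='sqlite')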

View file

@@ -21,28 +21,12 @@ from federation.entities.utils import get_base_attributes, get_profile
 from federation.outbound import handle_send
 from federation.types import UserType, ReceiverVariant
 from federation.utils.activitypub import retrieve_and_parse_document, retrieve_and_parse_profile, get_profile_id_from_webfinger
+from federation.utils.django import get_requests_cache_backend
 from federation.utils.text import with_slash, validate_handle

 import federation.entities.base as base

 logger = logging.getLogger("federation")

-# Make django federation parameters globally available
-# if possible
-try:
-    from federation.utils.django import get_configuration
-    django_params = get_configuration()
-except ImportError:
-    django_params = {}
-# try to obtain redis config from django and use as
-# requests_cache backend if available
-if django_params.get('redis'):
-    backend = rc.RedisCache(namespace='fed_cache', **django_params['redis'])
-else:
-    backend = rc.SQLiteCache(db_path='fed_cache')
-logger.info('Using %s for requests_cache', type(backend))
-
 # This is required to workaround a bug in pyld that has the Accept header
 # accept other content types. From what I understand, precedence handling
 # is broken
@@ -52,7 +36,7 @@ def get_loader(*args, **kwargs):
     def loader(url, options={}):
         options['headers']['Accept'] = 'application/ld+json'
-        with rc.enabled(cache_name='fed_cache', backend=backend):
+        with rc.enabled(cache_name='ld_cache', backend=get_requests_cache_backend('ld_cache')):
             return requests_loader(url, options)

     return loader
@@ -63,8 +47,7 @@ jsonld.set_document_loader(get_loader())
 def get_profile_or_entity(fid):
     obj = get_profile(fid=fid)
     if not obj:
-        with rc.enabled(cache_name='fed_cache', backend=backend):
-            obj = retrieve_and_parse_document(fid)
+        obj = retrieve_and_parse_document(fid)
     return obj
@@ -606,6 +589,7 @@ class Person(Object, base.Profile):
     capabilities = CompactedDict(litepub.capabilities)
     suspended = fields.Boolean(toot.suspended)
     public = True
+    finger = None
     _cached_inboxes = None
     _cached_public_key = None
     _cached_image_urls = None
@@ -624,15 +608,18 @@ class Person(Object, base.Profile):
         super().__init__(*args, **kwargs)
         self._allowed_children += (PropertyValue, IdentityProof)

-    # Set handle to username@host if not provided by the platform
+    # Set finger to username@host if not provided by the platform
     def post_receive(self):
-        if not self.finger:
+        profile = get_profile(fid=self.id)
+        if getattr(profile, 'finger', None):
+            self.finger = profile.finger
+        else:
             domain = urlparse(self.id).netloc
             finger = f'{self.username.lower()}@{domain}'
-            with rc.enabled(cache_name='fed_cache', backend=backend):
-                if get_profile_id_from_webfinger(finger) == self.id:
-                    self.finger = finger
-        if self.guid and not self.handle:
+            if get_profile_id_from_webfinger(finger) == self.id:
+                self.finger = finger
+        # multi-protocol platform
+        if self.finger and self.guid and not self.handle:
             self.handle = self.finger

     def to_as2(self):
@@ -1269,8 +1256,8 @@ def extract_receivers(entity):
     profile = None
     # don't care about receivers for payloads without an actor_id
     if getattr(entity, 'actor_id'):
-        with rc.enabled(cache_name='fed_cache', backend=backend):
-            profile = retrieve_and_parse_profile(entity.actor_id)
+        profile = get_profile(fid=entity.actor_id)
+        if not profile: profile = retrieve_and_parse_profile(entity.actor_id)
     if not profile: return receivers

     for attr in ("to", "cc"):
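The net effect of the Person.post_receive changes above: the library asks the client application for an already-stored profile before touching the network, and only falls back to a webfinger lookup to confirm the username@domain finger. A condensed sketch of that lookup order (resolve_finger is an illustrative helper, not part of the patch):

from urllib.parse import urlparse

def resolve_finger(person, get_profile, get_profile_id_from_webfinger):
    # 1. Ask the client app for a profile it already stores for this actor id.
    profile = get_profile(fid=person.id)
    if getattr(profile, 'finger', None):
        return profile.finger
    # 2. Otherwise build username@domain and confirm it through webfinger.
    finger = f'{person.username.lower()}@{urlparse(person.id).netloc}'
    if get_profile_id_from_webfinger(finger) == person.id:
        return finger
    return None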

View file

@@ -28,7 +28,8 @@ def retrieve_remote_content(
     protocol_name = identify_protocol_by_id(id).PROTOCOL_NAME
     utils = importlib.import_module("federation.utils.%s" % protocol_name)
     return utils.retrieve_and_parse_content(
-        id=id, guid=guid, handle=handle, entity_type=entity_type, sender_key_fetcher=sender_key_fetcher,
+        id=id, guid=guid, handle=handle, entity_type=entity_type,
+        cache=cache, sender_key_fetcher=sender_key_fetcher,
     )

View file

@@ -34,15 +34,15 @@ def get_profile_id_from_webfinger(handle: str) -> Optional[str]:
 def retrieve_and_parse_content(**kwargs) -> Optional[Any]:
-    return retrieve_and_parse_document(kwargs.get("id"))
+    return retrieve_and_parse_document(kwargs.get("id"), cache=kwargs.get('cache',True))


-def retrieve_and_parse_document(fid: str) -> Optional[Any]:
+def retrieve_and_parse_document(fid: str, cache: bool=True) -> Optional[Any]:
     """
     Retrieve remote document by ID and return the entity.
     """
     from federation.entities.activitypub.models import element_to_objects  # Circulars
-    document, status_code, ex = fetch_document(fid, extra_headers={'accept': 'application/activity+json'},
+    document, status_code, ex = fetch_document(fid, extra_headers={'accept': 'application/activity+json'}, cache=cache,
         auth=get_http_authentication(federation_user.rsa_private_key,f'{federation_user.id}#main-key') if federation_user else None)
     if document:
         try:

View file

@@ -162,7 +162,8 @@ def parse_profile_from_hcard(hcard: str, handle: str):
 def retrieve_and_parse_content(
-        id: str, guid: str, handle: str, entity_type: str, sender_key_fetcher: Callable[[str], str]=None):
+        id: str, guid: str, handle: str, entity_type: str, cache: bool=True,
+        sender_key_fetcher: Callable[[str], str]=None):
     """Retrieve remote content and return an Entity class instance.

     This is basically the inverse of receiving an entity. Instead, we fetch it, then call "handle_receive".
@@ -175,7 +176,7 @@ def retrieve_and_parse_content(
         return
     _username, domain = handle.split("@")
     url = get_fetch_content_endpoint(domain, entity_type.lower(), guid)
-    document, status_code, error = fetch_document(url)
+    document, status_code, error = fetch_document(url, cache=cache)
     if status_code == 200:
         request = RequestType(body=document)
         _sender, _protocol, entities = handle_receive(request, sender_key_fetcher=sender_key_fetcher)

View file

@@ -1,4 +1,5 @@
 import importlib

+from requests_cache import RedisCache, SQLiteCache
 from django.conf import settings
 from django.core.exceptions import ImproperlyConfigured
@@ -59,3 +60,11 @@ def get_federation_user():
     return UserType(id=config['federation_id'], private_key=key)
+
+
+def get_requests_cache_backend(namespace):
+    """
+    Use RedisCache if available, else fall back to SQLiteCache.
+    """
+    config = get_configuration()
+    if not config.get('redis'): return SQLiteCache()
+    return RedisCache(namespace, **config['redis'])
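get_requests_cache_backend reads the same configuration dict as the other Django helpers: if it contains a 'redis' entry, that dict is passed straight through to requests_cache.RedisCache (and on to redis-py) as connection keyword arguments; otherwise an SQLite backend is used. A hypothetical settings snippet, assuming the FEDERATION settings dict that get_configuration reads and standard redis connection kwargs (key names and values here are illustrative):

# settings.py -- the 'redis' dict is forwarded verbatim to RedisCache, so
# common redis-py kwargs such as host, port, db or password should work.
FEDERATION = {
    "base_url": "https://example.com",
    "federation_id": "https://example.com/u/relay",
    "redis": {
        "host": "127.0.0.1",
        "port": 6379,
        "db": 0,
    },
}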

View file

@@ -8,30 +8,34 @@ from urllib.parse import quote
 from uuid import uuid4

 import requests
+from requests_cache import CachedSession, DO_NOT_CACHE
 from requests.exceptions import RequestException, HTTPError, SSLError
 from requests.exceptions import ConnectionError
 from requests.structures import CaseInsensitiveDict

 from federation import __version__
+from federation.utils.django import get_requests_cache_backend

 logger = logging.getLogger("federation")

 USER_AGENT = "python/federation/%s" % __version__

+session = CachedSession('fed_cache', backend=get_requests_cache_backend('fed_cache'))
+EXPIRATION = datetime.timedelta(hours=2)


 def fetch_content_type(url: str) -> Optional[str]:
     """
     Fetch the HEAD of the remote url to determine the content type.
     """
     try:
-        response = requests.head(url, headers={'user-agent': USER_AGENT}, timeout=10)
+        response = session.head(url, headers={'user-agent': USER_AGENT}, timeout=10)
     except RequestException as ex:
         logger.warning("fetch_content_type - %s when fetching url %s", ex, url)
     else:
         return response.headers.get('Content-Type')


-def fetch_document(url=None, host=None, path="/", timeout=10, raise_ssl_errors=True, extra_headers=None, **kwargs):
+def fetch_document(url=None, host=None, path="/", timeout=10, raise_ssl_errors=True, extra_headers=None, cache=True, **kwargs):
     """Helper method to fetch remote document.

     Must be given either the ``url`` or ``host``.
@@ -60,7 +64,8 @@ def fetch_document(url=None, host=None, path="/", timeout=10, raise_ssl_errors=T
         # Use url since it was given
         logger.debug("fetch_document: trying %s", url)
         try:
-            response = requests.get(url, timeout=timeout, headers=headers, **kwargs)
+            response = session.get(url, timeout=timeout, headers=headers,
+                                   expire_after=EXPIRATION if cache else DO_NOT_CACHE, **kwargs)
             logger.debug("fetch_document: found document, code %s", response.status_code)
             response.raise_for_status()
             return response.text, response.status_code, None
@@ -73,7 +78,7 @@ def fetch_document(url=None, host=None, path="/", timeout=10, raise_ssl_errors=T
         url = "https://%s%s" % (host_string, path_string)
         logger.debug("fetch_document: trying %s", url)
         try:
-            response = requests.get(url, timeout=timeout, headers=headers)
+            response = session.get(url, timeout=timeout, headers=headers)
             logger.debug("fetch_document: found document, code %s", response.status_code)
             response.raise_for_status()
             return response.text, response.status_code, None
@@ -85,7 +90,7 @@ def fetch_document(url=None, host=None, path="/", timeout=10, raise_ssl_errors=T
         url = url.replace("https://", "http://")
         logger.debug("fetch_document: trying %s", url)
         try:
-            response = requests.get(url, timeout=timeout, headers=headers)
+            response = session.get(url, timeout=timeout, headers=headers)
             logger.debug("fetch_document: found document, code %s", response.status_code)
             response.raise_for_status()
             return response.text, response.status_code, None
@@ -116,7 +121,7 @@ def fetch_file(url: str, timeout: int = 30, extra_headers: Dict = None) -> str:
     headers = {'user-agent': USER_AGENT}
     if extra_headers:
         headers.update(extra_headers)
-    response = requests.get(url, timeout=timeout, headers=headers, stream=True)
+    response = session.get(url, timeout=timeout, headers=headers, stream=True)
     response.raise_for_status()
     name = f"/tmp/{str(uuid4())}"
     with open(name, "wb") as f:
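With these changes every helper in this module shares the cached session; only the direct-URL branch of fetch_document passes a per-request expire_after, mapping cache=True to the two-hour EXPIRATION and cache=False to DO_NOT_CACHE, while the host/path fallback branches and fetch_file simply reuse the session defaults. Illustrative usage (the URL is an example):

# Cached fetch: the response is stored and reused for up to two hours.
doc, status, err = fetch_document("https://example.com/actor",
                                  extra_headers={'accept': 'application/activity+json'})

# Uncached fetch: expire_after=DO_NOT_CACHE is sent, so the request bypasses the cache.
doc, status, err = fetch_document("https://example.com/actor", cache=False)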