kopia lustrzana https://gitlab.com/jaywink/federation
Rework requests caching. Set a 2-hour expiration on cache entries, except for JSON-LD term definitions. Try to get remote profiles from the client app before fetching them.
rodzic
7b2de4811c
commit
dad652a264
|
@ -21,28 +21,12 @@ from federation.entities.utils import get_base_attributes, get_profile
|
|||
from federation.outbound import handle_send
|
||||
from federation.types import UserType, ReceiverVariant
|
||||
from federation.utils.activitypub import retrieve_and_parse_document, retrieve_and_parse_profile, get_profile_id_from_webfinger
|
||||
from federation.utils.django import get_requests_cache_backend
|
||||
from federation.utils.text import with_slash, validate_handle
|
||||
import federation.entities.base as base
|
||||
|
||||
logger = logging.getLogger("federation")
|
||||
|
||||
# Make django federation parameters globally available
|
||||
# if possible
|
||||
try:
|
||||
from federation.utils.django import get_configuration
|
||||
django_params = get_configuration()
|
||||
except ImportError:
|
||||
django_params = {}
|
||||
|
||||
# try to obtain redis config from django and use as
|
||||
# requests_cache backend if available
|
||||
if django_params.get('redis'):
|
||||
backend = rc.RedisCache(namespace='fed_cache', **django_params['redis'])
|
||||
else:
|
||||
backend = rc.SQLiteCache(db_path='fed_cache')
|
||||
logger.info('Using %s for requests_cache', type(backend))
|
||||
|
||||
|
||||
# This is required to workaround a bug in pyld that has the Accept header
|
||||
# accept other content types. From what I understand, precedence handling
|
||||
# is broken
|
||||
|
@ -52,7 +36,7 @@ def get_loader(*args, **kwargs):
|
|||
|
||||
def loader(url, options={}):
|
||||
options['headers']['Accept'] = 'application/ld+json'
|
||||
with rc.enabled(cache_name='fed_cache', backend=backend):
|
||||
with rc.enabled(cache_name='ld_cache', backend=get_requests_cache_backend('ld_cache')):
|
||||
return requests_loader(url, options)
|
||||
|
||||
return loader
|
||||
|
@ -63,8 +47,7 @@ jsonld.set_document_loader(get_loader())
|
|||
def get_profile_or_entity(fid):
|
||||
obj = get_profile(fid=fid)
|
||||
if not obj:
|
||||
with rc.enabled(cache_name='fed_cache', backend=backend):
|
||||
obj = retrieve_and_parse_document(fid)
|
||||
obj = retrieve_and_parse_document(fid)
|
||||
return obj
|
||||
|
||||
|
||||
|
@ -606,6 +589,7 @@ class Person(Object, base.Profile):
|
|||
capabilities = CompactedDict(litepub.capabilities)
|
||||
suspended = fields.Boolean(toot.suspended)
|
||||
public = True
|
||||
finger = None
|
||||
_cached_inboxes = None
|
||||
_cached_public_key = None
|
||||
_cached_image_urls = None
|
||||
|
@ -624,15 +608,18 @@ class Person(Object, base.Profile):
|
|||
super().__init__(*args, **kwargs)
|
||||
self._allowed_children += (PropertyValue, IdentityProof)
|
||||
|
||||
# Set handle to username@host if not provided by the platform
|
||||
# Set finger to username@host if not provided by the platform
|
||||
def post_receive(self):
|
||||
if not self.finger:
|
||||
profile = get_profile(fid=self.id)
|
||||
if getattr(profile, 'finger', None):
|
||||
self.finger = profile.finger
|
||||
else:
|
||||
domain = urlparse(self.id).netloc
|
||||
finger = f'{self.username.lower()}@{domain}'
|
||||
with rc.enabled(cache_name='fed_cache', backend=backend):
|
||||
if get_profile_id_from_webfinger(finger) == self.id:
|
||||
self.finger = finger
|
||||
if self.guid and not self.handle:
|
||||
if get_profile_id_from_webfinger(finger) == self.id:
|
||||
self.finger = finger
|
||||
# multi-protocol platform
|
||||
if self.finger and self.guid and not self.handle:
|
||||
self.handle = self.finger
|
||||
|
||||
def to_as2(self):
|
||||
|
@ -1269,8 +1256,8 @@ def extract_receivers(entity):
|
|||
profile = None
|
||||
# don't care about receivers for payloads without an actor_id
|
||||
if getattr(entity, 'actor_id'):
|
||||
with rc.enabled(cache_name='fed_cache', backend=backend):
|
||||
profile = retrieve_and_parse_profile(entity.actor_id)
|
||||
profile = get_profile(fid=entity.actor_id)
|
||||
if not profile: profile = retrieve_and_parse_profile(entity.actor_id)
|
||||
if not profile: return receivers
|
||||
|
||||
for attr in ("to", "cc"):
|
||||
|
|
|
@ -28,7 +28,8 @@ def retrieve_remote_content(
|
|||
protocol_name = identify_protocol_by_id(id).PROTOCOL_NAME
|
||||
utils = importlib.import_module("federation.utils.%s" % protocol_name)
|
||||
return utils.retrieve_and_parse_content(
|
||||
id=id, guid=guid, handle=handle, entity_type=entity_type, sender_key_fetcher=sender_key_fetcher,
|
||||
id=id, guid=guid, handle=handle, entity_type=entity_type,
|
||||
cache=cache, sender_key_fetcher=sender_key_fetcher,
|
||||
)
|
||||
|
||||
|
||||
|
|
|
@ -34,15 +34,15 @@ def get_profile_id_from_webfinger(handle: str) -> Optional[str]:
|
|||
|
||||
|
||||
def retrieve_and_parse_content(**kwargs) -> Optional[Any]:
|
||||
return retrieve_and_parse_document(kwargs.get("id"))
|
||||
return retrieve_and_parse_document(kwargs.get("id"), cache=kwargs.get('cache',True))
|
||||
|
||||
|
||||
def retrieve_and_parse_document(fid: str) -> Optional[Any]:
|
||||
def retrieve_and_parse_document(fid: str, cache: bool=True) -> Optional[Any]:
|
||||
"""
|
||||
Retrieve remote document by ID and return the entity.
|
||||
"""
|
||||
from federation.entities.activitypub.models import element_to_objects # Circulars
|
||||
document, status_code, ex = fetch_document(fid, extra_headers={'accept': 'application/activity+json'},
|
||||
document, status_code, ex = fetch_document(fid, extra_headers={'accept': 'application/activity+json'}, cache=cache,
|
||||
auth=get_http_authentication(federation_user.rsa_private_key,f'{federation_user.id}#main-key') if federation_user else None)
|
||||
if document:
|
||||
try:
|
||||
|
|
|
@ -162,7 +162,8 @@ def parse_profile_from_hcard(hcard: str, handle: str):
|
|||
|
||||
|
||||
def retrieve_and_parse_content(
|
||||
id: str, guid: str, handle: str, entity_type: str, sender_key_fetcher: Callable[[str], str]=None):
|
||||
id: str, guid: str, handle: str, entity_type: str, cache: bool=True,
|
||||
sender_key_fetcher: Callable[[str], str]=None):
|
||||
"""Retrieve remote content and return an Entity class instance.
|
||||
|
||||
This is basically the inverse of receiving an entity. Instead, we fetch it, then call "handle_receive".
|
||||
|
@ -175,7 +176,7 @@ def retrieve_and_parse_content(
|
|||
return
|
||||
_username, domain = handle.split("@")
|
||||
url = get_fetch_content_endpoint(domain, entity_type.lower(), guid)
|
||||
document, status_code, error = fetch_document(url)
|
||||
document, status_code, error = fetch_document(url, cache=cache)
|
||||
if status_code == 200:
|
||||
request = RequestType(body=document)
|
||||
_sender, _protocol, entities = handle_receive(request, sender_key_fetcher=sender_key_fetcher)
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
import importlib
|
||||
from requests_cache import RedisCache, SQLiteCache
|
||||
|
||||
from django.conf import settings
|
||||
from django.core.exceptions import ImproperlyConfigured
|
||||
|
@ -59,3 +60,11 @@ def get_federation_user():
|
|||
|
||||
return UserType(id=config['federation_id'], private_key=key)
|
||||
|
||||
def get_requests_cache_backend(namespace):
    """
    Return a requests_cache backend for the given namespace.

    Use RedisCache if a ``redis`` entry is present in the federation
    configuration, else fall back to SQLiteCache.

    :param namespace: Name of the cache; passed as the first argument to
        ``RedisCache`` or as the ``db_path`` of ``SQLiteCache``.
    :returns: A ``RedisCache`` or ``SQLiteCache`` instance.
    """
    redis_config = get_configuration().get('redis')
    if redis_config:
        return RedisCache(namespace, **redis_config)

    # Forward the namespace so each cache gets its own database rather than
    # every caller sharing the library's default database path.
    return SQLiteCache(db_path=namespace)
|
||||
|
|
|
@ -8,30 +8,34 @@ from urllib.parse import quote
|
|||
from uuid import uuid4
|
||||
|
||||
import requests
|
||||
from requests_cache import CachedSession, DO_NOT_CACHE
|
||||
from requests.exceptions import RequestException, HTTPError, SSLError
|
||||
from requests.exceptions import ConnectionError
|
||||
from requests.structures import CaseInsensitiveDict
|
||||
|
||||
from federation import __version__
|
||||
from federation.utils.django import get_requests_cache_backend
|
||||
|
||||
logger = logging.getLogger("federation")
|
||||
|
||||
USER_AGENT = "python/federation/%s" % __version__
|
||||
|
||||
session = CachedSession('fed_cache', backend=get_requests_cache_backend('fed_cache'))
|
||||
EXPIRATION = datetime.timedelta(hours=2)
|
||||
|
||||
def fetch_content_type(url: str) -> Optional[str]:
|
||||
"""
|
||||
Fetch the HEAD of the remote url to determine the content type.
|
||||
"""
|
||||
try:
|
||||
response = requests.head(url, headers={'user-agent': USER_AGENT}, timeout=10)
|
||||
response = session.head(url, headers={'user-agent': USER_AGENT}, timeout=10)
|
||||
except RequestException as ex:
|
||||
logger.warning("fetch_content_type - %s when fetching url %s", ex, url)
|
||||
else:
|
||||
return response.headers.get('Content-Type')
|
||||
|
||||
|
||||
def fetch_document(url=None, host=None, path="/", timeout=10, raise_ssl_errors=True, extra_headers=None, **kwargs):
|
||||
def fetch_document(url=None, host=None, path="/", timeout=10, raise_ssl_errors=True, extra_headers=None, cache=True, **kwargs):
|
||||
"""Helper method to fetch remote document.
|
||||
|
||||
Must be given either the ``url`` or ``host``.
|
||||
|
@ -60,7 +64,8 @@ def fetch_document(url=None, host=None, path="/", timeout=10, raise_ssl_errors=T
|
|||
# Use url since it was given
|
||||
logger.debug("fetch_document: trying %s", url)
|
||||
try:
|
||||
response = requests.get(url, timeout=timeout, headers=headers, **kwargs)
|
||||
response = session.get(url, timeout=timeout, headers=headers,
|
||||
expire_after=EXPIRATION if cache else DO_NOT_CACHE, **kwargs)
|
||||
logger.debug("fetch_document: found document, code %s", response.status_code)
|
||||
response.raise_for_status()
|
||||
return response.text, response.status_code, None
|
||||
|
@ -73,7 +78,7 @@ def fetch_document(url=None, host=None, path="/", timeout=10, raise_ssl_errors=T
|
|||
url = "https://%s%s" % (host_string, path_string)
|
||||
logger.debug("fetch_document: trying %s", url)
|
||||
try:
|
||||
response = requests.get(url, timeout=timeout, headers=headers)
|
||||
response = session.get(url, timeout=timeout, headers=headers)
|
||||
logger.debug("fetch_document: found document, code %s", response.status_code)
|
||||
response.raise_for_status()
|
||||
return response.text, response.status_code, None
|
||||
|
@ -85,7 +90,7 @@ def fetch_document(url=None, host=None, path="/", timeout=10, raise_ssl_errors=T
|
|||
url = url.replace("https://", "http://")
|
||||
logger.debug("fetch_document: trying %s", url)
|
||||
try:
|
||||
response = requests.get(url, timeout=timeout, headers=headers)
|
||||
response = session.get(url, timeout=timeout, headers=headers)
|
||||
logger.debug("fetch_document: found document, code %s", response.status_code)
|
||||
response.raise_for_status()
|
||||
return response.text, response.status_code, None
|
||||
|
@ -116,7 +121,7 @@ def fetch_file(url: str, timeout: int = 30, extra_headers: Dict = None) -> str:
|
|||
headers = {'user-agent': USER_AGENT}
|
||||
if extra_headers:
|
||||
headers.update(extra_headers)
|
||||
response = requests.get(url, timeout=timeout, headers=headers, stream=True)
|
||||
response = session.get(url, timeout=timeout, headers=headers, stream=True)
|
||||
response.raise_for_status()
|
||||
name = f"/tmp/{str(uuid4())}"
|
||||
with open(name, "wb") as f:
|
||||
|
|
Ładowanie…
Reference in New Issue