kopia lustrzana https://gitlab.com/jaywink/federation
Rework requests caching. Set a 2-hour expiration on cache entries, except for JSON-LD term definitions. Try to get remote profiles from the client app before fetching.
rodzic
7b2de4811c
commit
dad652a264
|
@ -21,28 +21,12 @@ from federation.entities.utils import get_base_attributes, get_profile
|
||||||
from federation.outbound import handle_send
|
from federation.outbound import handle_send
|
||||||
from federation.types import UserType, ReceiverVariant
|
from federation.types import UserType, ReceiverVariant
|
||||||
from federation.utils.activitypub import retrieve_and_parse_document, retrieve_and_parse_profile, get_profile_id_from_webfinger
|
from federation.utils.activitypub import retrieve_and_parse_document, retrieve_and_parse_profile, get_profile_id_from_webfinger
|
||||||
|
from federation.utils.django import get_requests_cache_backend
|
||||||
from federation.utils.text import with_slash, validate_handle
|
from federation.utils.text import with_slash, validate_handle
|
||||||
import federation.entities.base as base
|
import federation.entities.base as base
|
||||||
|
|
||||||
logger = logging.getLogger("federation")
|
logger = logging.getLogger("federation")
|
||||||
|
|
||||||
# Make django federation parameters globally available
|
|
||||||
# if possible
|
|
||||||
try:
|
|
||||||
from federation.utils.django import get_configuration
|
|
||||||
django_params = get_configuration()
|
|
||||||
except ImportError:
|
|
||||||
django_params = {}
|
|
||||||
|
|
||||||
# try to obtain redis config from django and use as
|
|
||||||
# requests_cache backend if available
|
|
||||||
if django_params.get('redis'):
|
|
||||||
backend = rc.RedisCache(namespace='fed_cache', **django_params['redis'])
|
|
||||||
else:
|
|
||||||
backend = rc.SQLiteCache(db_path='fed_cache')
|
|
||||||
logger.info('Using %s for requests_cache', type(backend))
|
|
||||||
|
|
||||||
|
|
||||||
# This is required to workaround a bug in pyld that has the Accept header
|
# This is required to workaround a bug in pyld that has the Accept header
|
||||||
# accept other content types. From what I understand, precedence handling
|
# accept other content types. From what I understand, precedence handling
|
||||||
# is broken
|
# is broken
|
||||||
|
@ -52,7 +36,7 @@ def get_loader(*args, **kwargs):
|
||||||
|
|
||||||
def loader(url, options={}):
|
def loader(url, options={}):
|
||||||
options['headers']['Accept'] = 'application/ld+json'
|
options['headers']['Accept'] = 'application/ld+json'
|
||||||
with rc.enabled(cache_name='fed_cache', backend=backend):
|
with rc.enabled(cache_name='ld_cache', backend=get_requests_cache_backend('ld_cache')):
|
||||||
return requests_loader(url, options)
|
return requests_loader(url, options)
|
||||||
|
|
||||||
return loader
|
return loader
|
||||||
|
@ -63,8 +47,7 @@ jsonld.set_document_loader(get_loader())
|
||||||
def get_profile_or_entity(fid):
|
def get_profile_or_entity(fid):
|
||||||
obj = get_profile(fid=fid)
|
obj = get_profile(fid=fid)
|
||||||
if not obj:
|
if not obj:
|
||||||
with rc.enabled(cache_name='fed_cache', backend=backend):
|
obj = retrieve_and_parse_document(fid)
|
||||||
obj = retrieve_and_parse_document(fid)
|
|
||||||
return obj
|
return obj
|
||||||
|
|
||||||
|
|
||||||
|
@ -606,6 +589,7 @@ class Person(Object, base.Profile):
|
||||||
capabilities = CompactedDict(litepub.capabilities)
|
capabilities = CompactedDict(litepub.capabilities)
|
||||||
suspended = fields.Boolean(toot.suspended)
|
suspended = fields.Boolean(toot.suspended)
|
||||||
public = True
|
public = True
|
||||||
|
finger = None
|
||||||
_cached_inboxes = None
|
_cached_inboxes = None
|
||||||
_cached_public_key = None
|
_cached_public_key = None
|
||||||
_cached_image_urls = None
|
_cached_image_urls = None
|
||||||
|
@ -624,15 +608,18 @@ class Person(Object, base.Profile):
|
||||||
super().__init__(*args, **kwargs)
|
super().__init__(*args, **kwargs)
|
||||||
self._allowed_children += (PropertyValue, IdentityProof)
|
self._allowed_children += (PropertyValue, IdentityProof)
|
||||||
|
|
||||||
# Set handle to username@host if not provided by the platform
|
# Set finger to username@host if not provided by the platform
|
||||||
def post_receive(self):
|
def post_receive(self):
|
||||||
if not self.finger:
|
profile = get_profile(fid=self.id)
|
||||||
|
if getattr(profile, 'finger', None):
|
||||||
|
self.finger = profile.finger
|
||||||
|
else:
|
||||||
domain = urlparse(self.id).netloc
|
domain = urlparse(self.id).netloc
|
||||||
finger = f'{self.username.lower()}@{domain}'
|
finger = f'{self.username.lower()}@{domain}'
|
||||||
with rc.enabled(cache_name='fed_cache', backend=backend):
|
if get_profile_id_from_webfinger(finger) == self.id:
|
||||||
if get_profile_id_from_webfinger(finger) == self.id:
|
self.finger = finger
|
||||||
self.finger = finger
|
# multi-protocol platform
|
||||||
if self.guid and not self.handle:
|
if self.finger and self.guid and not self.handle:
|
||||||
self.handle = self.finger
|
self.handle = self.finger
|
||||||
|
|
||||||
def to_as2(self):
|
def to_as2(self):
|
||||||
|
@ -1269,8 +1256,8 @@ def extract_receivers(entity):
|
||||||
profile = None
|
profile = None
|
||||||
# don't care about receivers for payloads without an actor_id
|
# don't care about receivers for payloads without an actor_id
|
||||||
if getattr(entity, 'actor_id'):
|
if getattr(entity, 'actor_id'):
|
||||||
with rc.enabled(cache_name='fed_cache', backend=backend):
|
profile = get_profile(fid=entity.actor_id)
|
||||||
profile = retrieve_and_parse_profile(entity.actor_id)
|
if not profile: profile = retrieve_and_parse_profile(entity.actor_id)
|
||||||
if not profile: return receivers
|
if not profile: return receivers
|
||||||
|
|
||||||
for attr in ("to", "cc"):
|
for attr in ("to", "cc"):
|
||||||
|
|
|
@ -28,7 +28,8 @@ def retrieve_remote_content(
|
||||||
protocol_name = identify_protocol_by_id(id).PROTOCOL_NAME
|
protocol_name = identify_protocol_by_id(id).PROTOCOL_NAME
|
||||||
utils = importlib.import_module("federation.utils.%s" % protocol_name)
|
utils = importlib.import_module("federation.utils.%s" % protocol_name)
|
||||||
return utils.retrieve_and_parse_content(
|
return utils.retrieve_and_parse_content(
|
||||||
id=id, guid=guid, handle=handle, entity_type=entity_type, sender_key_fetcher=sender_key_fetcher,
|
id=id, guid=guid, handle=handle, entity_type=entity_type,
|
||||||
|
cache=cache, sender_key_fetcher=sender_key_fetcher,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -34,15 +34,15 @@ def get_profile_id_from_webfinger(handle: str) -> Optional[str]:
|
||||||
|
|
||||||
|
|
||||||
def retrieve_and_parse_content(**kwargs) -> Optional[Any]:
|
def retrieve_and_parse_content(**kwargs) -> Optional[Any]:
|
||||||
return retrieve_and_parse_document(kwargs.get("id"))
|
return retrieve_and_parse_document(kwargs.get("id"), cache=kwargs.get('cache',True))
|
||||||
|
|
||||||
|
|
||||||
def retrieve_and_parse_document(fid: str) -> Optional[Any]:
|
def retrieve_and_parse_document(fid: str, cache: bool=True) -> Optional[Any]:
|
||||||
"""
|
"""
|
||||||
Retrieve remote document by ID and return the entity.
|
Retrieve remote document by ID and return the entity.
|
||||||
"""
|
"""
|
||||||
from federation.entities.activitypub.models import element_to_objects # Circulars
|
from federation.entities.activitypub.models import element_to_objects # Circulars
|
||||||
document, status_code, ex = fetch_document(fid, extra_headers={'accept': 'application/activity+json'},
|
document, status_code, ex = fetch_document(fid, extra_headers={'accept': 'application/activity+json'}, cache=cache,
|
||||||
auth=get_http_authentication(federation_user.rsa_private_key,f'{federation_user.id}#main-key') if federation_user else None)
|
auth=get_http_authentication(federation_user.rsa_private_key,f'{federation_user.id}#main-key') if federation_user else None)
|
||||||
if document:
|
if document:
|
||||||
try:
|
try:
|
||||||
|
|
|
@ -162,7 +162,8 @@ def parse_profile_from_hcard(hcard: str, handle: str):
|
||||||
|
|
||||||
|
|
||||||
def retrieve_and_parse_content(
|
def retrieve_and_parse_content(
|
||||||
id: str, guid: str, handle: str, entity_type: str, sender_key_fetcher: Callable[[str], str]=None):
|
id: str, guid: str, handle: str, entity_type: str, cache: bool=True,
|
||||||
|
sender_key_fetcher: Callable[[str], str]=None):
|
||||||
"""Retrieve remote content and return an Entity class instance.
|
"""Retrieve remote content and return an Entity class instance.
|
||||||
|
|
||||||
This is basically the inverse of receiving an entity. Instead, we fetch it, then call "handle_receive".
|
This is basically the inverse of receiving an entity. Instead, we fetch it, then call "handle_receive".
|
||||||
|
@ -175,7 +176,7 @@ def retrieve_and_parse_content(
|
||||||
return
|
return
|
||||||
_username, domain = handle.split("@")
|
_username, domain = handle.split("@")
|
||||||
url = get_fetch_content_endpoint(domain, entity_type.lower(), guid)
|
url = get_fetch_content_endpoint(domain, entity_type.lower(), guid)
|
||||||
document, status_code, error = fetch_document(url)
|
document, status_code, error = fetch_document(url, cache=cache)
|
||||||
if status_code == 200:
|
if status_code == 200:
|
||||||
request = RequestType(body=document)
|
request = RequestType(body=document)
|
||||||
_sender, _protocol, entities = handle_receive(request, sender_key_fetcher=sender_key_fetcher)
|
_sender, _protocol, entities = handle_receive(request, sender_key_fetcher=sender_key_fetcher)
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
import importlib
|
import importlib
|
||||||
|
from requests_cache import RedisCache, SQLiteCache
|
||||||
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from django.core.exceptions import ImproperlyConfigured
|
from django.core.exceptions import ImproperlyConfigured
|
||||||
|
@ -59,3 +60,11 @@ def get_federation_user():
|
||||||
|
|
||||||
return UserType(id=config['federation_id'], private_key=key)
|
return UserType(id=config['federation_id'], private_key=key)
|
||||||
|
|
||||||
|
def get_requests_cache_backend(namespace):
    """
    Return a requests_cache backend for the given cache namespace.

    Use RedisCache if a ``redis`` entry is present in the federation
    configuration, else fall back to SQLiteCache.

    :param namespace: Cache name. Used as the Redis namespace, or as the
        SQLite database path when falling back to SQLite.
    :returns: A ``RedisCache`` or ``SQLiteCache`` instance.
    """
    config = get_configuration()
    redis_params = config.get('redis')
    if not redis_params:
        # Honour the namespace in the fallback as well, so that callers asking
        # for different caches do not end up sharing one default database.
        return SQLiteCache(db_path=namespace)

    return RedisCache(namespace, **redis_params)
|
||||||
|
|
|
@ -8,30 +8,34 @@ from urllib.parse import quote
|
||||||
from uuid import uuid4
|
from uuid import uuid4
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
from requests_cache import CachedSession, DO_NOT_CACHE
|
||||||
from requests.exceptions import RequestException, HTTPError, SSLError
|
from requests.exceptions import RequestException, HTTPError, SSLError
|
||||||
from requests.exceptions import ConnectionError
|
from requests.exceptions import ConnectionError
|
||||||
from requests.structures import CaseInsensitiveDict
|
from requests.structures import CaseInsensitiveDict
|
||||||
|
|
||||||
from federation import __version__
|
from federation import __version__
|
||||||
|
from federation.utils.django import get_requests_cache_backend
|
||||||
|
|
||||||
logger = logging.getLogger("federation")
|
logger = logging.getLogger("federation")
|
||||||
|
|
||||||
USER_AGENT = "python/federation/%s" % __version__
|
USER_AGENT = "python/federation/%s" % __version__
|
||||||
|
|
||||||
|
session = CachedSession('fed_cache', backend=get_requests_cache_backend('fed_cache'))
|
||||||
|
EXPIRATION = datetime.timedelta(hours=2)
|
||||||
|
|
||||||
def fetch_content_type(url: str) -> Optional[str]:
|
def fetch_content_type(url: str) -> Optional[str]:
|
||||||
"""
|
"""
|
||||||
Fetch the HEAD of the remote url to determine the content type.
|
Fetch the HEAD of the remote url to determine the content type.
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
response = requests.head(url, headers={'user-agent': USER_AGENT}, timeout=10)
|
response = session.head(url, headers={'user-agent': USER_AGENT}, timeout=10)
|
||||||
except RequestException as ex:
|
except RequestException as ex:
|
||||||
logger.warning("fetch_content_type - %s when fetching url %s", ex, url)
|
logger.warning("fetch_content_type - %s when fetching url %s", ex, url)
|
||||||
else:
|
else:
|
||||||
return response.headers.get('Content-Type')
|
return response.headers.get('Content-Type')
|
||||||
|
|
||||||
|
|
||||||
def fetch_document(url=None, host=None, path="/", timeout=10, raise_ssl_errors=True, extra_headers=None, **kwargs):
|
def fetch_document(url=None, host=None, path="/", timeout=10, raise_ssl_errors=True, extra_headers=None, cache=True, **kwargs):
|
||||||
"""Helper method to fetch remote document.
|
"""Helper method to fetch remote document.
|
||||||
|
|
||||||
Must be given either the ``url`` or ``host``.
|
Must be given either the ``url`` or ``host``.
|
||||||
|
@ -60,7 +64,8 @@ def fetch_document(url=None, host=None, path="/", timeout=10, raise_ssl_errors=T
|
||||||
# Use url since it was given
|
# Use url since it was given
|
||||||
logger.debug("fetch_document: trying %s", url)
|
logger.debug("fetch_document: trying %s", url)
|
||||||
try:
|
try:
|
||||||
response = requests.get(url, timeout=timeout, headers=headers, **kwargs)
|
response = session.get(url, timeout=timeout, headers=headers,
|
||||||
|
expire_after=EXPIRATION if cache else DO_NOT_CACHE, **kwargs)
|
||||||
logger.debug("fetch_document: found document, code %s", response.status_code)
|
logger.debug("fetch_document: found document, code %s", response.status_code)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
return response.text, response.status_code, None
|
return response.text, response.status_code, None
|
||||||
|
@ -73,7 +78,7 @@ def fetch_document(url=None, host=None, path="/", timeout=10, raise_ssl_errors=T
|
||||||
url = "https://%s%s" % (host_string, path_string)
|
url = "https://%s%s" % (host_string, path_string)
|
||||||
logger.debug("fetch_document: trying %s", url)
|
logger.debug("fetch_document: trying %s", url)
|
||||||
try:
|
try:
|
||||||
response = requests.get(url, timeout=timeout, headers=headers)
|
response = session.get(url, timeout=timeout, headers=headers)
|
||||||
logger.debug("fetch_document: found document, code %s", response.status_code)
|
logger.debug("fetch_document: found document, code %s", response.status_code)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
return response.text, response.status_code, None
|
return response.text, response.status_code, None
|
||||||
|
@ -85,7 +90,7 @@ def fetch_document(url=None, host=None, path="/", timeout=10, raise_ssl_errors=T
|
||||||
url = url.replace("https://", "http://")
|
url = url.replace("https://", "http://")
|
||||||
logger.debug("fetch_document: trying %s", url)
|
logger.debug("fetch_document: trying %s", url)
|
||||||
try:
|
try:
|
||||||
response = requests.get(url, timeout=timeout, headers=headers)
|
response = session.get(url, timeout=timeout, headers=headers)
|
||||||
logger.debug("fetch_document: found document, code %s", response.status_code)
|
logger.debug("fetch_document: found document, code %s", response.status_code)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
return response.text, response.status_code, None
|
return response.text, response.status_code, None
|
||||||
|
@ -116,7 +121,7 @@ def fetch_file(url: str, timeout: int = 30, extra_headers: Dict = None) -> str:
|
||||||
headers = {'user-agent': USER_AGENT}
|
headers = {'user-agent': USER_AGENT}
|
||||||
if extra_headers:
|
if extra_headers:
|
||||||
headers.update(extra_headers)
|
headers.update(extra_headers)
|
||||||
response = requests.get(url, timeout=timeout, headers=headers, stream=True)
|
response = session.get(url, timeout=timeout, headers=headers, stream=True)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
name = f"/tmp/{str(uuid4())}"
|
name = f"/tmp/{str(uuid4())}"
|
||||||
with open(name, "wb") as f:
|
with open(name, "wb") as f:
|
||||||
|
|
Ładowanie…
Reference in New Issue