AP inbox delivery: cache in memory the activity ids that we've already seen

for #411
pull/424/head
Ryan Barrett 2023-02-12 22:17:04 -08:00
rodzic 7a625c5a02
commit c305dcc8d5
Nie znaleziono w bazie danych klucza dla tego podpisu
ID klucza GPG: 6BE31FDF4776E9D4
2 zmienionych plików z 19 dodań i 3 usunięć

Wyświetl plik

@@ -3,7 +3,9 @@
import datetime
import logging
import re
import threading
from cachetools import LRUCache
from flask import request
from google.cloud import ndb
from google.cloud.ndb import OR
@@ -38,6 +40,10 @@ FETCH_OBJECT_TYPES = (
'Announce',
)
# Activity ids that we've already handled and can now ignore. Kept in a
# bounded LRU cache (maxsize 100000) that lives in this module's memory.
seen_ids = LRUCache(100000)
# Taken around the reads and writes of seen_ids in the inbox handler;
# cachetools caches are documented as not thread-safe on their own.
seen_ids_lock = threading.Lock()
@app.get(f'/<regex("{common.DOMAIN_RE}"):domain>')
@flask_util.cached(cache, CACHE_TIME, http_5xx=True)
@@ -92,13 +98,17 @@ def inbox(domain=None):
error('Activity has no id')
# short circuit if we've already seen this activity id
#
with seen_ids_lock:
if id in seen_ids:
error(f'Already handled this activity {id}', status=204)
# (theoretically querying keys-only with a key == filter should be the same
# query plan as get_by_id(), and slightly cheaper, since it doesn't have to
# return the properties?)
if Object.query(Object.key == ndb.Key(Object, id)).get(keys_only=True):
logger.info("Already handled this activity {id}")
return '', 204
with seen_ids_lock:
seen_ids[id] = True
error(f'Already handled this activity {id}', status=204)
activity_as1 = as2.to_as1(activity)
as1_type = as1.object_type(activity_as1)

Wyświetl plik

@@ -844,6 +844,12 @@ class ActivityPubTest(testutil.TestCase):
self.assertEqual(204, got.status_code)
self.assertEqual(0, Follower.query().count())
# second time should use the in-memory cache
obj_key.delete()
got = self.client.post('/foo.com/inbox', json=FOLLOW_WRAPPED)
self.assertEqual(204, got.status_code)
self.assertEqual(0, Follower.query().count())
def test_followers_collection_unknown_user(self, *args):
resp = self.client.get('/nope.com/followers')
self.assertEqual(404, resp.status_code)