diff --git a/activitypub.py b/activitypub.py index cf563ba..9e6e904 100644 --- a/activitypub.py +++ b/activitypub.py @@ -3,7 +3,9 @@ import datetime import logging import re +import threading +from cachetools import LRUCache from flask import request from google.cloud import ndb from google.cloud.ndb import OR @@ -38,6 +40,10 @@ FETCH_OBJECT_TYPES = ( 'Announce', ) +# activity ids that we've already handled and can now ignore +seen_ids = LRUCache(100000) +seen_ids_lock = threading.Lock() + @app.get(f'/') @flask_util.cached(cache, CACHE_TIME, http_5xx=True) @@ -92,13 +98,17 @@ def inbox(domain=None): error('Activity has no id') # short circuit if we've already seen this activity id - # + with seen_ids_lock: + if id in seen_ids: + error(f'Already handled this activity {id}', status=204) + # (theoretically querying keys-only with a key == filter should be the same # query plan as get_by_id(), and slightly cheaper, since it doesn't have to # return the properties?) if Object.query(Object.key == ndb.Key(Object, id)).get(keys_only=True): - logger.info("Already handled this activity {id}") - return '', 204 + with seen_ids_lock: + seen_ids[id] = True + error(f'Already handled this activity {id}', status=204) activity_as1 = as2.to_as1(activity) as1_type = as1.object_type(activity_as1) diff --git a/tests/test_activitypub.py b/tests/test_activitypub.py index f00968f..67a36e6 100644 --- a/tests/test_activitypub.py +++ b/tests/test_activitypub.py @@ -844,6 +844,12 @@ class ActivityPubTest(testutil.TestCase): self.assertEqual(204, got.status_code) self.assertEqual(0, Follower.query().count()) + # second time should use in memory cache + obj_key.delete() + got = self.client.post('/foo.com/inbox', json=FOLLOW_WRAPPED) + self.assertEqual(204, got.status_code) + self.assertEqual(0, Follower.query().count()) + def test_followers_collection_unknown_user(self, *args): resp = self.client.get('/nope.com/followers') self.assertEqual(404, resp.status_code)