add Object.expire property for datastore TTL to garbage collect old Objects

Right now, this auto-deletes some ephemeral activities after 90 days. For #456. See https://cloud.google.com/datastore/docs/ttl
pull/489/head
Ryan Barrett 2023-04-18 09:08:45 -07:00
rodzic 24b6a985a1
commit b39d673f12
Nie znaleziono w bazie danych klucza dla tego podpisu
ID klucza GPG: 6BE31FDF4776E9D4
3 zmienionych plików z 32 dodań i 6 usunięć

Wyświetl plik

@ -1,6 +1,6 @@
"""Datastore model classes.""" """Datastore model classes."""
import base64 import base64
from datetime import timezone from datetime import timedelta, timezone
import difflib import difflib
import itertools import itertools
import logging import logging
@ -36,6 +36,19 @@ PROTOCOLS = ('activitypub', 'bluesky', 'ostatus', 'webmention', 'ui')
KEY_BITS = 1024 if DEBUG else 2048 KEY_BITS = 1024 if DEBUG else 2048
PAGE_SIZE = 20 PAGE_SIZE = 20
# Object types that are garbage collected automatically. Objects of these
# types get an Object.expire value, which a Datastore TTL policy uses to
# delete them: https://cloud.google.com/datastore/docs/ttl
# None is listed so that objects whose type is None expire too.
OBJECT_EXPIRE_TYPES = (
    'post',
    'update',
    'delete',
    'accept',
    'reject',
    'undo',
    None,
)
# Added to an object's updated time (or the current time, if unset) to
# compute Object.expire.
OBJECT_EXPIRE_AGE = timedelta(days=90)
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -331,7 +344,6 @@ class Object(StringIdModel):
def _object_ids(self): # id(s) of inner objects def _object_ids(self): # id(s) of inner objects
if self.as1: if self.as1:
return common.redirect_unwrap(as1.get_ids(self.as1, 'object')) return common.redirect_unwrap(as1.get_ids(self.as1, 'object'))
object_ids = ndb.ComputedProperty(_object_ids, repeated=True) object_ids = ndb.ComputedProperty(_object_ids, repeated=True)
deleted = ndb.BooleanProperty() deleted = ndb.BooleanProperty()
@ -343,6 +355,16 @@ class Object(StringIdModel):
created = ndb.DateTimeProperty(auto_now_add=True) created = ndb.DateTimeProperty(auto_now_add=True)
updated = ndb.DateTimeProperty(auto_now=True) updated = ndb.DateTimeProperty(auto_now=True)
# Expiration time for this Object, used by a Datastore TTL policy to delete
# it automatically:
# https://cloud.google.com/datastore/docs/ttl
# Only set for types in OBJECT_EXPIRE_TYPES; for any other type the value is
# None. The property is left unindexed because the TTL docs recommend not
# indexing TTL properties:
# https://cloud.google.com/datastore/docs/ttl#ttl_properties_and_indexes
def _expire(self):
    if self.type not in OBJECT_EXPIRE_TYPES:
        return None
    # fall back to the current time when updated hasn't been populated yet
    last_touched = self.updated or util.now()
    return last_touched + OBJECT_EXPIRE_AGE
expire = ndb.ComputedProperty(_expire, indexed=False)
def _pre_put_hook(self): def _pre_put_hook(self):
assert '^^' not in self.key.id() assert '^^' not in self.key.id()

Wyświetl plik

@ -4,11 +4,11 @@ from unittest import mock
from flask import g, get_flashed_messages from flask import g, get_flashed_messages
from granary import as2 from granary import as2
from oauth_dropins.webutil.testutil import requests_response from oauth_dropins.webutil.testutil import NOW, requests_response
from app import app from app import app
import common import common
from models import Follower, Object, User from models import Follower, Object, OBJECT_EXPIRE_AGE, User
import protocol import protocol
from protocol import Protocol from protocol import Protocol
from . import testutil from . import testutil
@ -329,6 +329,10 @@ class ObjectTest(testutil.TestCase):
def test_computed_properties_without_as1(self): def test_computed_properties_without_as1(self):
Object(id='a').put() Object(id='a').put()
def test_expire(self):
    # an 'update' activity is one of the expiring types, so expire should be
    # OBJECT_EXPIRE_AGE past NOW
    expected = NOW + OBJECT_EXPIRE_AGE
    update = Object(id='a', our_as1={'objectType': 'activity', 'verb': 'update'})
    self.assertEqual(expected, update.expire)
def test_put_adds_removes_activity_label(self): def test_put_adds_removes_activity_label(self):
obj = Object(id='x#y', our_as1={}) obj = Object(id='x#y', our_as1={})
obj.put() obj.put()

Wyświetl plik

@ -182,8 +182,8 @@ class TestCase(unittest.TestCase, testutil.Asserts):
got.mf2.pop('url', None) got.mf2.pop('url', None)
self.assert_entities_equal(Object(id=id, **props), got, self.assert_entities_equal(Object(id=id, **props), got,
ignore=['as1', 'created', 'object_ids', ignore=['as1', 'created', 'expire',
'type', 'updated']) 'object_ids', 'type', 'updated'])
def assert_equals(self, expected, actual, msg=None, ignore=(), **kwargs): def assert_equals(self, expected, actual, msg=None, ignore=(), **kwargs):
return super().assert_equals( return super().assert_equals(