Start support for pruning old inbox data

v2
Thomas Sileo 2022-08-18 23:48:00 +02:00
rodzic 08618c3c72
commit 0ffacca796
3 zmienionych plików z 89 dodań i 0 usunięć

Wyświetl plik

@ -72,6 +72,8 @@ class Config(pydantic.BaseModel):
code_highlighting_theme = "friendly_grayscale"
blocked_servers: list[_BlockedServer] = []
inbox_retention_days: int = 15
# Config items to make tests easier
sqlalchemy_database: str | None = None
key_path: str | None = None
@ -118,6 +120,8 @@ if CONFIG.privacy_replace:
BLOCKED_SERVERS = {blocked_server.hostname for blocked_server in CONFIG.blocked_servers}
INBOX_RETENTION_DAYS = CONFIG.inbox_retention_days
BASE_URL = ID
DEBUG = CONFIG.debug
DB_PATH = CONFIG.sqlalchemy_database or ROOT_DIR / "data" / "microblogpub.db"

77
app/prune.py 100644
Wyświetl plik

@ -0,0 +1,77 @@
from datetime import timedelta

from loguru import logger
from sqlalchemy import and_
from sqlalchemy import delete
from sqlalchemy import not_
from sqlalchemy import text

from app import activitypub as ap
from app import models
from app.config import BASE_URL
from app.config import INBOX_RETENTION_DAYS
from app.database import AsyncSession
from app.database import async_session
from app.utils.datetime import now
async def prune_old_data(
    db_session: AsyncSession,
) -> None:
    """Delete inbox data older than ``INBOX_RETENTION_DAYS`` and reclaim disk space.

    Runs both pruning passes (incoming activities, then inbox objects),
    commits the deletions, and finally issues a ``VACUUM`` so the database
    file actually shrinks.
    """
    logger.info(f"Pruning old data with {INBOX_RETENTION_DAYS=}")
    await _prune_old_incoming_activities(db_session)
    await _prune_old_inbox_objects(db_session)
    await db_session.commit()
    # Reclaim disk space. Wrap the raw SQL in text(): SQLAlchemy 2.0 no
    # longer accepts plain strings in execute() (which is why the original
    # line needed a "type: ignore").
    await db_session.execute(text("VACUUM"))
async def _prune_old_incoming_activities(
    db_session: AsyncSession,
) -> None:
    """Delete processed incoming activities past the retention window.

    Errored activities are deliberately excluded from deletion so they
    remain available for debugging.
    """
    cutoff = now() - timedelta(days=INBOX_RETENTION_DAYS)
    deletion = (
        delete(models.IncomingActivity)
        .where(
            models.IncomingActivity.created_at < cutoff,
            # Keep failed activity for debug
            models.IncomingActivity.is_errored.is_(False),
        )
        .execution_options(synchronize_session=False)
    )
    outcome = await db_session.execute(deletion)
    logger.info(f"Deleted {outcome.rowcount} old incoming activities")  # type: ignore
async def _prune_old_inbox_objects(
    db_session: AsyncSession,
) -> None:
    """Delete inbox objects older than the retention window.

    A row is only deleted when *every* filter below holds (multiple
    ``where()`` arguments are AND-ed), so any single "keep" condition
    protects the object from deletion.
    """
    result = await db_session.execute(
        delete(models.InboxObject)
        .where(
            # Keep bookmarked objects
            models.InboxObject.is_bookmarked.is_(False),
            # Keep liked objects
            models.InboxObject.liked_via_outbox_object_ap_id.is_(None),
            # Keep announced objects
            models.InboxObject.announced_via_outbox_object_ap_id.is_(None),
            # Keep objects related to local conversations
            # NOTE(review): for rows with a NULL conversation this predicate
            # evaluates to NULL (not true) in SQL, so such rows are kept —
            # confirm that is the intended behavior.
            models.InboxObject.conversation.not_like(f"{BASE_URL}/%"),
            # Keep direct messages
            not_(
                and_(
                    models.InboxObject.visibility == ap.VisibilityEnum.DIRECT,
                    models.InboxObject.ap_type.in_(["Note"]),
                )
            ),
            # Filter by retention days
            models.InboxObject.ap_published_at
            < now() - timedelta(days=INBOX_RETENTION_DAYS),
        )
        .execution_options(synchronize_session=False)
    )
    logger.info(f"Deleted {result.rowcount} old inbox objects")  # type: ignore
async def run_prune_old_data() -> None:
    """Entry point for the prune task: open a DB session and prune old inbox data."""
    async with async_session() as db_session:
        await prune_old_data(db_session)

Wyświetl plik

@ -181,3 +181,11 @@ def build_docker_image(ctx):
# type: (Context) -> None
with embed_version():
run("docker build -t microblogpub/microblogpub .")
@task
def prune_old_data(ctx):
    # type: (Context) -> None
    """Prune inbox rows older than the configured retention window."""
    # Imported lazily so listing invoke tasks doesn't require the app's
    # runtime dependencies (DB, config) to be importable.
    from app.prune import run_prune_old_data
    asyncio.run(run_prune_old_data())