From 74f69a3813b95142c3ee14e2baacbebd9700af6b Mon Sep 17 00:00:00 2001
From: Andrew Godwin
Date: Sun, 12 Nov 2023 18:01:01 -0700
Subject: [PATCH] Add identity pruning, improve post pruning

---

Notes (review):
    * takahe/settings.py: the REMOTE_PRUNE_HORIZON default changes from
      0 to 90, so after this patch both prune commands run by default
      instead of exiting with status 2.
    * Both commands exit with status 2 when REMOTE_PRUNE_HORIZON is
      falsy and with status 1 when nothing was deleted.
    * pruneposts.py: posts whose visibility is
      Post.Visibilities.mentioned are now excluded from pruning, in
      addition to posts with an interaction from a local identity.
    * pruneidentities.py: the candidate filter uses
      created__lt=timezone.now() with no horizon offset, whereas
      pruneposts.py subtracts
      datetime.timedelta(days=settings.SETUP.REMOTE_PRUNE_HORIZON).
      As written, the setting only switches identity pruning on or off;
      it does not delay it. The settings.py comment says the horizon
      covers "posts/profiles" -- TODO confirm whether omitting the
      offset here is intentional.
    * pruneidentities.py: identity_ids is the sliced values_list()
      queryset itself; it is passed to len() for the "found" message
      and then to id__in= for the delete. Presumably the ORM runs it
      again as a nested query at that point, so the printed count and
      the deleted set could differ -- verify, and consider
      materialising the ids into a list first (pruneposts.py already
      builds a dict before deleting).

 activities/management/commands/pruneposts.py | 18 +++++--
 takahe/settings.py                           |  6 +--
 users/management/__init__.py                 |  0
 users/management/commands/__init__.py        |  0
 users/management/commands/pruneidentities.py | 52 ++++++++++++++++++++
 5 files changed, 70 insertions(+), 6 deletions(-)
 create mode 100644 users/management/__init__.py
 create mode 100644 users/management/commands/__init__.py
 create mode 100644 users/management/commands/pruneidentities.py

diff --git a/activities/management/commands/pruneposts.py b/activities/management/commands/pruneposts.py
index c405c0c..8c3667c 100644
--- a/activities/management/commands/pruneposts.py
+++ b/activities/management/commands/pruneposts.py
@@ -3,6 +3,7 @@ import sys
 
 from django.conf import settings
 from django.core.management.base import BaseCommand
+from django.db.models import Q
 from django.utils import timezone
 
 from activities.models import Post
@@ -21,13 +22,21 @@ class Command(BaseCommand):
         )
 
     def handle(self, number: int, *args, **options):
+        if not settings.SETUP.REMOTE_PRUNE_HORIZON:
+            print("Pruning has been disabled as REMOTE_PRUNE_HORIZON=0")
+            sys.exit(2)
         # Find a set of posts that match the initial criteria
         print(f"Running query to find up to {number} old posts...")
         posts = Post.objects.filter(
             local=False,
             created__lt=timezone.now()
             - datetime.timedelta(days=settings.SETUP.REMOTE_PRUNE_HORIZON),
-        ).exclude(interactions__identity__local=True)[:number]
+        ).exclude(
+            Q(interactions__identity__local=True)
+            | Q(visibility=Post.Visibilities.mentioned)
+        )[
+            :number
+        ]
         post_ids_and_uris = dict(posts.values_list("object_uri", "id"))
         print(f" found {len(post_ids_and_uris)}")
 
@@ -43,9 +52,12 @@ class Command(BaseCommand):
 
         # Delete them
         print(f" down to {len(post_ids_and_uris)} to delete")
-        number_deleted, _ = Post.objects.filter(
+        print("Deleting...")
+        number_deleted, deleted = Post.objects.filter(
             id__in=post_ids_and_uris.values()
         ).delete()
-        print(f"Deleted {number_deleted} posts and dependencies")
+        print("Deleted:")
+        for model, model_deleted in deleted.items():
+            print(f" {model}: {model_deleted}")
         if number_deleted == 0:
             sys.exit(1)
diff --git a/takahe/settings.py b/takahe/settings.py
index f9a422d..a2724f1 100644
--- a/takahe/settings.py
+++ b/takahe/settings.py
@@ -143,9 +143,9 @@ class Settings(BaseSettings):
     CACHES_DEFAULT: CacheBackendUrl | None = None
 
     # How long to wait, in days, until remote posts/profiles are pruned from
-    # our database if nobody local has interacted with them. Must be in rough
-    # multiples of two weeks. Set to zero to disable.
-    REMOTE_PRUNE_HORIZON: int = 0
+    # our database if nobody local has interacted with them.
+    # Set to zero to disable.
+    REMOTE_PRUNE_HORIZON: int = 90
 
     # Stator tuning
     STATOR_CONCURRENCY: int = 50
diff --git a/users/management/__init__.py b/users/management/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/users/management/commands/__init__.py b/users/management/commands/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/users/management/commands/pruneidentities.py b/users/management/commands/pruneidentities.py
new file mode 100644
index 0000000..7bf0a55
--- /dev/null
+++ b/users/management/commands/pruneidentities.py
@@ -0,0 +1,52 @@
+import sys
+
+from django.conf import settings
+from django.core.management.base import BaseCommand
+from django.db.models import Q
+from django.utils import timezone
+
+from users.models import Identity
+
+
+class Command(BaseCommand):
+    help = "Prunes identities that have no local interaction"
+
+    def add_arguments(self, parser):
+        parser.add_argument(
+            "--number",
+            "-n",
+            type=int,
+            default=1000,
+            help="The maximum number of identities to prune at once",
+        )
+
+    def handle(self, number: int, *args, **options):
+        if not settings.SETUP.REMOTE_PRUNE_HORIZON:
+            print("Pruning has been disabled as REMOTE_PRUNE_HORIZON=0")
+            sys.exit(2)
+        # Find a set of identities that match the initial criteria
+        print(f"Running query to find up to {number} unused identities...")
+        identities = Identity.objects.filter(
+            local=False,
+            created__lt=timezone.now(),
+        ).exclude(
+            Q(interactions__post__local=True)
+            | Q(posts__isnull=False)
+            | Q(outbound_follows__isnull=False)
+            | Q(inbound_follows__isnull=False)
+            | Q(outbound_blocks__isnull=False)
+            | Q(inbound_blocks__isnull=False)
+        )[
+            :number
+        ]
+        identity_ids = identities.values_list("id", flat=True)
+        print(f" found {len(identity_ids)}")
+
+        # Delete them
+        print("Deleting...")
+        number_deleted, deleted = Identity.objects.filter(id__in=identity_ids).delete()
+        print("Deleted:")
+        for model, model_deleted in deleted.items():
+            print(f" {model}: {model_deleted}")
+        if number_deleted == 0:
+            sys.exit(1)