From e463c80250941a5489d1edf53c6951218281ac45 Mon Sep 17 00:00:00 2001 From: Jake Howard Date: Mon, 23 Jan 2023 17:08:19 +0000 Subject: [PATCH] Prevent memory exhaustion when running wagtail_update_image_renditions with many renditions This reduces the memory overhead of the command, allowing it to be run on sites with lots of images. Whilst that may reduce performance, I've also added a `select_related` and massively simplified the `purge_only` path to counter --- CHANGELOG.txt | 1 + docs/releases/5.1.md | 1 + .../wagtail_update_image_renditions.py | 86 ++++++++++++------- .../images/tests/test_management_commands.py | 44 ++++++---- 4 files changed, 84 insertions(+), 48 deletions(-) diff --git a/CHANGELOG.txt b/CHANGELOG.txt index a846cef165..fb1590f91d 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -42,6 +42,7 @@ Changelog * Fix: Avoid `ValueError` when extending `PagesAPIViewSet` and setting `meta_fields` to an empty list (Henry Harutyunyan, Alex Morega) * Fix: Improve accessibility for header search, remove autofocus on page load, advise screen readers that content has changed when results update (LB (Ben) Johnston) * Fix: Fix incorrect override of `PagePermissionHelper.user_can_unpublish_obj()` in ModelAdmin (Sébastien Corbin) + * Fix: Prevent memory exhaustion when updating a large number of image renditions (Jake Howard) * Docs: Document how to add non-ModelAdmin views to a `ModelAdminGroup` (Onno Timmerman) * Docs: Document how to add StructBlock data to a StreamField (Ramon Wenger) * Docs: Update ReadTheDocs settings to v2 to resolve urllib3 issue in linkcheck extension (Thibaud Colas) diff --git a/docs/releases/5.1.md b/docs/releases/5.1.md index 7ec6e5d7cf..b51d1cc9d4 100644 --- a/docs/releases/5.1.md +++ b/docs/releases/5.1.md @@ -76,6 +76,7 @@ The `wagtail start` command now supports an optional `--template` argument that * Avoid `ValueError` when extending `PagesAPIViewSet` and setting `meta_fields` to an empty list (Henry Harutyunyan, Alex Morega) * Improve accessibility for header search, remove autofocus on page load, advise screen readers that content has changed when results update (LB (Ben) Johnston) * Fix incorrect override of `PagePermissionHelper.user_can_unpublish_obj()` in ModelAdmin (Sébastien Corbin) + * Prevent memory exhaustion when updating a large number of image renditions (Jake Howard) ### Documentation diff --git a/wagtail/images/management/commands/wagtail_update_image_renditions.py b/wagtail/images/management/commands/wagtail_update_image_renditions.py index d7580560a7..3d75960023 100644 --- a/wagtail/images/management/commands/wagtail_update_image_renditions.py +++ b/wagtail/images/management/commands/wagtail_update_image_renditions.py @@ -1,7 +1,12 @@ +import logging + from django.core.management.base import BaseCommand +from django.db import transaction from wagtail.images import get_image_model +logger = logging.getLogger(__name__) + class Command(BaseCommand): """Command to create missing image renditions with the option to remove (purge) any existing ones.""" @@ -14,43 +19,66 @@ class Command(BaseCommand): action="store_true", help="Purge all image renditions without regenerating them", ) + parser.add_argument( + "--chunk-size", + type=int, + default=50, + help="Operate in x size chunks (default: %(default)s)", + ) def handle(self, *args, **options): - renditions = get_image_model().get_rendition_model().objects.all() - if len(renditions) == 0: - self.stdout.write("No image renditions found.") + Rendition = get_image_model().get_rendition_model() + + renditions = Rendition.objects.all() + + purge_only = options["purge_only"] + + if not renditions.exists(): + self.stdout.write(self.style.WARNING("No image renditions found.")) return - success_count = 0 - if options["purge_only"]: - for rendition in renditions: - try: + rendition_ids = list(renditions.values_list("id", flat=True)) + num_renditions = len(rendition_ids) + + if purge_only: + self.stdout.write( + self.style.HTTP_INFO(f"Purging {num_renditions} rendition(s)") + ) + else: + self.stdout.write( + self.style.HTTP_INFO(f"Regenerating {num_renditions} rendition(s)") + ) + + for rendition in ( + # Pre-calculate the ids of the renditions to change, + # otherwise `.iterator` never ends. + renditions.filter(id__in=rendition_ids) + .select_related("image") + .iterator(chunk_size=options["chunk_size"]) + ): + try: + with transaction.atomic(): + rendition_filter = rendition.filter rendition_image = rendition.image + + # Delete the existing rendition rendition.delete() - success_count = success_count + 1 - except Exception: # noqa: BLE001 - self.stderr.write( - f"Could not purge rendition for {rendition_image.title}" - ) + + if not purge_only: + # Create a new one + rendition_image.get_rendition(rendition_filter) + except: # noqa:E722 + logger.exception("Error operating on rendition %d", rendition.id) + self.stderr.write( + self.style.ERROR(f"Failed to operate on rendition {rendition.id}") + ) + num_renditions -= 1 + + if num_renditions: self.stdout.write( self.style.SUCCESS( - f"Successfully purged {success_count} image rendition(s)" + f"Successfully processed {num_renditions} rendition(s)" ) ) else: - for rendition in renditions: - try: - rendition_filter = rendition.filter - rendition_image = rendition.image - rendition.delete() - rendition_image.get_rendition(rendition_filter) - success_count = success_count + 1 - except Exception: # noqa: BLE001 - self.stderr.write( - f"Could not regenerate rendition for {rendition_image.title}" - ) - self.stdout.write( - self.style.SUCCESS( - f"Successfully regenerated {success_count} image rendition(s)" - ) - ) + self.stdout.write(self.style.WARNING("Could not process any renditions.")) diff --git a/wagtail/images/tests/test_management_commands.py b/wagtail/images/tests/test_management_commands.py index 8f7de2252f..117cbe3fd8 100644 --- a/wagtail/images/tests/test_management_commands.py +++ b/wagtail/images/tests/test_management_commands.py @@ -5,20 +5,22 @@ from io import StringIO from django.core import management from django.test import TestCase -from wagtail.images import get_image_model - from .utils import Image, get_test_image_file +# note .utils.Image already does get_image_model() +Rendition = Image.get_rendition_model() + class TestUpdateImageRenditions(TestCase): - def setUp(self): - self.image = Image.objects.create( + @classmethod + def setUpTestData(cls): + cls.image = Image.objects.create( title="Test image", file=get_test_image_file(filename="test_image.png", colour="white"), ) - self.rendition = Image.get_rendition_model().objects.create( - image=self.image, + cls.rendition = Rendition.objects.create( + image=cls.image, filter_spec="original", width=1000, height=1000, @@ -27,8 +29,10 @@ class TestUpdateImageRenditions(TestCase): ), ) + cls.reaesc = re.compile(r"\x1b[^m]*m") + def delete_renditions(self): - renditions = get_image_model().get_rendition_model().objects.all() + renditions = Rendition.objects.all() for rendition in renditions: try: rendition_image = rendition.image @@ -49,42 +53,44 @@ class TestUpdateImageRenditions(TestCase): self.delete_renditions() # checking when command is called without any arguments output = self.run_command() - self.assertEqual(output.read(), "No image renditions found.\n") + output_string = self.reaesc.sub("", output.read()) + self.assertEqual(output_string, "No image renditions found.\n") # checking when command is called with '--purge-only' output = self.run_command(purge_only=True) - self.assertEqual(output.read(), "No image renditions found.\n") + output_string = self.reaesc.sub("", output.read()) + self.assertEqual(output_string, "No image renditions found.\n") def test_image_renditions(self): - renditions = get_image_model().get_rendition_model().objects.all() + renditions = Rendition.objects.all() total_renditions = len(renditions) output = self.run_command() - reaesc = re.compile(r"\x1b[^m]*m") - output_string = reaesc.sub("", output.read()) + output_string = self.reaesc.sub("", output.read()) # checking if the number of renditions regenerated equal total_renditions self.assertEqual( output_string, - f"Successfully regenerated {total_renditions} image rendition(s)\n", + f"Regenerating {total_renditions} rendition(s)\n" + f"Successfully processed {total_renditions} rendition(s)\n", ) # checking if the number of renditions now equal total_renditions - renditions_now = get_image_model().get_rendition_model().objects.all() + renditions_now = Rendition.objects.all() total_renditions_now = len(renditions_now) self.assertEqual(total_renditions_now, total_renditions) def test_image_renditions_with_purge_only(self): - renditions = get_image_model().get_rendition_model().objects.all() + renditions = Rendition.objects.all() total_renditions = len(renditions) output = self.run_command(purge_only=True) - reaesc = re.compile(r"\x1b[^m]*m") - output_string = reaesc.sub("", output.read()) + output_string = self.reaesc.sub("", output.read()) # checking if the number of renditions purged equal total_renditions self.assertEqual( output_string, - f"Successfully purged {total_renditions} image rendition(s)\n", + f"Purging {total_renditions} rendition(s)\n" + f"Successfully processed {total_renditions} rendition(s)\n", ) # checking if the number of renditions now equal 0 - renditions_now = get_image_model().get_rendition_model().objects.all() + renditions_now = Rendition.objects.all() total_renditions_now = len(renditions_now) self.assertEqual(total_renditions_now, 0)