Prevent memory exhaustion when running wagtail_update_image_renditions with many renditions

This reduces the memory overhead of the command, allowing it to be run on sites with lots of images.
Whilst that may reduce performance, I've also added a `select_related` and massively simplified the
`purge_only` path to counter
pull/10666/head
Jake Howard 2023-01-23 17:08:19 +00:00 zatwierdzone przez zerolab
rodzic 366e7f0153
commit e463c80250
Nie znaleziono w bazie danych klucza dla tego podpisu
4 zmienionych plików z 84 dodań i 48 usunięć

Wyświetl plik

@ -42,6 +42,7 @@ Changelog
* Fix: Avoid `ValueError` when extending `PagesAPIViewSet` and setting `meta_fields` to an empty list (Henry Harutyunyan, Alex Morega)
* Fix: Improve accessibility for header search, remove autofocus on page load, advise screen readers that content has changed when results update (LB (Ben) Johnston)
* Fix: Fix incorrect override of `PagePermissionHelper.user_can_unpublish_obj()` in ModelAdmin (Sébastien Corbin)
* Fix: Prevent memory exhaustion when updating a large number of image renditions (Jake Howard)
* Docs: Document how to add non-ModelAdmin views to a `ModelAdminGroup` (Onno Timmerman)
* Docs: Document how to add StructBlock data to a StreamField (Ramon Wenger)
* Docs: Update ReadTheDocs settings to v2 to resolve urllib3 issue in linkcheck extension (Thibaud Colas)

Wyświetl plik

@ -76,6 +76,7 @@ The `wagtail start` command now supports an optional `--template` argument that
* Avoid `ValueError` when extending `PagesAPIViewSet` and setting `meta_fields` to an empty list (Henry Harutyunyan, Alex Morega)
* Improve accessibility for header search, remove autofocus on page load, advise screen readers that content has changed when results update (LB (Ben) Johnston)
* Fix incorrect override of `PagePermissionHelper.user_can_unpublish_obj()` in ModelAdmin (Sébastien Corbin)
* Prevent memory exhaustion when updating a large number of image renditions (Jake Howard)
### Documentation

Wyświetl plik

@ -1,7 +1,12 @@
import logging
from django.core.management.base import BaseCommand
from django.db import transaction
from wagtail.images import get_image_model
logger = logging.getLogger(__name__)
class Command(BaseCommand):
"""Command to create missing image renditions with the option to remove (purge) any existing ones."""
@ -14,43 +19,66 @@ class Command(BaseCommand):
action="store_true",
help="Purge all image renditions without regenerating them",
)
parser.add_argument(
"--chunk-size",
type=int,
default=50,
help="Operate in x size chunks (default: %(default)s)",
)
def handle(self, *args, **options):
renditions = get_image_model().get_rendition_model().objects.all()
if len(renditions) == 0:
self.stdout.write("No image renditions found.")
Rendition = get_image_model().get_rendition_model()
renditions = Rendition.objects.all()
purge_only = options["purge_only"]
if not renditions.exists():
self.stdout.write(self.style.WARNING("No image renditions found."))
return
success_count = 0
if options["purge_only"]:
for rendition in renditions:
try:
rendition_ids = list(renditions.values_list("id", flat=True))
num_renditions = len(rendition_ids)
if purge_only:
self.stdout.write(
self.style.HTTP_INFO(f"Purging {num_renditions} rendition(s)")
)
else:
self.stdout.write(
self.style.HTTP_INFO(f"Regenerating {num_renditions} rendition(s)")
)
for rendition in (
# Pre-calculate the ids of the renditions to change,
# otherwise `.iterator` never ends.
renditions.filter(id__in=rendition_ids)
.select_related("image")
.iterator(chunk_size=options["chunk_size"])
):
try:
with transaction.atomic():
rendition_filter = rendition.filter
rendition_image = rendition.image
# Delete the existing rendition
rendition.delete()
success_count = success_count + 1
except Exception: # noqa: BLE001
self.stderr.write(
f"Could not purge rendition for {rendition_image.title}"
)
if not purge_only:
# Create a new one
rendition_image.get_rendition(rendition_filter)
except: # noqa:E722
logger.exception("Error operating on rendition %d", rendition.id)
self.stderr.write(
self.style.ERROR(f"Failed to operate on rendition {rendition.id}")
)
num_renditions -= 1
if num_renditions:
self.stdout.write(
self.style.SUCCESS(
f"Successfully purged {success_count} image rendition(s)"
f"Successfully processed {num_renditions} rendition(s)"
)
)
else:
for rendition in renditions:
try:
rendition_filter = rendition.filter
rendition_image = rendition.image
rendition.delete()
rendition_image.get_rendition(rendition_filter)
success_count = success_count + 1
except Exception: # noqa: BLE001
self.stderr.write(
f"Could not regenerate rendition for {rendition_image.title}"
)
self.stdout.write(
self.style.SUCCESS(
f"Successfully regenerated {success_count} image rendition(s)"
)
)
self.stdout.write(self.style.WARNING("Could not process any renditions."))

Wyświetl plik

@ -5,20 +5,22 @@ from io import StringIO
from django.core import management
from django.test import TestCase
from wagtail.images import get_image_model
from .utils import Image, get_test_image_file
# note .utils.Image already does get_image_model()
Rendition = Image.get_rendition_model()
class TestUpdateImageRenditions(TestCase):
def setUp(self):
self.image = Image.objects.create(
@classmethod
def setUpTestData(cls):
cls.image = Image.objects.create(
title="Test image",
file=get_test_image_file(filename="test_image.png", colour="white"),
)
self.rendition = Image.get_rendition_model().objects.create(
image=self.image,
cls.rendition = Rendition.objects.create(
image=cls.image,
filter_spec="original",
width=1000,
height=1000,
@ -27,8 +29,10 @@ class TestUpdateImageRenditions(TestCase):
),
)
cls.reaesc = re.compile(r"\x1b[^m]*m")
def delete_renditions(self):
renditions = get_image_model().get_rendition_model().objects.all()
renditions = Rendition.objects.all()
for rendition in renditions:
try:
rendition_image = rendition.image
@ -49,42 +53,44 @@ class TestUpdateImageRenditions(TestCase):
self.delete_renditions()
# checking when command is called without any arguments
output = self.run_command()
self.assertEqual(output.read(), "No image renditions found.\n")
output_string = self.reaesc.sub("", output.read())
self.assertEqual(output_string, "No image renditions found.\n")
# checking when command is called with '--purge-only'
output = self.run_command(purge_only=True)
self.assertEqual(output.read(), "No image renditions found.\n")
output_string = self.reaesc.sub("", output.read())
self.assertEqual(output_string, "No image renditions found.\n")
def test_image_renditions(self):
renditions = get_image_model().get_rendition_model().objects.all()
renditions = Rendition.objects.all()
total_renditions = len(renditions)
output = self.run_command()
reaesc = re.compile(r"\x1b[^m]*m")
output_string = reaesc.sub("", output.read())
output_string = self.reaesc.sub("", output.read())
# checking if the number of renditions regenerated equal total_renditions
self.assertEqual(
output_string,
f"Successfully regenerated {total_renditions} image rendition(s)\n",
f"Regenerating {total_renditions} rendition(s)\n"
f"Successfully processed {total_renditions} rendition(s)\n",
)
# checking if the number of renditions now equal total_renditions
renditions_now = get_image_model().get_rendition_model().objects.all()
renditions_now = Rendition.objects.all()
total_renditions_now = len(renditions_now)
self.assertEqual(total_renditions_now, total_renditions)
def test_image_renditions_with_purge_only(self):
renditions = get_image_model().get_rendition_model().objects.all()
renditions = Rendition.objects.all()
total_renditions = len(renditions)
output = self.run_command(purge_only=True)
reaesc = re.compile(r"\x1b[^m]*m")
output_string = reaesc.sub("", output.read())
output_string = self.reaesc.sub("", output.read())
# checking if the number of renditions purged equal total_renditions
self.assertEqual(
output_string,
f"Successfully purged {total_renditions} image rendition(s)\n",
f"Purging {total_renditions} rendition(s)\n"
f"Successfully processed {total_renditions} rendition(s)\n",
)
# checking if the number of renditions now equal 0
renditions_now = get_image_model().get_rendition_model().objects.all()
renditions_now = Rendition.objects.all()
total_renditions_now = len(renditions_now)
self.assertEqual(total_renditions_now, 0)