From a983dc220ba5db46310d23f26045c077aaad9706 Mon Sep 17 00:00:00 2001 From: Andy Babic Date: Sat, 15 Mar 2025 05:48:58 +0000 Subject: [PATCH] Frontend cache: Fix n+1 query issues when batch purging page urls (#12448) * Update tests to highlight the current issues * Optimise PurgeBatch and util methods, and add docstrings * Use page objects as the site_cache_target in tests * Use 'cache_object' instead of 'site_cache_target' to better match terminology in Page methods * Ensure self.captureOnCommitCallback() is applied when performing purges in tests * Ensure tests check PURGED_URLS outside of captureOnCommitCallback() --- wagtail/contrib/frontend_cache/tests.py | 126 +++++++++++++++++++++--- wagtail/contrib/frontend_cache/utils.py | 118 +++++++++++++++++++--- 2 files changed, 218 insertions(+), 26 deletions(-) diff --git a/wagtail/contrib/frontend_cache/tests.py b/wagtail/contrib/frontend_cache/tests.py index f13ea09959..b25c1e9517 100644 --- a/wagtail/contrib/frontend_cache/tests.py +++ b/wagtail/contrib/frontend_cache/tests.py @@ -18,7 +18,7 @@ from wagtail.contrib.frontend_cache.backends import ( ) from wagtail.contrib.frontend_cache.utils import get_backends from wagtail.models import Page -from wagtail.test.testapp.models import EventIndex +from wagtail.test.testapp.models import EventIndex, EventPage from wagtail.utils.deprecation import RemovedInWagtail70Warning from .utils import ( @@ -29,6 +29,16 @@ from .utils import ( purge_urls_from_cache, ) +EVENTPAGE_URLS = { + "http://localhost/events/final-event/", + "http://localhost/events/christmas/", + "http://localhost/events/saint-patrick/", + "http://localhost/events/tentative-unpublished-event/", + "http://localhost/events/someone-elses-event/", + "http://localhost/events/tentative-unpublished-event/", + "http://localhost/secret-plans/steal-underpants/", +} + class TestBackendConfiguration(SimpleTestCase): def test_default(self): @@ -438,7 +448,12 @@ class MockCloudflareBackend(CloudflareBackend): "varnish": { "BACKEND": "wagtail.contrib.frontend_cache.tests.MockBackend", }, - } + }, + CACHES={ + "default": { + "BACKEND": "django.core.cache.backends.dummy.DummyCache", + } + }, ) class TestCachePurgingFunctions(TestCase): fixtures = ["test.json"] @@ -457,26 +472,74 @@ class TestCachePurgingFunctions(TestCase): self.assertEqual(PURGED_URLS, {"http://localhost/foo", "http://localhost/bar"}) def test_purge_page_from_cache(self): + page = EventIndex.objects.get(url_path="/home/events/") with self.captureOnCommitCallbacks(execute=True): - page = EventIndex.objects.get(url_path="/home/events/") - purge_page_from_cache(page) + with self.assertNumQueries(1): + # Because no cache object is provided, a query is needed to + # fetch site root paths in order to derive page urls + purge_page_from_cache(page) self.assertEqual( - PURGED_URLS, {"http://localhost/events/", "http://localhost/events/past/"} + PURGED_URLS, + {"http://localhost/events/", "http://localhost/events/past/"}, + ) + + def test_purge_page_from_cache_with_shared_cache_object(self): + page = EventIndex.objects.get(url_path="/home/events/") + + # Ensure site root paths are already cached, which should result in + # zero additional queries being incurred by this test + page._get_relevant_site_root_paths() + + with self.captureOnCommitCallbacks(execute=True): + # Because site root paths are already available via the cache_object, + # no further queries should be needed to derive page urls + with self.assertNumQueries(0): + purge_page_from_cache(page, cache_object=page) + + self.assertEqual( + PURGED_URLS, + {"http://localhost/events/", "http://localhost/events/past/"}, ) def test_purge_pages_from_cache(self): + pages = list(Page.objects.all().type(EventPage)) with self.captureOnCommitCallbacks(execute=True): - purge_pages_from_cache(EventIndex.objects.all()) - self.assertEqual( - PURGED_URLS, {"http://localhost/events/", "http://localhost/events/past/"} - ) + with self.assertNumQueries(1): + # Because no cache object is provided, a query is needed to + # fetch site root paths in order to derive page urls + purge_pages_from_cache(pages) + self.assertEqual(PURGED_URLS, EVENTPAGE_URLS) + + def test_purge_pages_from_cache_with_shared_cache_object(self): + pages = list(Page.objects.all().type(EventPage)) + + # Use the first page as the cache object for the operation + cache_object = pages[0] + + # Ensure site root paths are already cached, which should result in + # zero additional queries being incurred by this test + cache_object._get_relevant_site_root_paths() + + with self.captureOnCommitCallbacks(execute=True): + # Because site root paths are already available via the cache_object, + # no further queries should be needed to derive page urls + with self.assertNumQueries(0): + purge_pages_from_cache(pages, cache_object=cache_object) + + self.assertEqual(PURGED_URLS, EVENTPAGE_URLS) def test_purge_batch(self): - with self.captureOnCommitCallbacks(execute=True): - batch = PurgeBatch() - page = EventIndex.objects.get(url_path="/home/events/") + page = EventIndex.objects.get(url_path="/home/events/") + batch = PurgeBatch() + + # Because no cache object is provided, a query is needed to + # fetch site root paths in order to derive page urls + with self.assertNumQueries(1): batch.add_page(page) - batch.add_url("http://localhost/foo") + + batch.add_url("http://localhost/foo") + + with self.captureOnCommitCallbacks(execute=True): batch.purge() self.assertEqual( @@ -488,6 +551,43 @@ class TestCachePurgingFunctions(TestCase): }, ) + def test_purge_batch_with_multiple_pages(self): + pages = list(Page.objects.all().type(EventPage)) + batch = PurgeBatch() + + # Because the batch has no cache object, a query is needed to + # fetch site root paths in order to derive page urls + with self.assertNumQueries(1): + batch.add_pages(pages) + + with self.captureOnCommitCallbacks(execute=True): + batch.purge() + + self.assertEqual(PURGED_URLS, EVENTPAGE_URLS) + + def test_multiple_purge_batches_with_shared_cache_object(self): + pages = list(Page.objects.all().type(EventPage)) + + # Use the first page as the cache object for the batch + cache_object = pages[0] + + # Ensure site root paths are already cached, which should result in + # zero additional queries being incurred by this test + cache_object._get_relevant_site_root_paths() + + batch = PurgeBatch(cache_object=cache_object) + + with self.assertNumQueries(0): + # Because site root paths are already available via the cache_object, + # no queries should be needed to derive page urls + batch.add_pages(pages) + + with self.captureOnCommitCallbacks(execute=True): + batch.purge() + + self.assertEqual(PURGED_URLS, EVENTPAGE_URLS) + PURGED_URLS.clear() + @override_settings( WAGTAILFRONTENDCACHE={ "varnish": { diff --git a/wagtail/contrib/frontend_cache/utils.py b/wagtail/contrib/frontend_cache/utils.py index 534e2c782e..616cc392e3 100644 --- a/wagtail/contrib/frontend_cache/utils.py +++ b/wagtail/contrib/frontend_cache/utils.py @@ -55,45 +55,137 @@ def get_backends(backend_settings=None, backends=None): def purge_url_from_cache(url, backend_settings=None, backends=None): + """ + Purge a single URL from the frontend cache. + + :param url: The URL to purge from the cache. + :type url: str + :param backend_settings: Optional custom backend settings to use instead of those defined in ``settings.WAGTAILFRONTENDCACHE``. + :type backend_settings: dict, optional + :param backends: Optional list of strings referencing specific backends from ``settings.WAGTAILFRONTENDCACHE`` or provided as ``backend_settings``. Can be used to limit purge operations to specific backends. + :type backends: list, optional + + This function purges a single URL from the configured frontend cache backends. It's useful + when you need to invalidate the cache for a specific URL. + + If no custom backends or settings are provided, it will use the default configuration + from ``settings.WAGTAILFRONTENDCACHE``. + + NOTE: This function also handles internationalization, creating language-specific URLs if + ``WAGTAILFRONTENDCACHE_LANGUAGES`` is set and ``USE_I18N`` is ``True``. + """ purge_urls_from_cache([url], backend_settings=backend_settings, backends=backends) def purge_urls_from_cache(urls, backend_settings=None, backends=None): + """ + Purge multiple URLs from the frontend cache. + + :param urls: An iterable of URLs to purge from the cache. + :type urls: iterable of str + :param backend_settings: Optional custom backend settings to use instead of those defined in ``settings.WAGTAILFRONTENDCACHE``. + :type backend_settings: dict, optional + :param backends: Optional list of strings referencing specific backends from ``settings.WAGTAILFRONTENDCACHE`` or provided as ``backend_settings``. Can be used to limit purge operations to specific backends. + :type backends: list, optional + + This function purges multiple URLs from the configured frontend cache backends. It's useful + when you need to invalidate the cache for multiple URLs at once. + + If no custom backends or settings are provided, it will use the default configuration + from ``settings.WAGTAILFRONTENDCACHE``. + + NOTE: This function also handles internationalization, creating language-specific URLs if + ``WAGTAILFRONTENDCACHE_LANGUAGES`` is set and ``USE_I18N`` is ``True``. + """ from .tasks import purge_urls_from_cache_task + if not urls: + return + purge_urls_from_cache_task.enqueue(list(urls), backend_settings, backends) -def _get_page_cached_urls(page): - page_url = page.full_url +def _get_page_cached_urls(page, cache_object=None): + page_url = page.get_full_url(cache_object) if page_url is None: # nothing to be done if the page has no routable URL return [] - return [page_url + path.lstrip("/") for path in page.specific.get_cached_paths()] + return [ + page_url + path.lstrip("/") + for path in page.specific_deferred.get_cached_paths() + ] -def purge_page_from_cache(page, backend_settings=None, backends=None): - purge_pages_from_cache([page], backend_settings=backend_settings, backends=backends) +def purge_page_from_cache( + page, backend_settings=None, backends=None, *, cache_object=None +): + """ + Purge a single page from the frontend cache. + + :param page: The page to purge from the cache. + :type page: Page + :param backend_settings: Optional custom backend settings to use instead of those defined in ``settings.WAGTAILFRONTENDCACHE``. + :type backend_settings: dict, optional + :param backends: Optional list of strings referencing specific backends from ``settings.WAGTAILFRONTENDCACHE`` or provided as ``backend_settings``. Can be used to limit purge operations to specific backends. + :type backends: list, optional + :param cache_object: Optional, but strongly recommended when making a series of requests to this method. An object to be passed to URL-related methods, allowing cached site root path data to be reused across multiple requests. + :type cache_object: object, optional + + This function retrieves all cached URLs for the given page and purges them from the configured + backends. It's useful when you need to invalidate the cache for a specific page, + for example after the page has been updated. + + If no custom backends or settings are provided, it will use the default configuration + from ``settings.WAGTAILFRONTENDCACHE``. + + The `cache_object` parameter can be any kind of object that supports arbitrary attribute + assignment, such as a Python object or Django Model instance. + """ + urls = _get_page_cached_urls(page, cache_object) + purge_urls_from_cache(urls, backend_settings, backends) -def purge_pages_from_cache(pages, backend_settings=None, backends=None): - urls = [] - for page in pages: - urls.extend(_get_page_cached_urls(page)) +def purge_pages_from_cache( + pages, backend_settings=None, backends=None, *, cache_object=None +): + """ + Purge multiple pages from the frontend cache. - if urls: - purge_urls_from_cache(urls, backend_settings, backends) + :param pages: An iterable of pages to purge from the cache. + :type pages: iterable of Page + :param backend_settings: Optional custom backend settings to use instead of those defined in ``settings.WAGTAILFRONTENDCACHE``. + :type backend_settings: dict, optional + :param backends: Optional list of strings matching keys from ``settings.WAGTAILFRONTENDCACHE`` or provided as ``backend_settings``. Can be used to limit purge operations to specific backends. + :type backends: list, optional + :param cache_object: Optional object to be passed to URL-related methods, to allow cached site root path data to be reused across multiple requests to this method. If not provided, the ``PurgeBatch`` object created by this method will be used instead. + :type cache_object: object, optional + + This function retrieves all cached URLs for the given pages and purges them from the configured + backends. It's useful when you need to invalidate the cache for multiple pages at once, + for example after a bulk update operation. + + If no custom backends or settings are provided, it will use the default configuration + from ``settings.WAGTAILFRONTENDCACHE``. + + The `cache_object` parameter can be any kind of object that supports arbitrary attribute + assignment, such as a Python object or Django Model instance. + """ + batch = PurgeBatch(cache_object=cache_object) + batch.add_pages(pages) + batch.purge(backend_settings, backends) class PurgeBatch: """Represents a list of URLs to be purged in a single request""" - def __init__(self, urls=None): + def __init__(self, urls=None, *, cache_object=None): self.urls = set() if urls is not None: self.add_urls(urls) + self.cache_object = cache_object + def add_url(self, url): """Adds a single URL""" self.urls.add(url) @@ -114,7 +206,7 @@ class PurgeBatch: This combines the page's full URL with each path that is returned by the page's `.get_cached_paths` method """ - self.add_urls(_get_page_cached_urls(page)) + self.add_urls(_get_page_cached_urls(page, self.cache_object or self)) def add_pages(self, pages): """