Frontend cache: Fix n+1 query issues when batch purging page urls (#12448)

* Update tests to highlight the current issues
* Optimise PurgeBatch and util methods, and add docstrings
* Use page objects as the site_cache_target in tests
* Use 'cache_object' instead of 'site_cache_target' to better match terminology in Page methods
* Ensure self.captureOnCommitCallback() is applied when performing purges in tests
* Ensure tests check PURGED_URLS outside of captureOnCommitCallback()
pull/12945/merge
Andy Babic 2025-03-15 05:48:58 +00:00 zatwierdzone przez GitHub
rodzic de99b8b90f
commit a983dc220b
Nie znaleziono w bazie danych klucza dla tego podpisu
ID klucza GPG: B5690EEEBB952194
2 zmienionych plików z 218 dodań i 26 usunięć

Wyświetl plik

@ -18,7 +18,7 @@ from wagtail.contrib.frontend_cache.backends import (
)
from wagtail.contrib.frontend_cache.utils import get_backends
from wagtail.models import Page
from wagtail.test.testapp.models import EventIndex
from wagtail.test.testapp.models import EventIndex, EventPage
from wagtail.utils.deprecation import RemovedInWagtail70Warning
from .utils import (
@ -29,6 +29,16 @@ from .utils import (
purge_urls_from_cache,
)
EVENTPAGE_URLS = {
"http://localhost/events/final-event/",
"http://localhost/events/christmas/",
"http://localhost/events/saint-patrick/",
"http://localhost/events/tentative-unpublished-event/",
"http://localhost/events/someone-elses-event/",
"http://localhost/events/tentative-unpublished-event/",
"http://localhost/secret-plans/steal-underpants/",
}
class TestBackendConfiguration(SimpleTestCase):
def test_default(self):
@ -438,7 +448,12 @@ class MockCloudflareBackend(CloudflareBackend):
"varnish": {
"BACKEND": "wagtail.contrib.frontend_cache.tests.MockBackend",
},
}
},
CACHES={
"default": {
"BACKEND": "django.core.cache.backends.dummy.DummyCache",
}
},
)
class TestCachePurgingFunctions(TestCase):
fixtures = ["test.json"]
@ -457,26 +472,74 @@ class TestCachePurgingFunctions(TestCase):
self.assertEqual(PURGED_URLS, {"http://localhost/foo", "http://localhost/bar"})
def test_purge_page_from_cache(self):
page = EventIndex.objects.get(url_path="/home/events/")
with self.captureOnCommitCallbacks(execute=True):
page = EventIndex.objects.get(url_path="/home/events/")
purge_page_from_cache(page)
with self.assertNumQueries(1):
# Because no cache object is provided, a query is needed to
# fetch site root paths in order to derive page urls
purge_page_from_cache(page)
self.assertEqual(
PURGED_URLS, {"http://localhost/events/", "http://localhost/events/past/"}
PURGED_URLS,
{"http://localhost/events/", "http://localhost/events/past/"},
)
def test_purge_page_from_cache_with_shared_cache_object(self):
page = EventIndex.objects.get(url_path="/home/events/")
# Ensure site root paths are already cached, which should result in
# zero additional queries being incurred by this test
page._get_relevant_site_root_paths()
with self.captureOnCommitCallbacks(execute=True):
# Because site root paths are already available via the cache_object,
# no further queries should be needed to derive page urls
with self.assertNumQueries(0):
purge_page_from_cache(page, cache_object=page)
self.assertEqual(
PURGED_URLS,
{"http://localhost/events/", "http://localhost/events/past/"},
)
def test_purge_pages_from_cache(self):
pages = list(Page.objects.all().type(EventPage))
with self.captureOnCommitCallbacks(execute=True):
purge_pages_from_cache(EventIndex.objects.all())
self.assertEqual(
PURGED_URLS, {"http://localhost/events/", "http://localhost/events/past/"}
)
with self.assertNumQueries(1):
# Because no cache object is provided, a query is needed to
# fetch site root paths in order to derive page urls
purge_pages_from_cache(pages)
self.assertEqual(PURGED_URLS, EVENTPAGE_URLS)
def test_purge_pages_from_cache_with_shared_cache_object(self):
pages = list(Page.objects.all().type(EventPage))
# Use the first page as the cache object for the operation
cache_object = pages[0]
# Ensure site root paths are already cached, which should result in
# zero additional queries being incurred by this test
cache_object._get_relevant_site_root_paths()
with self.captureOnCommitCallbacks(execute=True):
# Because site root paths are already available via the cache_object,
# no further queries should be needed to derive page urls
with self.assertNumQueries(0):
purge_pages_from_cache(pages, cache_object=cache_object)
self.assertEqual(PURGED_URLS, EVENTPAGE_URLS)
def test_purge_batch(self):
with self.captureOnCommitCallbacks(execute=True):
batch = PurgeBatch()
page = EventIndex.objects.get(url_path="/home/events/")
page = EventIndex.objects.get(url_path="/home/events/")
batch = PurgeBatch()
# Because no cache object is provided, a query is needed to
# fetch site root paths in order to derive page urls
with self.assertNumQueries(1):
batch.add_page(page)
batch.add_url("http://localhost/foo")
batch.add_url("http://localhost/foo")
with self.captureOnCommitCallbacks(execute=True):
batch.purge()
self.assertEqual(
@ -488,6 +551,43 @@ class TestCachePurgingFunctions(TestCase):
},
)
def test_purge_batch_with_multiple_pages(self):
pages = list(Page.objects.all().type(EventPage))
batch = PurgeBatch()
# Because the batch has no cache object, a query is needed to
# fetch site root paths in order to derive page urls
with self.assertNumQueries(1):
batch.add_pages(pages)
with self.captureOnCommitCallbacks(execute=True):
batch.purge()
self.assertEqual(PURGED_URLS, EVENTPAGE_URLS)
def test_multiple_purge_batches_with_shared_cache_object(self):
pages = list(Page.objects.all().type(EventPage))
# Use the first page as the cache object for the batch
cache_object = pages[0]
# Ensure site root paths are already cached, which should result in
# zero additional queries being incurred by this test
cache_object._get_relevant_site_root_paths()
batch = PurgeBatch(cache_object=cache_object)
with self.assertNumQueries(0):
# Because site root paths are already available via the cache_object,
# no queries should be needed to derive page urls
batch.add_pages(pages)
with self.captureOnCommitCallbacks(execute=True):
batch.purge()
self.assertEqual(PURGED_URLS, EVENTPAGE_URLS)
PURGED_URLS.clear()
@override_settings(
WAGTAILFRONTENDCACHE={
"varnish": {

Wyświetl plik

@ -55,45 +55,137 @@ def get_backends(backend_settings=None, backends=None):
def purge_url_from_cache(url, backend_settings=None, backends=None):
"""
Purge a single URL from the frontend cache.
:param url: The URL to purge from the cache.
:type url: str
:param backend_settings: Optional custom backend settings to use instead of those defined in ``settings.WAGTAILFRONTENDCACHE``.
:type backend_settings: dict, optional
:param backends: Optional list of strings referencing specific backends from ``settings.WAGTAILFRONTENDCACHE`` or provided as ``backend_settings``. Can be used to limit purge operations to specific backends.
:type backends: list, optional
This function purges a single URL from the configured frontend cache backends. It's useful
when you need to invalidate the cache for a specific URL.
If no custom backends or settings are provided, it will use the default configuration
from ``settings.WAGTAILFRONTENDCACHE``.
NOTE: This function also handles internationalization, creating language-specific URLs if
``WAGTAILFRONTENDCACHE_LANGUAGES`` is set and ``USE_I18N`` is ``True``.
"""
purge_urls_from_cache([url], backend_settings=backend_settings, backends=backends)
def purge_urls_from_cache(urls, backend_settings=None, backends=None):
"""
Purge multiple URLs from the frontend cache.
:param urls: An iterable of URLs to purge from the cache.
:type urls: iterable of str
:param backend_settings: Optional custom backend settings to use instead of those defined in ``settings.WAGTAILFRONTENDCACHE``.
:type backend_settings: dict, optional
:param backends: Optional list of strings referencing specific backends from ``settings.WAGTAILFRONTENDCACHE`` or provided as ``backend_settings``. Can be used to limit purge operations to specific backends.
:type backends: list, optional
This function purges multiple URLs from the configured frontend cache backends. It's useful
when you need to invalidate the cache for multiple URLs at once.
If no custom backends or settings are provided, it will use the default configuration
from ``settings.WAGTAILFRONTENDCACHE``.
NOTE: This function also handles internationalization, creating language-specific URLs if
``WAGTAILFRONTENDCACHE_LANGUAGES`` is set and ``USE_I18N`` is ``True``.
"""
from .tasks import purge_urls_from_cache_task
if not urls:
return
purge_urls_from_cache_task.enqueue(list(urls), backend_settings, backends)
def _get_page_cached_urls(page):
page_url = page.full_url
def _get_page_cached_urls(page, cache_object=None):
page_url = page.get_full_url(cache_object)
if page_url is None: # nothing to be done if the page has no routable URL
return []
return [page_url + path.lstrip("/") for path in page.specific.get_cached_paths()]
return [
page_url + path.lstrip("/")
for path in page.specific_deferred.get_cached_paths()
]
def purge_page_from_cache(page, backend_settings=None, backends=None):
purge_pages_from_cache([page], backend_settings=backend_settings, backends=backends)
def purge_page_from_cache(
page, backend_settings=None, backends=None, *, cache_object=None
):
"""
Purge a single page from the frontend cache.
:param page: The page to purge from the cache.
:type page: Page
:param backend_settings: Optional custom backend settings to use instead of those defined in ``settings.WAGTAILFRONTENDCACHE``.
:type backend_settings: dict, optional
:param backends: Optional list of strings referencing specific backends from ``settings.WAGTAILFRONTENDCACHE`` or provided as ``backend_settings``. Can be used to limit purge operations to specific backends.
:type backends: list, optional
:param cache_object: Optional, but strongly recommended when making a series of requests to this method. An object to be passed to URL-related methods, allowing cached site root path data to be reused across multiple requests.
:type cache_object: object, optional
This function retrieves all cached URLs for the given page and purges them from the configured
backends. It's useful when you need to invalidate the cache for a specific page,
for example after the page has been updated.
If no custom backends or settings are provided, it will use the default configuration
from ``settings.WAGTAILFRONTENDCACHE``.
The `cache_object` parameter can be any kind of object that supports arbitrary attribute
assignment, such as a Python object or Django Model instance.
"""
urls = _get_page_cached_urls(page, cache_object)
purge_urls_from_cache(urls, backend_settings, backends)
def purge_pages_from_cache(pages, backend_settings=None, backends=None):
urls = []
for page in pages:
urls.extend(_get_page_cached_urls(page))
def purge_pages_from_cache(
pages, backend_settings=None, backends=None, *, cache_object=None
):
"""
Purge multiple pages from the frontend cache.
if urls:
purge_urls_from_cache(urls, backend_settings, backends)
:param pages: An iterable of pages to purge from the cache.
:type pages: iterable of Page
:param backend_settings: Optional custom backend settings to use instead of those defined in ``settings.WAGTAILFRONTENDCACHE``.
:type backend_settings: dict, optional
:param backends: Optional list of strings matching keys from ``settings.WAGTAILFRONTENDCACHE`` or provided as ``backend_settings``. Can be used to limit purge operations to specific backends.
:type backends: list, optional
:param cache_object: Optional object to be passed to URL-related methods, to allow cached site root path data to be reused across multiple requests to this method. If not provided, the ``PurgeBatch`` object created by this method will be used instead.
:type cache_object: object, optional
This function retrieves all cached URLs for the given pages and purges them from the configured
backends. It's useful when you need to invalidate the cache for multiple pages at once,
for example after a bulk update operation.
If no custom backends or settings are provided, it will use the default configuration
from ``settings.WAGTAILFRONTENDCACHE``.
The `cache_object` parameter can be any kind of object that supports arbitrary attribute
assignment, such as a Python object or Django Model instance.
"""
batch = PurgeBatch(cache_object=cache_object)
batch.add_pages(pages)
batch.purge(backend_settings, backends)
class PurgeBatch:
"""Represents a list of URLs to be purged in a single request"""
def __init__(self, urls=None):
def __init__(self, urls=None, *, cache_object=None):
self.urls = set()
if urls is not None:
self.add_urls(urls)
self.cache_object = cache_object
def add_url(self, url):
"""Adds a single URL"""
self.urls.add(url)
@ -114,7 +206,7 @@ class PurgeBatch:
This combines the page's full URL with each path that is returned by
the page's `.get_cached_paths` method
"""
self.add_urls(_get_page_cached_urls(page))
self.add_urls(_get_page_cached_urls(page, self.cache_object or self))
def add_pages(self, pages):
"""