diff --git a/docs/advanced_topics/performance.md b/docs/advanced_topics/performance.md index c803f3827b..838974be99 100644 --- a/docs/advanced_topics/performance.md +++ b/docs/advanced_topics/performance.md @@ -68,6 +68,8 @@ Many websites use a frontend cache such as Varnish, Squid, Cloudflare or CloudFr Wagtail supports being [integrated](frontend_cache_purging) with many CDNs, so it can inform them when a page changes, so the cache can be cleared immediately and users see the changes sooner. +If you have multiple frontends configured (e.g. Cloudflare for one site, CloudFront for another), it's recommended to set the [`HOSTNAMES`](frontendcache_multiple_backends) key on each backend to the list of hostnames that backend can purge, to prevent unnecessary purge requests. + ## Page URLs To fully resolve the URL of a page, Wagtail requires information from a few different sources. diff --git a/docs/reference/contrib/frontendcache.md b/docs/reference/contrib/frontendcache.md index 1c0a421fc9..d4693adfb7 100644 --- a/docs/reference/contrib/frontendcache.md +++ b/docs/reference/contrib/frontendcache.md @@ -121,24 +121,6 @@ Configuration of credentials can done in multiple ways. You won't need to store } ``` -In case you run multiple sites with Wagtail and each site has its CloudFront distribution, provide a mapping instead of a single distribution. Make sure the mapping matches with the hostnames provided in your site settings. - -```python -WAGTAILFRONTENDCACHE = { - 'cloudfront': { - 'BACKEND': 'wagtail.contrib.frontend_cache.backends.CloudfrontBackend', - 'DISTRIBUTION_ID': { - 'www.wagtail.org': 'your-distribution-id', - 'www.madewithwagtail.org': 'your-distribution-id', - }, - }, -} -``` - -```{note} -In most cases, absolute URLs with ``www`` prefixed domain names should be used in your mapping. Only drop the ``www`` prefix if you're absolutely sure you're not using it (for example a subdomain).
-``` ### Azure CDN With [Azure CDN](https://azure.microsoft.com/en-gb/products/cdn/) you will need a CDN profile with an endpoint configured. @@ -236,6 +218,56 @@ WAGTAILFRONTENDCACHE = { Another option that can be set is `SUBSCRIPTION_ID`. By default the first encountered subscription will be used, but if your credential has access to more subscriptions, you should set this to an explicit value. +(frontendcache_multiple_backends)= + +## Multiple backends + +Multiple backends can be configured by adding multiple entries in `WAGTAILFRONTENDCACHE`. + +By default, a backend will attempt to handle all invalidation requests, whatever their hostname. To only invalidate certain hostnames, specify them in `HOSTNAMES`: + +```python +WAGTAILFRONTENDCACHE = { + 'main-site': { + 'BACKEND': 'wagtail.contrib.frontend_cache.backends.HTTPBackend', + 'LOCATION': 'http://localhost:8000', + 'HOSTNAMES': ['example.com'] + }, + 'cdn': { + 'BACKEND': 'wagtail.contrib.frontend_cache.backends.CloudflareBackend', + 'BEARER_TOKEN': 'your cloudflare bearer token', + 'ZONEID': 'your cloudflare domain zone id', + 'HOSTNAMES': ['cdn.example.com'] + }, +} +``` + +In the above example, `cdn.example.com/foo` will be invalidated by Cloudflare, whilst `example.com/foo` will be invalidated by the `main-site` backend.
This allows different configuration to be used for each backend, for example by changing the `ZONEID` for the Cloudflare backend: + +```python + +WAGTAILFRONTENDCACHE = { + 'main-site': { + 'BACKEND': 'wagtail.contrib.frontend_cache.backends.CloudflareBackend', + 'BEARER_TOKEN': os.environ["CLOUDFLARE_BEARER_TOKEN"], + 'ZONEID': 'example.com zone id', + 'HOSTNAMES': ['example.com'] + }, + 'other-site': { + 'BACKEND': 'wagtail.contrib.frontend_cache.backends.CloudflareBackend', + 'BEARER_TOKEN': os.environ["CLOUDFLARE_BEARER_TOKEN"], + 'ZONEID': 'example.net zone id', + 'HOSTNAMES': ['example.net'] + }, +} +``` + +```{note} +In most cases, ``www`` prefixed domain names should be used in ``HOSTNAMES``. Only drop the ``www`` prefix if you're absolutely sure you're not using it (for example a subdomain). +``` + +Much like Django's `ALLOWED_HOSTS`, values in `HOSTNAMES` starting with a `.` can be used as a subdomain wildcard. + ## Advanced usage ### Invalidating more than one URL per page diff --git a/wagtail/contrib/frontend_cache/backends.py b/wagtail/contrib/frontend_cache/backends.py index e4ee527db5..7f6f2fdd49 100644 --- a/wagtail/contrib/frontend_cache/backends.py +++ b/wagtail/contrib/frontend_cache/backends.py @@ -4,11 +4,14 @@ from collections import defaultdict from urllib.error import HTTPError, URLError from urllib.parse import urlparse, urlsplit, urlunparse, urlunsplit from urllib.request import Request, urlopen +from warnings import warn import requests from django.core.exceptions import ImproperlyConfigured +from django.http.request import validate_host from wagtail import __version__ +from wagtail.utils.deprecation import RemovedInWagtail70Warning logger = logging.getLogger("wagtail.frontendcache") @@ -19,6 +22,10 @@ class PurgeRequest(Request): class BaseBackend: + def __init__(self, params): + # If unspecified, invalidate all hosts + self.hostnames = params.get("HOSTNAMES", ["*"]) + def purge(self, url): raise NotImplementedError @@
-27,9 +34,16 @@ class BaseBackend: for url in urls: self.purge(url) + def invalidates_hostname(self, hostname): + """ + Can `hostname` be invalidated by this backend? + """ + return validate_host(hostname, self.hostnames) + class HTTPBackend(BaseBackend): def __init__(self, params): + super().__init__(params) location_url_parsed = urlparse(params.pop("LOCATION")) self.cache_scheme = location_url_parsed.scheme self.cache_netloc = location_url_parsed.netloc @@ -78,6 +92,8 @@ class CloudflareBackend(BaseBackend): CHUNK_SIZE = 30 def __init__(self, params): + super().__init__(params) + self.cloudflare_email = params.pop("EMAIL", None) self.cloudflare_api_key = params.pop("TOKEN", None) or params.pop( "API_KEY", None @@ -174,6 +190,8 @@ class CloudfrontBackend(BaseBackend): def __init__(self, params): import boto3 + super().__init__(params) + self.client = boto3.client("cloudfront") try: self.cloudfront_distribution_id = params.pop("DISTRIBUTION_ID") @@ -182,6 +200,14 @@ class CloudfrontBackend(BaseBackend): "The setting 'WAGTAILFRONTENDCACHE' requires the object 'DISTRIBUTION_ID'." 
) + # Add known hostnames for hostname validation (if not already defined) + # RemovedInWagtail70Warning + if isinstance(self.cloudfront_distribution_id, dict): + if "HOSTNAMES" in params: + self.hostnames.extend(self.cloudfront_distribution_id.keys()) + else: + self.hostnames = list(self.cloudfront_distribution_id.keys()) + def purge_batch(self, urls): paths_by_distribution_id = defaultdict(list) @@ -190,11 +216,15 @@ class CloudfrontBackend(BaseBackend): distribution_id = None if isinstance(self.cloudfront_distribution_id, dict): + warn( + "Using a `DISTRIBUTION_ID` mapping is deprecated - use `HOSTNAMES` in combination with multiple backends instead.", + category=RemovedInWagtail70Warning, + ) host = url_parsed.hostname if host in self.cloudfront_distribution_id: distribution_id = self.cloudfront_distribution_id.get(host) else: - logger.info( + logger.warning( "Couldn't purge '%s' from CloudFront. Hostname '%s' not found in the DISTRIBUTION_ID mapping", url, host, @@ -235,6 +265,7 @@ class CloudfrontBackend(BaseBackend): class AzureBaseBackend(BaseBackend): def __init__(self, params): + super().__init__(params) self._credentials = params.pop("CREDENTIALS", None) self._subscription_id = params.pop("SUBSCRIPTION_ID", None) try: diff --git a/wagtail/contrib/frontend_cache/tests.py b/wagtail/contrib/frontend_cache/tests.py index 5258b90acb..c5e74f1be5 100644 --- a/wagtail/contrib/frontend_cache/tests.py +++ b/wagtail/contrib/frontend_cache/tests.py @@ -19,6 +19,7 @@ from wagtail.contrib.frontend_cache.backends import ( from wagtail.contrib.frontend_cache.utils import get_backends from wagtail.models import Page from wagtail.test.testapp.models import EventIndex +from wagtail.utils.deprecation import RemovedInWagtail70Warning from .utils import ( PurgeBatch, @@ -336,15 +337,31 @@ class TestBackendConfiguration(TestCase): }, } ) - backends.get("cloudfront").purge( - "http://www.wagtail.org/home/events/christmas/" - ) - 
backends.get("cloudfront").purge("http://torchbox.com/blog/") + with self.assertWarnsMessage( + RemovedInWagtail70Warning, + "Using a `DISTRIBUTION_ID` mapping is deprecated - use `HOSTNAMES` in combination with multiple backends instead.", + ): + backends.get("cloudfront").purge( + "http://www.wagtail.org/home/events/christmas/" + ) + + with self.assertWarnsMessage( + RemovedInWagtail70Warning, + "Using a `DISTRIBUTION_ID` mapping is deprecated - use `HOSTNAMES` in combination with multiple backends instead.", + ): + backends.get("cloudfront").purge("http://torchbox.com/blog/") _create_invalidation.assert_called_once_with( "frontend", ["/home/events/christmas/"] ) + self.assertTrue( + backends.get("cloudfront").invalidates_hostname("www.wagtail.org") + ) + self.assertFalse( + backends.get("cloudfront").invalidates_hostname("torchbox.com") + ) + def test_multiple(self): backends = get_backends( backend_settings={ @@ -396,17 +413,11 @@ PURGED_URLS = [] class MockBackend(BaseBackend): - def __init__(self, config): - pass - def purge(self, url): PURGED_URLS.append(url) class MockCloudflareBackend(CloudflareBackend): - def __init__(self, config): - pass - def _purge_urls(self, urls): if len(urls) > self.CHUNK_SIZE: raise Exception("Cloudflare backend is not chunking requests as expected") @@ -465,11 +476,34 @@ class TestCachePurgingFunctions(TestCase): ], ) + @override_settings( + WAGTAILFRONTENDCACHE={ + "varnish": { + "BACKEND": "wagtail.contrib.frontend_cache.tests.MockBackend", + "HOSTNAMES": ["example.com"], + }, + } + ) + def test_invalidate_specific_location(self): + with self.assertLogs(level="WARNING") as log_output: + purge_url_from_cache("http://localhost/foo") + + self.assertEqual(PURGED_URLS, []) + self.assertIn( + "Unable to find purge backend for localhost", + log_output.output[0], + ) + + purge_url_from_cache("http://example.com/foo") + self.assertEqual(PURGED_URLS, ["http://example.com/foo"]) + @override_settings( WAGTAILFRONTENDCACHE={ "cloudflare": { 
"BACKEND": "wagtail.contrib.frontend_cache.tests.MockCloudflareBackend", + "ZONEID": "zone", + "BEARER_TOKEN": "token", }, } ) diff --git a/wagtail/contrib/frontend_cache/utils.py b/wagtail/contrib/frontend_cache/utils.py index 82df8306fb..944673dced 100644 --- a/wagtail/contrib/frontend_cache/utils.py +++ b/wagtail/contrib/frontend_cache/utils.py @@ -1,5 +1,6 @@ import logging import re +from collections import defaultdict from urllib.parse import urlparse, urlunparse from django.conf import settings @@ -100,11 +101,29 @@ def purge_urls_from_cache(urls, backend_settings=None, backends=None): urls = new_urls - for backend_name, backend in get_backends(backend_settings, backends).items(): - for url in urls: - logger.info("[%s] Purging URL: %s", backend_name, url) + urls_by_hostname = defaultdict(list) - backend.purge_batch(urls) + for url in urls: + urls_by_hostname[urlparse(url).netloc].append(url) + + backends = get_backends(backend_settings, backends) + + for hostname, urls in urls_by_hostname.items(): + backends_for_hostname = { + backend_name: backend + for backend_name, backend in backends.items() + if backend.invalidates_hostname(hostname) + } + + if not backends_for_hostname: + logger.warning("Unable to find purge backend for %s", hostname) + continue + + for backend_name, backend in backends_for_hostname.items(): + for url in urls: + logger.info("[%s] Purging URL: %s", backend_name, url) + + backend.purge_batch(urls) def _get_page_cached_urls(page):