Allow frontend caching backends to limit which hostnames they respond to

pull/11893/head
Jake Howard 2024-04-19 09:55:55 +01:00 zatwierdzone przez Matt Westcott
rodzic 91d7ae8a6e
commit c00c2b684f
5 zmienionych plików z 151 dodań i 33 usunięć

Wyświetl plik

@ -68,6 +68,8 @@ Many websites use a frontend cache such as Varnish, Squid, Cloudflare or CloudFr
Wagtail supports being [integrated](frontend_cache_purging) with many CDNs so that it can inform them when a page changes, allowing the cache to be cleared immediately and users to see the changes sooner.
If you have multiple frontends configured (e.g. Cloudflare for one site, CloudFront for another), it's recommended to set the [`HOSTNAMES`](frontendcache_multiple_backends) key to the list of hostnames the backend can purge, to prevent unnecessary extra purge requests.
## Page URLs
To fully resolve the URL of a page, Wagtail requires information from a few different sources.

Wyświetl plik

@ -121,24 +121,6 @@ Configuration of credentials can done in multiple ways. You won't need to store
}
```
In case you run multiple sites with Wagtail and each site has its CloudFront distribution, provide a mapping instead of a single distribution. Make sure the mapping matches with the hostnames provided in your site settings.
```python
WAGTAILFRONTENDCACHE = {
'cloudfront': {
'BACKEND': 'wagtail.contrib.frontend_cache.backends.CloudfrontBackend',
'DISTRIBUTION_ID': {
'www.wagtail.org': 'your-distribution-id',
'www.madewithwagtail.org': 'your-distribution-id',
},
},
}
```
```{note}
In most cases, absolute URLs with ``www`` prefixed domain names should be used in your mapping. Only drop the ``www`` prefix if you're absolutely sure you're not using it (for example a subdomain).
```
### Azure CDN
With [Azure CDN](https://azure.microsoft.com/en-gb/products/cdn/) you will need a CDN profile with an endpoint configured.
@ -236,6 +218,56 @@ WAGTAILFRONTENDCACHE = {
Another option that can be set is `SUBSCRIPTION_ID`. By default the first encountered subscription will be used, but if your credential has access to more subscriptions, you should set this to an explicit value.
(frontendcache_multiple_backends)=
## Multiple backends
Multiple backends can be configured by adding multiple entries in `WAGTAILFRONTENDCACHE`.
By default, a backend will attempt to handle every invalidation request, regardless of hostname. To restrict a backend to certain hostnames, specify them in `HOSTNAMES`:
```python
WAGTAILFRONTENDCACHE = {
'main-site': {
'BACKEND': 'wagtail.contrib.frontend_cache.backends.HTTPBackend',
'LOCATION': 'http://localhost:8000',
'HOSTNAMES': ['example.com']
},
'cdn': {
'BACKEND': 'wagtail.contrib.frontend_cache.backends.CloudflareBackend',
'BEARER_TOKEN': 'your cloudflare bearer token',
'ZONEID': 'your cloudflare domain zone id',
'HOSTNAMES': ['cdn.example.com']
},
}
```
In the above example, `cdn.example.com/foo` will be invalidated by Cloudflare, whilst `example.com/foo` will be invalidated by the `main-site` backend. This allows different configuration to be used for each backend, for example by changing the `ZONEID` for the Cloudflare backend:
```python
WAGTAILFRONTENDCACHE = {
'main-site': {
'BACKEND': 'wagtail.contrib.frontend_cache.backends.CloudflareBackend',
'BEARER_TOKEN': os.environ["CLOUDFLARE_BEARER_TOKEN"],
'ZONEID': 'example.com zone id',
'HOSTNAMES': ['example.com']
},
'other-site': {
'BACKEND': 'wagtail.contrib.frontend_cache.backends.CloudflareBackend',
'BEARER_TOKEN': os.environ["CLOUDFLARE_BEARER_TOKEN"],
'ZONEID': 'example.net zone id',
'HOSTNAMES': ['example.net']
},
}
```
```{note}
In most cases, ``www`` prefixed domain names should be used in ``HOSTNAMES``. Only drop the ``www`` prefix if you're absolutely sure you're not using it (for example a subdomain).
```
Much like Django's `ALLOWED_HOSTS`, values in `HOSTNAMES` starting with a `.` can be used as a subdomain wildcard: for example, `.example.com` matches `example.com` and all of its subdomains.
## Advanced usage
### Invalidating more than one URL per page

Wyświetl plik

@ -4,11 +4,14 @@ from collections import defaultdict
from urllib.error import HTTPError, URLError
from urllib.parse import urlparse, urlsplit, urlunparse, urlunsplit
from urllib.request import Request, urlopen
from warnings import warn
import requests
from django.core.exceptions import ImproperlyConfigured
from django.http.request import validate_host
from wagtail import __version__
from wagtail.utils.deprecation import RemovedInWagtail70Warning
logger = logging.getLogger("wagtail.frontendcache")
@ -19,6 +22,10 @@ class PurgeRequest(Request):
class BaseBackend:
def __init__(self, params):
# If unspecified, invalidate all hosts
self.hostnames = params.get("HOSTNAMES", ["*"])
def purge(self, url):
    """
    Purge a single ``url`` from the cache.

    The base class provides no implementation; calling it raises
    ``NotImplementedError``.
    """
    raise NotImplementedError()
@ -27,9 +34,16 @@ class BaseBackend:
for url in urls:
self.purge(url)
def invalidates_hostname(self, hostname):
    """
    Can `hostname` be invalidated by this backend?

    `hostname` is checked against this backend's `HOSTNAMES` patterns with
    Django's `validate_host`. The value is first tried exactly as given;
    if that does not match, it is retried lowercased and with any
    `:port` suffix removed.

    Returns `True` if either form matches, `False` otherwise.
    """
    from django.http.request import split_domain_port

    # Exact match first, so existing configurations keep working unchanged.
    if validate_host(hostname, self.hostnames):
        return True

    # `validate_host` assumes a lowercased host without a port, but the
    # value handed in may be a URL's netloc (e.g. "Example.com:8000").
    domain, _port = split_domain_port(hostname)
    if not domain or domain == hostname:
        # Either the host is malformed or normalising changed nothing, so
        # the answer from the exact check above stands.
        return False
    return validate_host(domain, self.hostnames)
class HTTPBackend(BaseBackend):
def __init__(self, params):
super().__init__(params)
location_url_parsed = urlparse(params.pop("LOCATION"))
self.cache_scheme = location_url_parsed.scheme
self.cache_netloc = location_url_parsed.netloc
@ -78,6 +92,8 @@ class CloudflareBackend(BaseBackend):
CHUNK_SIZE = 30
def __init__(self, params):
super().__init__(params)
self.cloudflare_email = params.pop("EMAIL", None)
self.cloudflare_api_key = params.pop("TOKEN", None) or params.pop(
"API_KEY", None
@ -174,6 +190,8 @@ class CloudfrontBackend(BaseBackend):
def __init__(self, params):
import boto3
super().__init__(params)
self.client = boto3.client("cloudfront")
try:
self.cloudfront_distribution_id = params.pop("DISTRIBUTION_ID")
@ -182,6 +200,14 @@ class CloudfrontBackend(BaseBackend):
"The setting 'WAGTAILFRONTENDCACHE' requires the object 'DISTRIBUTION_ID'."
)
# Add known hostnames for hostname validation (if not already defined)
# RemovedInWagtail70Warning
if isinstance(self.cloudfront_distribution_id, dict):
if "HOSTNAMES" in params:
self.hostnames.extend(self.cloudfront_distribution_id.keys())
else:
self.hostnames = list(self.cloudfront_distribution_id.keys())
def purge_batch(self, urls):
paths_by_distribution_id = defaultdict(list)
@ -190,11 +216,15 @@ class CloudfrontBackend(BaseBackend):
distribution_id = None
if isinstance(self.cloudfront_distribution_id, dict):
warn(
"Using a `DISTRIBUTION_ID` mapping is deprecated - use `HOSTNAMES` in combination with multiple backends instead.",
category=RemovedInWagtail70Warning,
)
host = url_parsed.hostname
if host in self.cloudfront_distribution_id:
distribution_id = self.cloudfront_distribution_id.get(host)
else:
logger.info(
logger.warning(
"Couldn't purge '%s' from CloudFront. Hostname '%s' not found in the DISTRIBUTION_ID mapping",
url,
host,
@ -235,6 +265,7 @@ class CloudfrontBackend(BaseBackend):
class AzureBaseBackend(BaseBackend):
def __init__(self, params):
super().__init__(params)
self._credentials = params.pop("CREDENTIALS", None)
self._subscription_id = params.pop("SUBSCRIPTION_ID", None)
try:

Wyświetl plik

@ -19,6 +19,7 @@ from wagtail.contrib.frontend_cache.backends import (
from wagtail.contrib.frontend_cache.utils import get_backends
from wagtail.models import Page
from wagtail.test.testapp.models import EventIndex
from wagtail.utils.deprecation import RemovedInWagtail70Warning
from .utils import (
PurgeBatch,
@ -336,15 +337,31 @@ class TestBackendConfiguration(TestCase):
},
}
)
backends.get("cloudfront").purge(
"http://www.wagtail.org/home/events/christmas/"
)
backends.get("cloudfront").purge("http://torchbox.com/blog/")
with self.assertWarnsMessage(
RemovedInWagtail70Warning,
"Using a `DISTRIBUTION_ID` mapping is deprecated - use `HOSTNAMES` in combination with multiple backends instead.",
):
backends.get("cloudfront").purge(
"http://www.wagtail.org/home/events/christmas/"
)
with self.assertWarnsMessage(
RemovedInWagtail70Warning,
"Using a `DISTRIBUTION_ID` mapping is deprecated - use `HOSTNAMES` in combination with multiple backends instead.",
):
backends.get("cloudfront").purge("http://torchbox.com/blog/")
_create_invalidation.assert_called_once_with(
"frontend", ["/home/events/christmas/"]
)
self.assertTrue(
backends.get("cloudfront").invalidates_hostname("www.wagtail.org")
)
self.assertFalse(
backends.get("cloudfront").invalidates_hostname("torchbox.com")
)
def test_multiple(self):
backends = get_backends(
backend_settings={
@ -396,17 +413,11 @@ PURGED_URLS = []
class MockBackend(BaseBackend):
    """
    Test double for a cache backend: instead of contacting a cache it
    records every purged URL in the module-level ``PURGED_URLS`` list.
    """

    def __init__(self, config):
        # Delegate to the base initialiser so that ``self.hostnames`` is
        # populated from ``HOSTNAMES``; without it ``invalidates_hostname``
        # would fail with ``AttributeError``.
        super().__init__(config)

    def purge(self, url):
        PURGED_URLS.append(url)
class MockCloudflareBackend(CloudflareBackend):
def __init__(self, config):
pass
def _purge_urls(self, urls):
if len(urls) > self.CHUNK_SIZE:
raise Exception("Cloudflare backend is not chunking requests as expected")
@ -465,11 +476,34 @@ class TestCachePurgingFunctions(TestCase):
],
)
@override_settings(
    WAGTAILFRONTENDCACHE={
        "varnish": {
            "BACKEND": "wagtail.contrib.frontend_cache.tests.MockBackend",
            "HOSTNAMES": ["example.com"],
        },
    }
)
def test_invalidate_specific_location(self):
    """
    A backend restricted via ``HOSTNAMES`` only purges URLs on those hosts.
    """
    # NOTE(review): assumes PURGED_URLS is empty when the test starts
    # (reset is not visible in this hunk) - confirm against setUp.

    # "localhost" is not listed in HOSTNAMES: nothing is purged and a
    # warning is logged instead.
    with self.assertLogs(level="WARNING") as log_output:
        purge_url_from_cache("http://localhost/foo")
    self.assertEqual(PURGED_URLS, [])
    self.assertIn(
        "Unable to find purge backend for localhost",
        log_output.output[0],
    )
    # "example.com" is listed in HOSTNAMES, so the URL reaches the backend.
    purge_url_from_cache("http://example.com/foo")
    self.assertEqual(PURGED_URLS, ["http://example.com/foo"])
@override_settings(
WAGTAILFRONTENDCACHE={
"cloudflare": {
"BACKEND": "wagtail.contrib.frontend_cache.tests.MockCloudflareBackend",
"ZONEID": "zone",
"BEARER_TOKEN": "token",
},
}
)

Wyświetl plik

@ -1,5 +1,6 @@
import logging
import re
from collections import defaultdict
from urllib.parse import urlparse, urlunparse
from django.conf import settings
@ -100,11 +101,29 @@ def purge_urls_from_cache(urls, backend_settings=None, backends=None):
urls = new_urls
for backend_name, backend in get_backends(backend_settings, backends).items():
for url in urls:
logger.info("[%s] Purging URL: %s", backend_name, url)
urls_by_hostname = defaultdict(list)
backend.purge_batch(urls)
for url in urls:
urls_by_hostname[urlparse(url).netloc].append(url)
backends = get_backends(backend_settings, backends)
for hostname, urls in urls_by_hostname.items():
backends_for_hostname = {
backend_name: backend
for backend_name, backend in backends.items()
if backend.invalidates_hostname(hostname)
}
if not backends_for_hostname:
logger.warning("Unable to find purge backend for %s", hostname)
continue
for backend_name, backend in backends_for_hostname.items():
for url in urls:
logger.info("[%s] Purging URL: %s", backend_name, url)
backend.purge_batch(urls)
def _get_page_cached_urls(page):