From 7999cbf2c4ae179c93024b719797b37f23d691a3 Mon Sep 17 00:00:00 2001 From: Karl Hobley Date: Sat, 18 Dec 2021 16:54:04 +0000 Subject: [PATCH] Revert "Add signal handler for automatic redirect creation (RFC 34) (3 of 3)" (#7795) * Revert "Release notes for #7774" This reverts commit e9eadb65c79ca7e57fbd3348cf60020effd67de8. * Revert "Docs for #7774" This reverts commit d00a4c8a65cc14e0c5bfb06d85fcd5da56d056d7. * Revert "Automatically create redirects for when pages are moved or have their slug updated" This reverts commit 31a7b119324c67a0eb0964b2827a1cd933b33287. --- CHANGELOG.txt | 1 - SPONSORS.md | 2 - docs/reference/contrib/redirects.rst | 19 -- docs/releases/2.16.md | 13 -- wagtail/contrib/redirects/apps.py | 8 - .../0007_add_autocreate_tracking_fields.py | 35 --- wagtail/contrib/redirects/models.py | 26 +-- wagtail/contrib/redirects/signal_handlers.py | 92 -------- .../redirects/tests/test_signal_handlers.py | 200 ------------------ wagtail/core/utils.py | 152 ------------- 10 files changed, 1 insertion(+), 547 deletions(-) delete mode 100644 wagtail/contrib/redirects/migrations/0007_add_autocreate_tracking_fields.py delete mode 100644 wagtail/contrib/redirects/signal_handlers.py delete mode 100644 wagtail/contrib/redirects/tests/test_signal_handlers.py diff --git a/CHANGELOG.txt b/CHANGELOG.txt index eb3295c572..9dc1c34dce 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -16,7 +16,6 @@ Changelog * Add `page_url_path_changed` signal for pages (Andy Babic) * Fix: 'Page move' actions being incorrectly logged as 'Page reorder' under some circumstances (Andy Babic) * Add `ever_live()` and `never_live()` filters to `PageQuerySet` (Andy Babic) - * Add automatic redirect creation (Andy Babic, with sponsorship from [The National Archives](https://www.nationalarchives.gov.uk)) 2.15.2 (xx.xx.xxxx) - IN DEVELOPMENT diff --git a/SPONSORS.md b/SPONSORS.md index 756d75cf64..919e4f7bac 100644 --- a/SPONSORS.md +++ b/SPONSORS.md @@ -45,5 +45,3 @@ The features below were developed thanks to the sponsorship of these organsation - Telepath for StreamField editing - [YouGov](https://yougov.co.uk/) - Commenting - [The Motley Fool](https://www.fool.com/) - -- Automatic redirect creation - [The National Archives](https://www.nationalarchives.gov.uk) diff --git a/docs/reference/contrib/redirects.rst b/docs/reference/contrib/redirects.rst index e017fcd10f..3b2fc44776 100644 --- a/docs/reference/contrib/redirects.rst +++ b/docs/reference/contrib/redirects.rst @@ -40,25 +40,6 @@ Page model recipe of to have redirects created automatically when changing a pag For an editor's guide to the interface, see :ref:`managing_redirects`. -Automatic redirect creation -=========================== - -.. versionadded:: 2.16 - -Wagtail comes with an option to automatically create permanent redirects for pages (and their descendants) when they are moved or have their slug changed. This feature is disabled by default, but you can enable it for your project by adding the following to your settings: - -.. code-block:: python - - WAGTAILREDIRECTS_AUTOCREATE = True - -Enabling this feature will help protect SEO rankings for your pages as they are moved or renamed, and can also be benefitial for sites that do some form of content caching, because site visitors clicking on page links in stale content should end up at the correct URL. - -Is this right for my project? ------------------------------ - -Wagtail's default implementation works best for small or medium-sized projects (5000 pages or fewer) that mostly use Wagtail's built-in methods for URL generation (Overrides to `Page.get_url_parts()` will be respected, but require additional database queries for each item where custom model fields are used to generate the return value). - -If your project does a lot of overriding of URL-related methods, or has sections with tens or hundreds of thousands of pages that you want to be able to move freely - the default implementation might not work so well. Management commands =================== diff --git a/docs/releases/2.16.md b/docs/releases/2.16.md index 835db71c8f..e28239443f 100644 --- a/docs/releases/2.16.md +++ b/docs/releases/2.16.md @@ -8,19 +8,6 @@ ## What's new -### Automatic redirect creation - -Wagtail projects using the `wagtail.contrib.redirects` app can now enable 'automatic redirect creation' - which creates redirects for pages and their descedants whenever a URL-impacting change is made; such as a slug being changed, or a page being moved to a different part of the tree. - -This feature should be benefitial to most 'standard' Wagtail projects and, in most cases, will have only a minor impact on responsiveness when making such changes. - -It can be enabled by adding the following to your project settings: - -```python -WAGTAILREDIRECTS_AUTOCREATE = True -``` - -Thank you to [The National Archives](https://www.nationalarchives.gov.uk) for kindly sponsoring this feature. ### Other features diff --git a/wagtail/contrib/redirects/apps.py b/wagtail/contrib/redirects/apps.py index 3328534c3b..d8e8080a54 100644 --- a/wagtail/contrib/redirects/apps.py +++ b/wagtail/contrib/redirects/apps.py @@ -1,5 +1,4 @@ from django.apps import AppConfig -from django.conf import settings from django.utils.translation import gettext_lazy as _ @@ -8,10 +7,3 @@ class WagtailRedirectsAppConfig(AppConfig): label = 'wagtailredirects' verbose_name = _("Wagtail redirects") default_auto_field = 'django.db.models.AutoField' - - def ready(self): - from wagtail.core.signals import page_url_path_changed - - from .signal_handlers import autocreate_redirects - if getattr(settings, "WAGTAILREDIRECTS_AUTOCREATE", False): - page_url_path_changed.connect(autocreate_redirects) diff --git a/wagtail/contrib/redirects/migrations/0007_add_autocreate_tracking_fields.py b/wagtail/contrib/redirects/migrations/0007_add_autocreate_tracking_fields.py deleted file mode 100644 index fe23b1abd8..0000000000 --- a/wagtail/contrib/redirects/migrations/0007_add_autocreate_tracking_fields.py +++ /dev/null @@ -1,35 +0,0 @@ -# Generated by Django 3.0.8 on 2021-12-11 17:49 - -from django.db import migrations, models -import django.db.models.deletion - - -class Migration(migrations.Migration): - - dependencies = [ - ('contenttypes', '0002_remove_content_type_name'), - ('wagtailredirects', '0006_redirect_increase_max_length'), - ] - - operations = [ - migrations.AddField( - model_name='redirect', - name='automatically_created', - field=models.BooleanField(default=False, editable=False, verbose_name='automatically created'), - ), - migrations.AddField( - model_name='redirect', - name='created_at', - field=models.DateTimeField(auto_now=True, null=True, verbose_name='created at'), - ), - migrations.AddField( - model_name='redirect', - name='trigger_content_type', - field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='+', to='contenttypes.ContentType'), - ), - migrations.AddField( - model_name='redirect', - name='trigger_id', - field=models.PositiveIntegerField(blank=True, db_index=True, null=True), - ), - ] diff --git a/wagtail/contrib/redirects/models.py b/wagtail/contrib/redirects/models.py index 27ed7454aa..c242c09030 100644 --- a/wagtail/contrib/redirects/models.py +++ b/wagtail/contrib/redirects/models.py @@ -1,7 +1,5 @@ from urllib.parse import urlparse -from django.contrib.contenttypes.fields import GenericForeignKey -from django.contrib.contenttypes.models import ContentType from django.db import models from django.utils.translation import gettext_lazy as _ @@ -29,25 +27,6 @@ class Redirect(models.Model): on_delete=models.CASCADE ) redirect_link = models.URLField(verbose_name=_("redirect to any URL"), blank=True, max_length=255) - automatically_created = models.BooleanField( - verbose_name=_("automatically created"), - default=False, - editable=False, - ) - created_at = models.DateTimeField(verbose_name=_("created at"), auto_now=True, null=True) - trigger_content_type = models.ForeignKey( - ContentType, - models.SET_NULL, - blank=True, - null=True, - related_name='+', - ) - trigger_id = models.PositiveIntegerField( - blank=True, - null=True, - db_index=True, - ) - trigger = GenericForeignKey("trigger_content_type", "trigger_id") @property def title(self): @@ -79,12 +58,11 @@ class Redirect(models.Model): return cls.objects.all() @staticmethod - def add_redirect(old_path, redirect_to=None, is_permanent=True, site=None, automatically_created=False): + def add_redirect(old_path, redirect_to=None, is_permanent=True): """ Create and save a Redirect instance with a single method. :param old_path: the path you wish to redirect - :param site: the Site (instance) the redirect is applicable to (if not all sites) :param redirect_to: a Page (instance) or path (string) where the redirect should point :param is_permanent: whether the redirect should be indicated as permanent (i.e. 301 redirect) :return: Redirect instance @@ -93,7 +71,6 @@ class Redirect(models.Model): # Set redirect properties from input parameters redirect.old_path = Redirect.normalise_path(old_path) - redirect.site = site # Check whether redirect to is string or Page if isinstance(redirect_to, Page): @@ -104,7 +81,6 @@ class Redirect(models.Model): redirect.redirect_link = redirect_to redirect.is_permanent = is_permanent - redirect.automatically_created = automatically_created redirect.save() diff --git a/wagtail/contrib/redirects/signal_handlers.py b/wagtail/contrib/redirects/signal_handlers.py deleted file mode 100644 index 8dc38c43dc..0000000000 --- a/wagtail/contrib/redirects/signal_handlers.py +++ /dev/null @@ -1,92 +0,0 @@ -import logging - -from typing import Optional - -from django.conf import settings -from django.db.models import Q - -from wagtail.core.models import BaseLogEntry, Page, PageBase -from wagtail.core.utils import BatchCreator, get_dummy_request - -from .models import Redirect - - -logger = logging.getLogger(__name__) - - -class BatchRedirectCreator(BatchCreator): - """ - A specialized ``BatchCreator`` class for saving ``Redirect`` objects. - """ - - model = Redirect - - def initialize_instance(self, kwargs): - # Automatically normalise paths when creating instances from kwargs - kwargs["old_path"] = Redirect.normalise_path(kwargs["old_path"]) - return super().initialize_instance(kwargs) - - def pre_process(self): - # delete any existing automatically-created redirects that might clash - # with the items in `self.items` - clashes_q = Q() - for item in self.items: - clashes_q |= Q(old_path=item.old_path, site_id=item.site_id) - Redirect.objects.filter(automatically_created=True).filter(clashes_q).delete() - - -def autocreate_redirects( - sender: PageBase, instance: Page, url_path_before: str, url_path_after: str, log_entry: Optional[BaseLogEntry] = None -) -> None: - if instance.is_site_root(): - # url_path changes for site root pages do not affect URLs generally, - # so we can exit early here. - return None - - logger.info(f"Creating redirects for page: '{instance}' id={instance.id}") - batch = BatchRedirectCreator( - max_size=getattr(settings, "WAGTAILREDIRECTS_AUTOCREATE_BATCH_SIZE", 2000), - ignore_conflicts=True, - ) - request = get_dummy_request() - - for page in ( - instance.get_descendants(inclusive=True) - .live() - .specific(defer=True) - .iterator() - ): - try: - new_site_id, _, new_url = page.get_url_parts(request) - except TypeError: - continue - - # Restore old 'url_path' value on in-memory instance - new_path_length = len(url_path_after) - original_url_path = page.url_path - page.url_path = url_path_before + original_url_path[new_path_length:] - try: - # Use modified page to retrieve old site and url - old_site_id, _, old_url = page.get_url_parts(request) - except TypeError: - continue - - if (old_site_id, old_url) != (new_site_id, new_url): - page_paths = [old_url] - page_paths.extend( - old_url.rstrip("/") + path for path in page.get_cached_paths() if path != "/" - ) - batch.extend( - dict( - old_path=path, - site_id=old_site_id, - redirect_page=page, - automatically_created=True, - trigger=log_entry - ) - for path in page_paths - ) - - # Process the final batch - batch.process() - logger.info(batch.get_summary()) diff --git a/wagtail/contrib/redirects/tests/test_signal_handlers.py b/wagtail/contrib/redirects/tests/test_signal_handlers.py deleted file mode 100644 index 0b86afc080..0000000000 --- a/wagtail/contrib/redirects/tests/test_signal_handlers.py +++ /dev/null @@ -1,200 +0,0 @@ -from django.contrib.auth import get_user_model -from django.test import TestCase - -from wagtail.contrib.redirects.models import Redirect -from wagtail.contrib.redirects.signal_handlers import autocreate_redirects -from wagtail.core.models import PageLogEntry, Site -from wagtail.core.utils import get_dummy_request -from wagtail.tests.testapp.models import EventIndex -from wagtail.tests.utils import WagtailTestUtils - - -User = get_user_model() - - -class TestAutocreateRedirects(TestCase, WagtailTestUtils): - fixtures = ["test.json"] - - @classmethod - def setUpTestData(cls): - cls.site = Site.objects.select_related("root_page").get(is_default_site=True) - cls.user = User.objects.first() - - def setUp(self): - self.root_page = self.site.root_page - self.event_index = EventIndex.objects.get() - - def test_redirects_created(self): - # the page we'll be triggering the change for here is... - test_subject = self.event_index - - # gather urls for this branch - request = get_dummy_request() - branch_urls = [] - for page in ( - test_subject.get_descendants(inclusive=True) - .live() - .specific(defer=True) - .iterator() - ): - main_url = page.get_url(request).rstrip("/") - branch_urls.extend( - main_url + path.rstrip("/") for path in page.get_cached_paths() - ) - - # make a change that affects the url_path - url_path_before = test_subject.url_path - test_subject.slug = "something-different" - test_subject.save(user=self.user, log_action="wagtail.publish") - log_entry = PageLogEntry.objects.last() - - # invoke the signal handler - autocreate_redirects( - sender=EventIndex, - instance=test_subject, - url_path_before=url_path_before, - url_path_after=test_subject.url_path, - log_entry=log_entry - ) - - # gather all of the redirects that were created - redirects = Redirect.objects.all() - redirect_page_ids = set(r.redirect_page_id for r in redirects) - - # a redirect should have been created for the page itself - self.assertIn(test_subject.id, redirect_page_ids) - - # as well as for each of its live descendants - for descendant in test_subject.get_descendants().live().iterator(): - self.assertIn(descendant.id, redirect_page_ids) - - # for each redirect created: - for r in redirects: - # the old_path accurately matches a url from this branch - self.assertIn(r.old_path, branch_urls) - # the automatically_created flag should have been set to True - self.assertTrue(r.automatically_created) - # the trigger GenericForeignKey returns the relevant log action - self.assertEqual(r.trigger, log_entry) - - def test_redirects_not_created_for_draft_pages(self): - # the page we'll be triggering the change for here is... - test_subject = self.event_index - - # but before we do, let's identify some 'draft-only' descendants - not_live = test_subject.get_descendants().not_live() - self.assertEqual(len(not_live), 4) - - # now, repeat the change from `test_redirects_created` to affect the `url_path`` - url_path_before = test_subject.url_path - test_subject.slug = "something-different" - test_subject.save(user=self.user, log_action="wagtail.publish") - - # invoke the signal handler - autocreate_redirects( - sender=EventIndex, - instance=test_subject, - url_path_before=url_path_before, - url_path_after=test_subject.url_path, - log_entry=PageLogEntry.objects.last() - ) - - # gather all of the redirects that were created - redirects_by_page_id = { - obj.redirect_page_id: obj for obj in Redirect.objects.all() - } - - # a redirect should have been created for the page itself - self.assertIn(test_subject.id, redirects_by_page_id) - - # and each of the 'live' descendants - for descendant in test_subject.get_descendants().live().iterator(): - self.assertIn(descendant.id, redirects_by_page_id) - - # but, not for the draft pages - for page in not_live: - self.assertNotIn(page.id, redirects_by_page_id) - - def test_handling_of_existing_redirects(self): - # the page we'll be triggering the change for here is... - test_subject = self.event_index - - descendants = test_subject.get_descendants().live() - - # but before we do, let's add some redirects that we'll expect to conflict - # with ones created by the signal handler - redirect1 = Redirect.objects.create( - old_path=Redirect.normalise_path(descendants.first().specific.url), - site=self.site, - redirect_link="/some-place", - automatically_created=False, - ) - redirect2 = Redirect.objects.create( - old_path=Redirect.normalise_path(descendants.last().specific.url), - site=self.site, - redirect_link="/some-other-place", - automatically_created=True, - ) - - # now, repeat the change from `test_redirects_created` to affect the `url_path`` - url_path_before = test_subject.url_path - test_subject.slug = "something-different" - test_subject.save(user=self.user, log_action="wagtail.publish") - - # invoke the signal handler - autocreate_redirects( - sender=EventIndex, - instance=test_subject, - url_path_before=url_path_before, - url_path_after=test_subject.url_path, - log_entry=PageLogEntry.objects.last() - ) - - # pre-existing manually-created redirects should be preserved - from_db = Redirect.objects.get(id=redirect1.id) - self.assertEqual( - ( - redirect1.old_path, - redirect1.site_id, - redirect1.is_permanent, - redirect1.redirect_link, - redirect1.redirect_page, - ), - ( - from_db.old_path, - from_db.site_id, - from_db.is_permanent, - from_db.redirect_link, - from_db.redirect_page, - ), - ) - - # pre-existing automatically-created redirects should be replaced completely - self.assertFalse(Redirect.objects.filter(pk=redirect2.pk).exists()) - self.assertTrue( - Redirect.objects.filter( - old_path=redirect2.old_path, - site_id=redirect2.site_id, - ).exists() - ) - - def test_no_redirects_created_for_site_root(self): - # the page we'll be triggering the change for here is... - test_subject = self.root_page - - # make a change that affects the url_path - url_path_before = test_subject.url_path - test_subject.slug = "something-different" - test_subject.save(user=self.user, log_action="wagtail.publish") - - # invoke the signal handler - autocreate_redirects( - sender=type(test_subject), - instance=test_subject, - url_path_before=url_path_before, - url_path_after=test_subject.url_path, - log_entry=PageLogEntry.objects.last() - ) - - # test that no redirects were created - self.assertFalse(Redirect.objects.all().exists()) diff --git a/wagtail/core/utils.py b/wagtail/core/utils.py index f3b8fa2571..34ba99d161 100644 --- a/wagtail/core/utils.py +++ b/wagtail/core/utils.py @@ -1,11 +1,8 @@ import functools import inspect -import logging import re import unicodedata -from typing import TYPE_CHECKING, Any, Dict, Iterable, Union - from anyascii import anyascii from django.apps import apps from django.conf import settings @@ -13,19 +10,12 @@ from django.conf.locale import LANG_INFO from django.core.exceptions import ImproperlyConfigured, SuspiciousOperation from django.core.signals import setting_changed from django.db.models import Model -from django.db.models.base import ModelBase from django.dispatch import receiver -from django.http import HttpRequest from django.utils.encoding import force_str from django.utils.text import slugify from django.utils.translation import check_for_language, get_supported_language_variant -if TYPE_CHECKING: - from wagtail.core.models import Site - -logger = logging.getLogger(__name__) - WAGTAIL_APPEND_SLASH = getattr(settings, 'WAGTAIL_APPEND_SLASH', True) @@ -366,145 +356,3 @@ def multigetattr(item, accessor): current = current() return current - - -def get_dummy_request(path: str = "/", site: 'Site' = None) -> HttpRequest: - """ - Return a simple ``HttpRequest`` instance that can be passed to - ``Page.get_url()`` and other methods to benefit from improved performance - when no real ``HttpRequest`` instance is available. - - If ``site`` is provided, the ``HttpRequest`` is made to look like it came - from that Wagtail ``Site``. - """ - request = HttpRequest() - request.path = path - request.method = "GET" - SERVER_PORT = 80 - if site: - SERVER_NAME = site.hostname - if site.port not in [80, 443]: - SERVER_NAME += f":{site.port}" - SERVER_PORT = site.port - if settings.ALLOWED_HOSTS == ["*"]: - SERVER_NAME = "example.com" - else: - SERVER_NAME = settings.ALLOWED_HOSTS[0] - request.META = {"SERVER_NAME": SERVER_NAME, "SERVER_PORT": SERVER_PORT} - return request - - -class BatchProcessor: - """ - A class to help with processing of an unknown (and potentially very - high) number of objects. - - Just set ``max_size`` to the maximum number of instances you want - to be held in memory at any one time, and batches will be sent to the - ``process()`` method as that number is reached, without you having to - invoke ``process()`` regularly yourself. Just remember to invoke - ``process()`` when you're done adding items, otherwise the final batch - of objects will not be processed. - """ - def __init__(self, max_size: int): - self.max_size = max_size - self.items = [] - self.added_count = 0 - - def __len__(self): - return self.added_count - - def add(self, item: Any) -> None: - self.items.append(item) - self.added_count += 1 - if self.max_size and len(self.items) == self.max_size: - self.process() - - def extend(self, iterable: Iterable[Any]) -> None: - for item in iterable: - self.add(item) - - def process(self): - self.pre_process() - self._do_processing() - self.post_process() - self.items.clear() - - def pre_process(self): - """ - A hook to allow subclasses to do any pre-processing of the data - before the ``process()`` method is called. - """ - pass - - def _do_processing(self): - """ - To be overridden by subclasses to do whatever it is - that needs to be done to the items in ``self.items``. - """ - raise NotImplementedError - - def post_process(self): - """ - A hook to allow subclasses to do any post-processing - after the ``process()`` method is called, and before - ``self.items`` is cleared - """ - pass - - -class BatchCreator(BatchProcessor): - """ - A class to help with bulk creation of an unknown (and potentially very - high) number of model instances. - - Just set ``max_size`` to the maximum number of instances you want - to be held in memory at any one time, and batches of objects will - be created as that number is reached, without you having to invoke - the ``process()`` method regularly yourself. Just remember to - invoke ``process()`` when you're done adding items, to ensure - that the final batch items is saved. - - ``BatchSaver`` is migration-friendly! Just use the ``model`` - keyword argument when initializing to override the hardcoded model - class with the version from your migration. - """ - - model: ModelBase = None - - def __init__(self, max_size: int, *, model: ModelBase = None, ignore_conflicts=False): - super().__init__(max_size) - self.ignore_conflicts = ignore_conflicts - self.created_count = 0 - if model is not None: - self.model = model - - def initialize_instance(self, kwargs): - return self.model(**kwargs) - - def add(self, *, instance: Model = None, **kwargs) -> None: - if instance is None: - instance = self.initialize_instance(kwargs) - self.items.append(instance) - self.added_count += 1 - if self.max_size and len(self.items) == self.max_size: - self.process() - - def extend(self, iterable: Iterable[Union[Model, Dict[str, Any]]]) -> None: - for value in iterable: - if isinstance(value, self.model): - self.add(instance=value) - else: - self.add(**value) - - def _do_processing(self): - """ - Use bulk_create() to save ``self.items``. - """ - if not self.items: - return None - self.created_count += len(self.model.objects.bulk_create(self.items, ignore_conflicts=self.ignore_conflicts)) - - def get_summary(self): - opts = self.model._meta - return f"{self.created_count}/{self.added_count} {opts.verbose_name_plural} were created successfully."