diff --git a/wagtail/core/management/commands/fixtree.py b/wagtail/core/management/commands/fixtree.py index b878c71fae..f4c8b555bd 100644 --- a/wagtail/core/management/commands/fixtree.py +++ b/wagtail/core/management/commands/fixtree.py @@ -5,7 +5,7 @@ from django.core.management.base import BaseCommand from django.db import models from django.db.models import Q -from wagtail.core.models import Page +from wagtail.core.models import Collection, Page class Command(BaseCommand): @@ -16,6 +16,9 @@ class Command(BaseCommand): parser.add_argument( '--noinput', action='store_false', dest='interactive', default=True, help='If provided, any fixes requiring user interaction will be skipped.') + parser.add_argument( + '--full', action='store_true', dest='full', default=False, + help='If provided, uses a more thorough but slower method that also fixes path ordering issues.') def numberlist_to_string(self, numberlist): # Converts a list of numbers into a string @@ -23,33 +26,35 @@ class Command(BaseCommand): return '[' + ', '.join(map(str, numberlist)) + ']' def handle(self, **options): - any_problems_fixed = False - + any_page_problems_fixed = False for page in Page.objects.all(): try: page.specific except page.specific_class.DoesNotExist: self.stdout.write("Page %d (%s) is missing a subclass record; deleting." % (page.id, page.title)) - any_problems_fixed = True + any_page_problems_fixed = True page.delete() - (bad_alpha, bad_path, orphans, bad_depth, bad_numchild) = Page.find_problems() + self.handle_model(Page, 'page', 'pages', any_page_problems_fixed, options) + self.handle_model(Collection, 'collection', 'collections', False, options) + + def handle_model(self, model, model_name, model_name_plural, any_problems_fixed, options): + fix_paths = options.get('full', False) + + self.stdout.write("Checking %s tree for problems..." % model_name) + (bad_alpha, bad_path, orphans, bad_depth, bad_numchild) = model.find_problems() if bad_depth: - self.stdout.write("Incorrect depth value found for pages: %s" % self.numberlist_to_string(bad_depth)) + self.stdout.write("Incorrect depth value found for %s: %s" % (model_name_plural, self.numberlist_to_string(bad_depth))) if bad_numchild: - self.stdout.write("Incorrect numchild value found for pages: %s" % self.numberlist_to_string(bad_numchild)) - - if bad_depth or bad_numchild: - Page.fix_tree(destructive=False) - any_problems_fixed = True + self.stdout.write("Incorrect numchild value found for %s: %s" % (model_name_plural, self.numberlist_to_string(bad_numchild))) if orphans: - # The 'orphans' list as returned by treebeard only includes pages that are + # The 'orphans' list as returned by treebeard only includes nodes that are # missing an immediate parent; descendants of orphans are not included. # Deleting only the *actual* orphans is a bit silly (since it'll just create # more orphans), so generate a queryset that contains descendants as well. - orphan_paths = Page.objects.filter(id__in=orphans).values_list('path', flat=True) + orphan_paths = model.objects.filter(id__in=orphans).values_list('path', flat=True) filter_conditions = [] for path in orphan_paths: filter_conditions.append(Q(path__startswith=path)) @@ -57,19 +62,19 @@ class Command(BaseCommand): # combine filter_conditions into a single ORed condition final_filter = functools.reduce(operator.or_, filter_conditions) - # build a queryset of all pages to be removed; this must be a vanilla Django + # build a queryset of all nodes to be removed; this must be a vanilla Django # queryset rather than a treebeard MP_NodeQuerySet, so that we bypass treebeard's # custom delete() logic that would trip up on the very same corruption that we're # trying to fix here. - pages_to_delete = models.query.QuerySet(Page).filter(final_filter) + nodes_to_delete = models.query.QuerySet(model).filter(final_filter) - self.stdout.write("Orphaned pages found:") - for page in pages_to_delete: - self.stdout.write("ID %d: %s" % (page.id, page.title)) + self.stdout.write("Orphaned %s found:" % model_name_plural) + for node in nodes_to_delete: + self.stdout.write("ID %d: %s" % (node.id, node)) self.stdout.write('') if options.get('interactive', True): - yes_or_no = input("Delete these pages? [y/N] ") + yes_or_no = input("Delete these %s? [y/N] " % model_name_plural) delete_orphans = yes_or_no.lower().startswith('y') self.stdout.write('') else: @@ -77,35 +82,42 @@ class Command(BaseCommand): delete_orphans = options.get('delete_orphans', False) if delete_orphans: - deletion_count = len(pages_to_delete) - pages_to_delete.delete() + deletion_count = len(nodes_to_delete) + nodes_to_delete.delete() self.stdout.write( - "%d orphaned page%s deleted." % (deletion_count, "s" if deletion_count != 1 else "") + "%d orphaned %s deleted." % (deletion_count, model_name_plural if deletion_count != 1 else model_name) ) any_problems_fixed = True + # fix_paths will fix problems not identified by find_problems, so if that option has been + # passed, run it regardless (and set any_problems_fixed=True, since we don't have a way to + # test whether anything was actually fixed in that process) + if bad_depth or bad_numchild or fix_paths: + model.fix_tree(destructive=False, fix_paths=fix_paths) + any_problems_fixed = True + if any_problems_fixed: # re-run find_problems to see if any new ones have surfaced - (bad_alpha, bad_path, orphans, bad_depth, bad_numchild) = Page.find_problems() + (bad_alpha, bad_path, orphans, bad_depth, bad_numchild) = model.find_problems() if any((bad_alpha, bad_path, orphans, bad_depth, bad_numchild)): self.stdout.write("Remaining problems (cannot fix automatically):") if bad_alpha: self.stdout.write( - "Invalid characters found in path for pages: %s" % self.numberlist_to_string(bad_alpha) + "Invalid characters found in path for %s: %s" % (model_name_plural, self.numberlist_to_string(bad_alpha)) ) if bad_path: - self.stdout.write("Invalid path length found for pages: %s" % self.numberlist_to_string(bad_path)) + self.stdout.write("Invalid path length found for %s: %s" % (model_name_plural, self.numberlist_to_string(bad_path))) if orphans: - self.stdout.write("Orphaned pages found: %s" % self.numberlist_to_string(orphans)) + self.stdout.write("Orphaned %s found: %s" % (model_name_plural, self.numberlist_to_string(orphans))) if bad_depth: - self.stdout.write("Incorrect depth value found for pages: %s" % self.numberlist_to_string(bad_depth)) + self.stdout.write("Incorrect depth value found for %s: %s" % (model_name_plural, self.numberlist_to_string(bad_depth))) if bad_numchild: self.stdout.write( - "Incorrect numchild value found for pages: %s" % self.numberlist_to_string(bad_numchild) + "Incorrect numchild value found for %s: %s" % (model_name_plural, self.numberlist_to_string(bad_numchild)) ) elif any_problems_fixed: - self.stdout.write("All problems fixed.") + self.stdout.write("All problems fixed.\n\n") else: - self.stdout.write("No problems found.") + self.stdout.write("No problems found.\n\n") diff --git a/wagtail/core/models.py b/wagtail/core/models.py index 531f562b1b..9f4e48aca0 100644 --- a/wagtail/core/models.py +++ b/wagtail/core/models.py @@ -39,6 +39,7 @@ from wagtail.core.signals import ( task_approved, task_cancelled, task_rejected, task_submitted, workflow_approved, workflow_cancelled, workflow_rejected, workflow_submitted) from wagtail.core.sites import get_site_for_hostname +from wagtail.core.treebeard import TreebeardPathFixMixin from wagtail.core.url_routing import RouteResult from wagtail.core.utils import WAGTAIL_APPEND_SLASH, camelcase_to_underscore, resolve_model_string from wagtail.search import index @@ -372,7 +373,7 @@ class PageBase(models.base.ModelBase): PAGE_MODEL_CLASSES.append(cls) -class AbstractPage(MP_Node): +class AbstractPage(TreebeardPathFixMixin, MP_Node): """ Abstract superclass for Page. According to Django's inheritance rules, managers set on abstract models are inherited by subclasses, but managers set on concrete models that are extended @@ -2730,7 +2731,7 @@ class CollectionViewRestriction(BaseViewRestriction): verbose_name_plural = _('collection view restrictions') -class Collection(MP_Node): +class Collection(TreebeardPathFixMixin, MP_Node): """ A location in which resources such as images and documents can be grouped """ diff --git a/wagtail/core/tests/test_management_commands.py b/wagtail/core/tests/test_management_commands.py index 3b4117b63d..8f21bbe18d 100644 --- a/wagtail/core/tests/test_management_commands.py +++ b/wagtail/core/tests/test_management_commands.py @@ -8,7 +8,7 @@ from django.db import models from django.test import TestCase from django.utils import timezone -from wagtail.core.models import Page, PageLogEntry, PageRevision +from wagtail.core.models import Collection, Page, PageLogEntry, PageRevision from wagtail.core.signals import page_published, page_unpublished from wagtail.tests.testapp.models import EventPage, SimplePage @@ -57,14 +57,21 @@ class TestFixTreeCommand(TestCase): homepage.depth = 12345 homepage.save() + # also break the root collection's depth + root_collection = Collection.get_first_root_node() + root_collection.depth = 42 + root_collection.save() + # Check that its broken self.assertEqual(Page.objects.get(url_path='/home/').depth, 12345) + self.assertEqual(Collection.objects.get(id=root_collection.id).depth, 42) # Call command self.run_command() # Check if its fixed self.assertEqual(Page.objects.get(url_path='/home/').depth, old_depth) + self.assertEqual(Collection.objects.get(id=root_collection.id).depth, 1) def test_detects_orphans(self): events_index = Page.objects.get(url_path='/home/events/') @@ -111,6 +118,16 @@ class TestFixTreeCommand(TestCase): # Check that christmas_page has been deleted self.assertFalse(Page.objects.filter(id=christmas_page.id).exists()) + def test_remove_path_holes(self): + events_index = Page.objects.get(url_path='/home/events/') + # Delete the event page in path position 0001 + Page.objects.get(path=events_index.path + '0001').delete() + + self.run_command(full=True) + # the gap at position 0001 should have been closed + events_index = Page.objects.get(url_path='/home/events/') + self.assertTrue(Page.objects.filter(path=events_index.path + '0001').exists()) + class TestMovePagesCommand(TestCase): fixtures = ['test.json'] diff --git a/wagtail/core/treebeard.py b/wagtail/core/treebeard.py new file mode 100644 index 0000000000..36a2ed5a00 --- /dev/null +++ b/wagtail/core/treebeard.py @@ -0,0 +1,102 @@ +"""Helper functions to support django-treebeard""" + +from django.db import transaction +from django.db.models import Value +from django.db.models.functions import Concat, Substr +from treebeard.exceptions import PathOverflow + + +class TreebeardPathFixMixin: + """ + Extends the fix_tree method with a `fix_paths` option that non-destructively fixes holes in + path sequences, and applies the node_order_by ordering, if specified. + Taken from https://github.com/django-treebeard/django-treebeard/pull/165 - can be removed + if / when that PR is included in a django-treebeard release. + """ + + @classmethod + def fix_tree(cls, fix_paths=False, **kwargs): + super().fix_tree(**kwargs) + + if fix_paths: + with transaction.atomic(): + # To fix holes and mis-orderings in paths, we consider each non-leaf node in turn + # and ensure that its children's path values are consecutive (and in the order + # given by node_order_by, if applicable). children_to_fix is a queue of child sets + # that we know about but have not yet fixed, expressed as a tuple of + # (parent_path, depth). Since we're updating paths as we go, we must take care to + # only add items to this list after the corresponding parent node has been fixed + # (and is thus not going to change). + + # Initially children_to_fix is the set of root nodes, i.e. ones with a path + # starting with '' and depth 1. + children_to_fix = [('', 1)] + + while children_to_fix: + parent_path, depth = children_to_fix.pop(0) + + children = cls.objects.filter( + path__startswith=parent_path, depth=depth + ).values('pk', 'path', 'depth', 'numchild') + + desired_sequence = children.order_by(*(cls.node_order_by or ['path'])) + + # mapping of current path position (converted to numeric) to item + actual_sequence = {} + + # highest numeric path position currently in use + max_position = None + + # loop over items to populate actual_sequence and max_position + for item in desired_sequence: + actual_position = cls._str2int(item['path'][-cls.steplen:]) + actual_sequence[actual_position] = item + if max_position is None or actual_position > max_position: + max_position = actual_position + + # loop over items to perform path adjustments + for (i, item) in enumerate(desired_sequence): + desired_position = i + 1 # positions are 1-indexed + actual_position = cls._str2int(item['path'][-cls.steplen:]) + if actual_position == desired_position: + pass + else: + # if a node is already in the desired position, move that node + # to max_position + 1 to get it out of the way + occupant = actual_sequence.get(desired_position) + if occupant: + old_path = occupant['path'] + max_position += 1 + new_path = cls._get_path(parent_path, depth, max_position) + if len(new_path) > len(old_path): + previous_max_path = cls._get_path(parent_path, depth, max_position - 1) + raise PathOverflow("Path Overflow from: '%s'" % (previous_max_path, )) + + cls._rewrite_node_path(old_path, new_path) + # update actual_sequence to reflect the new position + actual_sequence[max_position] = occupant + del(actual_sequence[desired_position]) + occupant['path'] = new_path + + # move item into the (now vacated) desired position + old_path = item['path'] + new_path = cls._get_path(parent_path, depth, desired_position) + cls._rewrite_node_path(old_path, new_path) + # update actual_sequence to reflect the new position + actual_sequence[desired_position] = item + del(actual_sequence[actual_position]) + item['path'] = new_path + + if item['numchild']: + # this item has children to process, and we have now moved the parent + # node into its final position, so it's safe to add to children_to_fix + children_to_fix.append((item['path'], depth + 1)) + + @classmethod + def _rewrite_node_path(cls, old_path, new_path): + cls.objects.filter(path__startswith=old_path).update( + path=Concat( + Value(new_path), + Substr('path', len(old_path) + 1) + ) + )