Extend treebeard's fix_tree method with the ability to non-destructively fix path issues and add a --full option to apply path fixes

pull/6273/merge
Matt Westcott 2020-06-29 16:32:47 +01:00 zatwierdzone przez Andy Babic
rodzic 10d68fdd09
commit 8f8f2e12b7
4 zmienionych plików z 165 dodań i 33 usunięć

Wyświetl plik

@ -5,7 +5,7 @@ from django.core.management.base import BaseCommand
from django.db import models
from django.db.models import Q
from wagtail.core.models import Page
from wagtail.core.models import Collection, Page
class Command(BaseCommand):
@ -16,6 +16,9 @@ class Command(BaseCommand):
parser.add_argument(
'--noinput', action='store_false', dest='interactive', default=True,
help='If provided, any fixes requiring user interaction will be skipped.')
parser.add_argument(
'--full', action='store_true', dest='full', default=False,
help='If provided, uses a more thorough but slower method that also fixes path ordering issues.')
def numberlist_to_string(self, numberlist):
# Converts a list of numbers into a string
@ -23,33 +26,35 @@ class Command(BaseCommand):
return '[' + ', '.join(map(str, numberlist)) + ']'
def handle(self, **options):
any_problems_fixed = False
any_page_problems_fixed = False
for page in Page.objects.all():
try:
page.specific
except page.specific_class.DoesNotExist:
self.stdout.write("Page %d (%s) is missing a subclass record; deleting." % (page.id, page.title))
any_problems_fixed = True
any_page_problems_fixed = True
page.delete()
(bad_alpha, bad_path, orphans, bad_depth, bad_numchild) = Page.find_problems()
self.handle_model(Page, 'page', 'pages', any_page_problems_fixed, options)
self.handle_model(Collection, 'collection', 'collections', False, options)
def handle_model(self, model, model_name, model_name_plural, any_problems_fixed, options):
fix_paths = options.get('full', False)
self.stdout.write("Checking %s tree for problems..." % model_name)
(bad_alpha, bad_path, orphans, bad_depth, bad_numchild) = model.find_problems()
if bad_depth:
self.stdout.write("Incorrect depth value found for pages: %s" % self.numberlist_to_string(bad_depth))
self.stdout.write("Incorrect depth value found for %s: %s" % (model_name_plural, self.numberlist_to_string(bad_depth)))
if bad_numchild:
self.stdout.write("Incorrect numchild value found for pages: %s" % self.numberlist_to_string(bad_numchild))
if bad_depth or bad_numchild:
Page.fix_tree(destructive=False)
any_problems_fixed = True
self.stdout.write("Incorrect numchild value found for %s: %s" % (model_name_plural, self.numberlist_to_string(bad_numchild)))
if orphans:
# The 'orphans' list as returned by treebeard only includes pages that are
# The 'orphans' list as returned by treebeard only includes nodes that are
# missing an immediate parent; descendants of orphans are not included.
# Deleting only the *actual* orphans is a bit silly (since it'll just create
# more orphans), so generate a queryset that contains descendants as well.
orphan_paths = Page.objects.filter(id__in=orphans).values_list('path', flat=True)
orphan_paths = model.objects.filter(id__in=orphans).values_list('path', flat=True)
filter_conditions = []
for path in orphan_paths:
filter_conditions.append(Q(path__startswith=path))
@ -57,19 +62,19 @@ class Command(BaseCommand):
# combine filter_conditions into a single ORed condition
final_filter = functools.reduce(operator.or_, filter_conditions)
# build a queryset of all pages to be removed; this must be a vanilla Django
# build a queryset of all nodes to be removed; this must be a vanilla Django
# queryset rather than a treebeard MP_NodeQuerySet, so that we bypass treebeard's
# custom delete() logic that would trip up on the very same corruption that we're
# trying to fix here.
pages_to_delete = models.query.QuerySet(Page).filter(final_filter)
nodes_to_delete = models.query.QuerySet(model).filter(final_filter)
self.stdout.write("Orphaned pages found:")
for page in pages_to_delete:
self.stdout.write("ID %d: %s" % (page.id, page.title))
self.stdout.write("Orphaned %s found:" % model_name_plural)
for node in nodes_to_delete:
self.stdout.write("ID %d: %s" % (node.id, node))
self.stdout.write('')
if options.get('interactive', True):
yes_or_no = input("Delete these pages? [y/N] ")
yes_or_no = input("Delete these %s? [y/N] " % model_name_plural)
delete_orphans = yes_or_no.lower().startswith('y')
self.stdout.write('')
else:
@ -77,35 +82,42 @@ class Command(BaseCommand):
delete_orphans = options.get('delete_orphans', False)
if delete_orphans:
deletion_count = len(pages_to_delete)
pages_to_delete.delete()
deletion_count = len(nodes_to_delete)
nodes_to_delete.delete()
self.stdout.write(
"%d orphaned page%s deleted." % (deletion_count, "s" if deletion_count != 1 else "")
"%d orphaned %s deleted." % (deletion_count, model_name_plural if deletion_count != 1 else model_name)
)
any_problems_fixed = True
# fix_paths will fix problems not identified by find_problems, so if that option has been
# passed, run it regardless (and set any_problems_fixed=True, since we don't have a way to
# test whether anything was actually fixed in that process)
if bad_depth or bad_numchild or fix_paths:
model.fix_tree(destructive=False, fix_paths=fix_paths)
any_problems_fixed = True
if any_problems_fixed:
# re-run find_problems to see if any new ones have surfaced
(bad_alpha, bad_path, orphans, bad_depth, bad_numchild) = Page.find_problems()
(bad_alpha, bad_path, orphans, bad_depth, bad_numchild) = model.find_problems()
if any((bad_alpha, bad_path, orphans, bad_depth, bad_numchild)):
self.stdout.write("Remaining problems (cannot fix automatically):")
if bad_alpha:
self.stdout.write(
"Invalid characters found in path for pages: %s" % self.numberlist_to_string(bad_alpha)
"Invalid characters found in path for %s: %s" % (model_name_plural, self.numberlist_to_string(bad_alpha))
)
if bad_path:
self.stdout.write("Invalid path length found for pages: %s" % self.numberlist_to_string(bad_path))
self.stdout.write("Invalid path length found for %s: %s" % (model_name_plural, self.numberlist_to_string(bad_path)))
if orphans:
self.stdout.write("Orphaned pages found: %s" % self.numberlist_to_string(orphans))
self.stdout.write("Orphaned %s found: %s" % (model_name_plural, self.numberlist_to_string(orphans)))
if bad_depth:
self.stdout.write("Incorrect depth value found for pages: %s" % self.numberlist_to_string(bad_depth))
self.stdout.write("Incorrect depth value found for %s: %s" % (model_name_plural, self.numberlist_to_string(bad_depth)))
if bad_numchild:
self.stdout.write(
"Incorrect numchild value found for pages: %s" % self.numberlist_to_string(bad_numchild)
"Incorrect numchild value found for %s: %s" % (model_name_plural, self.numberlist_to_string(bad_numchild))
)
elif any_problems_fixed:
self.stdout.write("All problems fixed.")
self.stdout.write("All problems fixed.\n\n")
else:
self.stdout.write("No problems found.")
self.stdout.write("No problems found.\n\n")

Wyświetl plik

@ -39,6 +39,7 @@ from wagtail.core.signals import (
task_approved, task_cancelled, task_rejected, task_submitted,
workflow_approved, workflow_cancelled, workflow_rejected, workflow_submitted)
from wagtail.core.sites import get_site_for_hostname
from wagtail.core.treebeard import TreebeardPathFixMixin
from wagtail.core.url_routing import RouteResult
from wagtail.core.utils import WAGTAIL_APPEND_SLASH, camelcase_to_underscore, resolve_model_string
from wagtail.search import index
@ -372,7 +373,7 @@ class PageBase(models.base.ModelBase):
PAGE_MODEL_CLASSES.append(cls)
class AbstractPage(MP_Node):
class AbstractPage(TreebeardPathFixMixin, MP_Node):
"""
Abstract superclass for Page. According to Django's inheritance rules, managers set on
abstract models are inherited by subclasses, but managers set on concrete models that are extended
@ -2730,7 +2731,7 @@ class CollectionViewRestriction(BaseViewRestriction):
verbose_name_plural = _('collection view restrictions')
class Collection(MP_Node):
class Collection(TreebeardPathFixMixin, MP_Node):
"""
A location in which resources such as images and documents can be grouped
"""

Wyświetl plik

@ -8,7 +8,7 @@ from django.db import models
from django.test import TestCase
from django.utils import timezone
from wagtail.core.models import Page, PageLogEntry, PageRevision
from wagtail.core.models import Collection, Page, PageLogEntry, PageRevision
from wagtail.core.signals import page_published, page_unpublished
from wagtail.tests.testapp.models import EventPage, SimplePage
@ -57,14 +57,21 @@ class TestFixTreeCommand(TestCase):
homepage.depth = 12345
homepage.save()
# also break the root collection's depth
root_collection = Collection.get_first_root_node()
root_collection.depth = 42
root_collection.save()
# Check that its broken
self.assertEqual(Page.objects.get(url_path='/home/').depth, 12345)
self.assertEqual(Collection.objects.get(id=root_collection.id).depth, 42)
# Call command
self.run_command()
# Check if its fixed
self.assertEqual(Page.objects.get(url_path='/home/').depth, old_depth)
self.assertEqual(Collection.objects.get(id=root_collection.id).depth, 1)
def test_detects_orphans(self):
events_index = Page.objects.get(url_path='/home/events/')
@ -111,6 +118,16 @@ class TestFixTreeCommand(TestCase):
# Check that christmas_page has been deleted
self.assertFalse(Page.objects.filter(id=christmas_page.id).exists())
def test_remove_path_holes(self):
events_index = Page.objects.get(url_path='/home/events/')
# Delete the event page in path position 0001
Page.objects.get(path=events_index.path + '0001').delete()
self.run_command(full=True)
# the gap at position 0001 should have been closed
events_index = Page.objects.get(url_path='/home/events/')
self.assertTrue(Page.objects.filter(path=events_index.path + '0001').exists())
class TestMovePagesCommand(TestCase):
fixtures = ['test.json']

Wyświetl plik

@ -0,0 +1,102 @@
"""Helper functions to support django-treebeard"""
from django.db import transaction
from django.db.models import Value
from django.db.models.functions import Concat, Substr
from treebeard.exceptions import PathOverflow
class TreebeardPathFixMixin:
"""
Extends the fix_tree method with a `fix_paths` option that non-destructively fixes holes in
path sequences, and applies the node_order_by ordering, if specified.
Taken from https://github.com/django-treebeard/django-treebeard/pull/165 - can be removed
if / when that PR is included in a django-treebeard release.
"""
@classmethod
def fix_tree(cls, fix_paths=False, **kwargs):
super().fix_tree(**kwargs)
if fix_paths:
with transaction.atomic():
# To fix holes and mis-orderings in paths, we consider each non-leaf node in turn
# and ensure that its children's path values are consecutive (and in the order
# given by node_order_by, if applicable). children_to_fix is a queue of child sets
# that we know about but have not yet fixed, expressed as a tuple of
# (parent_path, depth). Since we're updating paths as we go, we must take care to
# only add items to this list after the corresponding parent node has been fixed
# (and is thus not going to change).
# Initially children_to_fix is the set of root nodes, i.e. ones with a path
# starting with '' and depth 1.
children_to_fix = [('', 1)]
while children_to_fix:
parent_path, depth = children_to_fix.pop(0)
children = cls.objects.filter(
path__startswith=parent_path, depth=depth
).values('pk', 'path', 'depth', 'numchild')
desired_sequence = children.order_by(*(cls.node_order_by or ['path']))
# mapping of current path position (converted to numeric) to item
actual_sequence = {}
# highest numeric path position currently in use
max_position = None
# loop over items to populate actual_sequence and max_position
for item in desired_sequence:
actual_position = cls._str2int(item['path'][-cls.steplen:])
actual_sequence[actual_position] = item
if max_position is None or actual_position > max_position:
max_position = actual_position
# loop over items to perform path adjustments
for (i, item) in enumerate(desired_sequence):
desired_position = i + 1 # positions are 1-indexed
actual_position = cls._str2int(item['path'][-cls.steplen:])
if actual_position == desired_position:
pass
else:
# if a node is already in the desired position, move that node
# to max_position + 1 to get it out of the way
occupant = actual_sequence.get(desired_position)
if occupant:
old_path = occupant['path']
max_position += 1
new_path = cls._get_path(parent_path, depth, max_position)
if len(new_path) > len(old_path):
previous_max_path = cls._get_path(parent_path, depth, max_position - 1)
raise PathOverflow("Path Overflow from: '%s'" % (previous_max_path, ))
cls._rewrite_node_path(old_path, new_path)
# update actual_sequence to reflect the new position
actual_sequence[max_position] = occupant
del(actual_sequence[desired_position])
occupant['path'] = new_path
# move item into the (now vacated) desired position
old_path = item['path']
new_path = cls._get_path(parent_path, depth, desired_position)
cls._rewrite_node_path(old_path, new_path)
# update actual_sequence to reflect the new position
actual_sequence[desired_position] = item
del(actual_sequence[actual_position])
item['path'] = new_path
if item['numchild']:
# this item has children to process, and we have now moved the parent
# node into its final position, so it's safe to add to children_to_fix
children_to_fix.append((item['path'], depth + 1))
@classmethod
def _rewrite_node_path(cls, old_path, new_path):
cls.objects.filter(path__startswith=old_path).update(
path=Concat(
Value(new_path),
Substr('path', len(old_path) + 1)
)
)