Initial work on comparing revisions

pull/3275/merge
Karl Hobley 2016-11-21 09:42:54 +00:00 zatwierdzone przez Matt Westcott
rodzic e771fefa5f
commit 3cb578fa5f
8 zmienionych plików z 622 dodań i 4 usunięć

Wyświetl plik

@ -0,0 +1,382 @@
from __future__ import absolute_import, unicode_literals
import difflib
from bs4 import BeautifulSoup
from django.utils.encoding import force_text
from django.utils.html import escape
from django.utils.safestring import mark_safe
from django.utils.text import capfirst
class FieldComparison:
    """
    Holds the value of one field in two versions of an object (``val_a`` and
    ``val_b``) and reports whether and how they differ.
    """
    is_field = True
    is_child_relation = False

    def __init__(self, field, val_a, val_b):
        self.field, self.val_a, self.val_b = field, val_a, val_b

    def field_label(self):
        """
        Returns the user-facing label for this field, with its first letter capitalised
        """
        label = getattr(self.field, 'verbose_name', None)
        if label is None:
            # No verbose_name available (the case for relations): derive one from the field name
            label = self.field.name.replace('_', ' ')
        return capfirst(label)

    def htmldiff(self):
        """
        Returns the unchanged value as-is, or HTML marking the old value as a
        deletion and the new value as an addition
        """
        if self.val_a != self.val_b:
            changes = [('deletion', self.val_a), ('addition', self.val_b)]
            return TextDiff(changes).to_html()
        return self.val_a

    def has_changed(self):
        """
        Returns True if the two values are not equal
        """
        return self.val_a != self.val_b
class TextFieldComparison(FieldComparison):
    """
    Field comparison whose HTML output is a token-by-token diff of the two values.
    """

    def htmldiff(self):
        diff = diff_text(self.val_a, self.val_b)
        return diff.to_html()
class RichTextFieldComparison(TextFieldComparison):
    """
    Text comparison for values containing markup: both values are reduced to
    their text content before being diffed.
    """

    def htmldiff(self):
        """
        Returns an HTML diff of the text extracted from both values.
        """
        # Name the parser explicitly. Without it BeautifulSoup picks whichever
        # parser library happens to be installed (and warns about doing so),
        # which makes the extracted text depend on the environment.
        text_a = BeautifulSoup(force_text(self.val_a), 'html.parser').getText('\n')
        text_b = BeautifulSoup(force_text(self.val_b), 'html.parser').getText('\n')
        return diff_text(text_a, text_b).to_html()
class StreamFieldComparison(RichTextFieldComparison):
    """
    Comparison used for stream field values. Adds nothing of its own; all
    behaviour is inherited from RichTextFieldComparison.
    """
class ChildRelationComparison:
    """
    Compares the child objects of a relation between two versions of a parent
    object.

    ``val_a`` and ``val_b`` are the relation values of the A and B versions;
    both must provide ``all()``. ``field_comparisons`` is passed unchanged to
    every ChildObjectComparison built by ``get_child_comparison``.
    """
    is_field = False
    is_child_relation = True

    def __init__(self, field, val_a, val_b, field_comparisons):
        self.field = field
        self.val_a = val_a
        self.val_b = val_b
        self.field_comparisons = field_comparisons

    def field_label(self):
        """
        Returns a label for this field to be displayed to the user
        """
        verbose_name = getattr(self.field, 'verbose_name', None)

        if verbose_name is None:
            # Relations don't have a verbose_name
            verbose_name = self.field.name.replace('_', ' ')

        return capfirst(verbose_name)

    def get_mapping(self, objs_a, objs_b):
        """
        This bit of code attempts to match the objects in the A revision with
        their counterpart in the B revision.

        A match is firstly attempted by ID (where a matching ID indicates they're the same).
        We then compare the remaining objects by their field data; the objects with the fewest
        fields changed are matched until there are no more possible matches left.

        This returns 4 values:
         - map_forwards => a mapping of object indexes from the A version to the B version
         - map_backwards => a mapping of object indexes from the B version to the A version
         - added => a list of indices (into objs_b) for objects that didn't exist in the A version
         - deleted => a list of indices (into objs_a) for objects that don't exist in the B version

        Note the indices are 0-based array indices indicating the location of the object in either
        the objs_a or objs_b arrays.

        For example:

        objs_a => A, B, C, D
        objs_b => B, C, D, E

        Will return the following:

        map_forwards = {
            1: 0,  # B (objs_a: objs_b)
            2: 1,  # C (objs_a: objs_b)
            3: 2,  # D (objs_a: objs_b)
        }
        map_backwards = {
            0: 1,  # B (objs_b: objs_a)
            1: 2,  # C (objs_b: objs_a)
            2: 3,  # D (objs_b: objs_a)
        }
        added = [3]  # E in objs_b
        deleted = [0]  # A in objs_a
        """
        map_forwards = {}
        map_backwards = {}

        # Match child objects on ID
        for a_idx, a_child in enumerate(objs_a):
            for b_idx, b_child in enumerate(objs_b):
                if b_idx in map_backwards:
                    continue

                if a_child.id is not None and b_child.id is not None and a_child.id == b_child.id:
                    map_forwards[a_idx] = b_idx
                    map_backwards[b_idx] = a_idx

        # Now try to match them by data
        matches = []
        for a_idx, a_child in enumerate(objs_a):
            if a_idx in map_forwards:
                continue

            for b_idx, b_child in enumerate(objs_b):
                if b_idx in map_backwards:
                    continue

                # If they both have an ID that is different, they can't be the same child object
                if a_child.id and b_child.id and a_child.id != b_child.id:
                    continue

                comparison = self.get_child_comparison(a_child, b_child)
                matches.append((a_idx, b_idx, comparison.get_num_differences()))

        # Objects with the least differences will be matched first. So only the best possible matches are made
        matches.sort(key=lambda match: match[2])
        for a_idx, b_idx, num_differences in matches:
            # Make sure both objects were not matched previously
            if a_idx in map_forwards or b_idx in map_backwards:
                continue

            # Match!
            map_forwards[a_idx] = b_idx
            map_backwards[b_idx] = a_idx

        # Mark unmapped objects as added/deleted
        deleted = [a_idx for a_idx, a_child in enumerate(objs_a) if a_idx not in map_forwards]
        added = [b_idx for b_idx, b_child in enumerate(objs_b) if b_idx not in map_backwards]

        return map_forwards, map_backwards, added, deleted

    def get_child_comparison(self, obj_a, obj_b):
        """
        Builds the ChildObjectComparison for one pair of child objects
        (either side may be None for an added or deleted child)
        """
        return ChildObjectComparison(self.field.related_model, obj_a, obj_b, self.field_comparisons)

    def get_child_comparisons(self):
        """
        Returns a list of ChildObjectComparison objects. Representing all child
        objects that existed in either version.

        They are returned in the order they appear in the B version with deletions
        appended at the end.

        All child objects are returned, regardless of whether they were actually changed.
        """
        objs_a = list(self.val_a.all())
        objs_b = list(self.val_b.all())

        map_forwards, map_backwards, added, deleted = self.get_mapping(objs_a, objs_b)
        added = set(added)
        deleted = set(deleted)

        comparisons = []

        # Walk the lists themselves (not dicts built from them) so that the
        # documented ordering never depends on dict iteration order
        for b_idx, b_child in enumerate(objs_b):
            if b_idx in added:
                comparisons.append(self.get_child_comparison(None, b_child))
            else:
                comparisons.append(self.get_child_comparison(objs_a[map_backwards[b_idx]], b_child))

        for a_idx, a_child in enumerate(objs_a):
            if a_idx in deleted:
                comparisons.append(self.get_child_comparison(a_child, None))

        return comparisons

    def has_changed(self):
        """
        Returns True if any child object was added or deleted, or if the
        comparison of any matched pair of child objects reports a change.
        """
        objs_a = list(self.val_a.all())
        objs_b = list(self.val_b.all())

        map_forwards, map_backwards, added, deleted = self.get_mapping(objs_a, objs_b)

        if added or deleted:
            return True

        for a_idx, b_idx in map_forwards.items():
            comparison = self.get_child_comparison(objs_a[a_idx], objs_b[b_idx])

            if comparison.has_changed():
                return True

        return False
class ChildObjectComparison:
    """
    Compares one child object between two versions.

    ``obj_a`` / ``obj_b`` are the A and B versions of the child; one of them
    is None when the child was added or deleted. ``field_comparisons`` is a
    list of callables, each called as ``f(obj_a, obj_b)`` and expected to
    return an iterable of comparison objects providing ``has_changed()``.
    """

    def __init__(self, model, obj_a, obj_b, field_comparisons):
        self.model = model
        self.obj_a = obj_a
        self.obj_b = obj_b
        self.field_comparisons = field_comparisons

    def is_addition(self):
        """
        Returns True if this child object was created since obj_a
        """
        # bool() so callers always receive True/False rather than None or the object itself
        return bool(self.obj_b and not self.obj_a)

    def is_deletion(self):
        """
        Returns True if this child object was deleted in obj_b
        """
        return bool(self.obj_a and not self.obj_b)

    def get_position_change(self):
        """
        Returns the change in position as an integer. Positive if the object
        was moved down, negative if it moved up.

        For example: '3' indicates the object moved down three spaces. '-1'
        indicates the object moved up one space.

        Returns None for additions and deletions, where there is no pair of
        positions to compare.
        """
        if self.is_addition() or self.is_deletion():
            return None

        # A missing or None sort_order is treated as position 0 so the
        # subtraction cannot fail with a TypeError
        sort_a = getattr(self.obj_a, 'sort_order', 0) or 0
        sort_b = getattr(self.obj_b, 'sort_order', 0) or 0
        return sort_b - sort_a

    def get_field_comparisons(self):
        """
        Returns a list of comparisons for all the fields in this object.
        Fields that haven't changed are included as well.
        """
        if self.is_addition() or self.is_deletion():
            # Display the fields without diff as one of the versions is missing:
            # the existing object is compared against itself
            obj = self.obj_a or self.obj_b
            obj_a, obj_b = obj, obj
        else:
            obj_a, obj_b = self.obj_a, self.obj_b

        comparisons = []
        for field_comparison in self.field_comparisons:
            comparisons.extend(field_comparison(obj_a, obj_b))

        return comparisons

    def has_changed(self):
        """
        Returns True if any of the field comparisons reports a change
        """
        return any(comparison.has_changed() for comparison in self.get_field_comparisons())

    def get_num_differences(self):
        """
        Returns the number of fields that differ between the two
        objects.
        """
        return sum(1 for comparison in self.get_field_comparisons() if comparison.has_changed())
class TextDiff:
    """
    A sequence of ``(change_type, value)`` pairs, where change_type is
    'equal', 'addition' or 'deletion', that can be rendered as HTML.
    """

    def __init__(self, changes):
        self.changes = changes

    def to_html(self, tag='span', addition_class='addition', deletion_class='deletion'):
        """
        Renders the changes as HTML marked safe for output. Every value is
        escaped; additions and deletions are wrapped in ``tag`` carrying the
        matching class name. Pairs with any other change type are left out.
        """
        def wrap(classname, value):
            return '<{tag} class="{classname}">{value}</{tag}>'.format(
                tag=tag,
                classname=classname,
                value=escape(value)
            )

        fragments = []
        for change_type, value in self.changes:
            if change_type == 'equal':
                fragments.append(escape(value))
            elif change_type == 'addition':
                fragments.append(wrap(addition_class, value))
            elif change_type == 'deletion':
                fragments.append(wrap(deletion_class, value))

        return mark_safe("".join(fragments))
def diff_text(a, b):
    """
    Performs a diffing algorithm on two pieces of text. Returns a TextDiff
    whose changes list every token of both texts tagged as 'equal',
    'deletion' or 'addition'; call to_html() on it to get the markup.
    """
    def tokenise(text):
        """
        Splits a string into tokens: each run of consecutive alphanumeric
        characters forms one token and every other character (punctuation,
        whitespace, ...) is a token on its own.

        Keeping words and numbers whole makes the resulting diff easier
        to read, as words are not broken up.
        """
        tokens = []
        word = ""

        for char in text:
            if char.isalnum():
                word += char
                continue

            # A non-alphanumeric character ends the word collected so far
            if word:
                tokens.append(word)
                word = ""
            tokens.append(char)

        if word:
            tokens.append(word)

        return tokens

    a_tok = tokenise(a)
    b_tok = tokenise(b)

    # The first argument is SequenceMatcher's "junk" predicate: here, any
    # token of four characters or fewer
    matcher = difflib.SequenceMatcher(lambda t: len(t) <= 4, a_tok, b_tok)

    changes = []
    for op, i1, i2, j1, j2 in matcher.get_opcodes():
        if op == 'equal':
            changes.extend(('equal', token) for token in a_tok[i1:i2])
            continue

        # A 'replace' is recorded as the deleted tokens followed by the added ones
        if op in ('replace', 'delete'):
            changes.extend(('deletion', token) for token in a_tok[i1:i2])
        if op in ('replace', 'insert'):
            changes.extend(('addition', token) for token in b_tok[j1:j2])

    return TextDiff(changes)

Wyświetl plik

@ -5,6 +5,7 @@ import re
import django
from django import forms
from django.db.models.fields import FieldDoesNotExist
from django.core.exceptions import ImproperlyConfigured
from django.forms.models import fields_for_model
from django.template.loader import render_to_string
@ -13,7 +14,7 @@ from django.utils.six import text_type
from django.utils.translation import ugettext_lazy
from wagtail.utils.decorators import cached_classmethod
from wagtail.wagtailadmin import widgets
from wagtail.wagtailadmin import compare, widgets
from wagtail.wagtailcore.models import Page
from wagtail.wagtailcore.utils import camelcase_to_underscore, resolve_model_string
@ -199,6 +200,10 @@ class EditHandler(object):
"""
return mark_safe(self.render_as_object() + self.render_missing_fields())
@classmethod
def get_comparison(cls, obj_a, obj_b):
    """
    Returns the comparisons this handler contributes for the two objects.
    This implementation contributes none and returns a new empty list.
    """
    return list()
class BaseCompositeEditHandler(EditHandler):
"""
@ -265,6 +270,15 @@ class BaseCompositeEditHandler(EditHandler):
'self': self
}))
@classmethod
def get_comparison(cls, obj_a, obj_b):
    """
    Returns the comparisons of every child handler, concatenated in the
    order of ``cls.children``.
    """
    return [
        comparator
        for child in cls.children
        for comparator in child.get_comparison(obj_a, obj_b)
    ]
class BaseFormEditHandler(BaseCompositeEditHandler):
"""
@ -324,7 +338,6 @@ class BaseObjectList(BaseFormEditHandler):
class ObjectList(object):
def __init__(self, children, heading="", classname="",
base_form_class=None):
self.children = children
@ -451,6 +464,35 @@ class BaseFieldPanel(EditHandler):
def required_fields(cls):
return [cls.field_name]
@classmethod
def get_comparison_class(cls):
    """
    Returns the comparison class to use for this panel's field, or None when
    the field is overridden with a hidden widget.
    """
    override = cls.widget_overrides().get(cls.field_name, None)
    if override and override.is_hidden:
        # Hide fields with hidden widget
        return None

    try:
        field = cls.model._meta.get_field(cls.field_name)
        is_text_field = field.get_internal_type() in ['CharField', 'TextField']
    except FieldDoesNotExist:
        # Not a field of the model: fall through to the generic comparison
        is_text_field = False

    if is_text_field:
        return compare.RichTextFieldComparison
    return compare.FieldComparison
@classmethod
def get_comparison(cls, obj_a, obj_b):
    """
    Returns a one-item list holding the comparison of this panel's field
    between the two objects, or an empty list when get_comparison_class()
    yields no class.
    """
    comparator_class = cls.get_comparison_class()
    if not comparator_class:
        return []

    field = cls.model._meta.get_field(cls.field_name)
    values = [field.value_from_object(obj) for obj in (obj_a, obj_b)]
    return [comparator_class(field, *values)]
class FieldPanel(object):
def __init__(self, field_name, classname="", widget=None):
@ -472,7 +514,9 @@ class FieldPanel(object):
class BaseRichTextFieldPanel(BaseFieldPanel):
pass
@classmethod
def get_comparison_class(cls):
    """
    Rich text panels always use the rich text comparison.
    """
    comparison_class = compare.RichTextFieldComparison
    return comparison_class
class RichTextFieldPanel(object):
@ -621,6 +665,19 @@ class BaseInlinePanel(EditHandler):
def html_declarations(cls):
return cls.get_child_edit_handler_class().html_declarations()
@classmethod
def get_comparison(cls, obj_a, obj_b):
    """
    Returns a one-item list holding the ChildRelationComparison for this
    panel's relation, equipped with one field-comparison callable per panel
    definition.
    """
    def build_comparator(panel):
        # One closure per panel, so each callable keeps its own panel
        def compare_children(obj_a, obj_b):
            return panel.bind_to_model(cls.related.related_model).get_comparison(obj_a, obj_b)
        return compare_children

    field = cls.model._meta.get_field(cls.relation_name)
    val_a = getattr(obj_a, field.name)
    val_b = getattr(obj_b, field.name)

    field_comparisons = [build_comparator(p) for p in cls.get_panel_definitions()]

    return [compare.ChildRelationComparison(field, val_a, val_b, field_comparisons)]
def __init__(self, instance=None, form=None):
super(BaseInlinePanel, self).__init__(instance=instance, form=form)
@ -777,6 +834,10 @@ class BaseStreamFieldPanel(BaseFieldPanel):
def html_declarations(cls):
return cls.block_def.all_html_declarations()
@classmethod
def get_comparison_class(cls):
    """
    Stream field panels always use the stream field comparison.
    """
    comparison_class = compare.StreamFieldComparison
    return comparison_class
def id_for_label(self):
# a StreamField may consist of many input fields, so it's not meaningful to
# attach the label to any specific one

Wyświetl plik

@ -0,0 +1,97 @@
{% extends "wagtailadmin/base.html" %}
{% load i18n %}
{% block titletag %}{% blocktrans with title=page.get_admin_display_title %}Comparing {{ title }}{% endblocktrans %}{% endblock %}
{% block extra_css %}
{{ block.super }}
<style>
/* Container for the whole comparison */
.comparison {
width: 100%;
}
.comparison td {
vertical-align: top;
}
/* Box around each child object of a relation */
.comparison .child-object {
margin-top: 10px;
padding: 5px;
border: 1px solid rgb(238, 238, 238);
}
/* Child object that was added: green box */
.comparison .child-object.addition {
background-color: rgb(234, 255, 234);
border-color: rgb(166, 243, 166);
}
/* Child object that was deleted: red box */
.comparison .child-object.deletion {
background-color: rgb(255, 236, 236);
border-color: rgb(248, 203, 203);
}
/* Inline added text: green and underlined */
.comparison span.addition {
background-color: rgb(166, 243, 166);
text-decoration: underline;
}
/* Inline deleted text: red and struck through */
.comparison span.deletion {
background-color: rgb(248, 203, 203);
text-decoration: line-through;
}
</style>
{% endblock %}
{% block content %}
    {% trans "Comparing" as comparing_str %}
    {% include "wagtailadmin/shared/header.html" with title=comparing_str subtitle=page.get_admin_display_title icon="doc-empty-inverse" %}

    <div class="nice-padding">
        <p><a href="{% url 'wagtailadmin_pages:edit' page.id %}">{% trans "Edit this page" %}</a></p>
        <p><a href="{% url 'wagtailadmin_pages:revisions_index' page.id %}">{% trans "View revisions" %}</a></p>

        <div class="comparison">
            {# One section per comparison: plain fields show an inline diff, child relations list every child object #}
            {% for comp in comparison %}
                <h3>{{ comp.field_label }}</h3>

                {% if comp.is_field %}
                    {{ comp.htmldiff }}
                {% elif comp.is_child_relation %}
                    {% for child_comp in comp.get_child_comparisons %}
                        <div class="child-object {% if child_comp.is_addition %}addition{% elif child_comp.is_deletion %}deletion{% endif %}">
                            {% with child_comp.get_position_change as move %}
                                {% if move %}
                                    {% if move > 0 %}
                                        {% blocktrans count counter=move %}
                                            Moved down one place.
                                        {% plural %}
                                            Moved down {{ counter }} places.
                                        {% endblocktrans %}
                                    {% elif move < 0 %}
                                        {# move is negative here: cut removes the minus sign and add:0 turns the result back into an integer, so both the plural choice and the displayed number are positive #}
                                        {% blocktrans count counter=move|cut:"-"|add:0 %}
                                            Moved up one place.
                                        {% plural %}
                                            Moved up {{ counter }} places.
                                        {% endblocktrans %}
                                    {% endif %}
                                {% endif %}
                            {% endwith %}

                            <dl>
                                {% for field_comp in child_comp.get_field_comparisons %}
                                    <dt>{{ field_comp.field_label }}</dt>
                                    <dd>{{ field_comp.htmldiff }}</dd>
                                {% endfor %}
                            </dl>
                        </div>
                    {% endfor %}
                {% endif %}
            {% endfor %}

            {% if not comparison %}
                <h3>{% trans "These two revisions are the exact same" %}</h3>
            {% endif %}
        </div>
    </div>
{% endblock %}

Wyświetl plik

@ -23,6 +23,11 @@
{% else %}
<li><a href="{% url 'wagtailadmin_pages:revisions_revert' page.id revision.id %}" class="button button-small button-secondary">{% trans 'Review this revision' %}</a></li>
{% endif %}
{% with revision.get_previous as previous_revision %}
{% if previous_revision %}
<li><a href="{% url 'wagtailadmin_pages:revisions_compare' page.id previous_revision.id revision.id %}" class="button button-small button-secondary">{% trans 'Compare with previous revision' %}</a></li>
{% endif %}
{% endwith %}
</ul>
</td>
</tr>

Wyświetl plik

@ -41,4 +41,5 @@ urlpatterns = [
url(r'^(\d+)/revisions/$', pages.revisions_index, name='revisions_index'),
url(r'^(\d+)/revisions/(\d+)/view/$', pages.revisions_view, name='revisions_view'),
url(r'^(\d+)/revisions/(\d+)/revert/$', pages.revisions_revert, name='revisions_revert'),
# The '...' separator between the two revision identifiers is matched literally:
# an unescaped '.' would accept any character in its place
url(r'^(\d+)/revisions/compare/(live|earliest|\d+)\.\.\.(live|latest|\d+)/$', pages.revisions_compare, name='revisions_compare'),
]

Wyświetl plik

@ -481,7 +481,15 @@ def edit(request, page_id):
# Check for revisions still undergoing moderation and warn
if latest_revision and latest_revision.submitted_for_moderation:
messages.warning(request, _("This page is currently awaiting moderation"))
buttons = []
if page.live:
buttons.append(messages.button(
reverse('wagtailadmin_pages:revisions_compare', args=(page.id, 'live', latest_revision.id)),
_('Compare with live version')
))
messages.warning(request, _("This page is currently awaiting moderation"), buttons=buttons)
return render(request, 'wagtailadmin/pages/edit.html', {
'page': page,
@ -1074,3 +1082,55 @@ def revisions_view(request, page_id, revision_id):
revision_page = revision.as_page_object()
return revision_page.serve_preview(page.dummy_request(request), page.default_preview_mode)
def revisions_compare(request, page_id, revision_id_a, revision_id_b):
    """
    Renders the comparison between two versions of a page.

    ``revision_id_a`` is 'live', 'earliest' or a revision ID;
    ``revision_id_b`` is 'live', 'latest' or a revision ID. Raises Http404
    when the page or a requested revision does not exist, when 'live' is
    requested for a page that is not live, or when the page has no revisions
    to satisfy 'earliest' / 'latest'.

    Only comparisons that report a change are passed to the template.
    """
    page = get_object_or_404(Page, id=page_id).specific

    # Get revision to compare from
    if revision_id_a == 'live':
        if not page.live:
            raise Http404

        revision_a = page
        revision_a_heading = _("Live")
    elif revision_id_a == 'earliest':
        earliest = page.revisions.order_by('created_at', 'id').first()
        if not earliest:
            raise Http404

        revision_a = earliest.as_page_object()
        revision_a_heading = _("Earliest")
    else:
        # Look the revision up once and use it for both the page object and the heading
        revision_a_obj = get_object_or_404(page.revisions, id=revision_id_a)
        revision_a = revision_a_obj.as_page_object()
        revision_a_heading = str(revision_a_obj.created_at)

    # Get revision to compare to
    if revision_id_b == 'live':
        if not page.live:
            raise Http404

        revision_b = page
        revision_b_heading = _("Live")
    elif revision_id_b == 'latest':
        latest = page.revisions.order_by('created_at', 'id').last()
        if not latest:
            raise Http404

        revision_b = latest.as_page_object()
        revision_b_heading = _("Latest")
    else:
        revision_b_obj = get_object_or_404(page.revisions, id=revision_id_b)
        revision_b = revision_b_obj.as_page_object()
        revision_b_heading = str(revision_b_obj.created_at)

    comparison = page.get_edit_handler().get_comparison(revision_a, revision_b)
    comparison = [comp for comp in comparison if comp.has_changed()]

    return render(request, 'wagtailadmin/pages/revisions/compare.html', {
        'page': page,
        'revision_a_heading': revision_a_heading,
        'revision_a': revision_a,
        'revision_b_heading': revision_b_heading,
        'revision_b': revision_b,
        'comparison': comparison,
    })

Wyświetl plik

@ -355,6 +355,12 @@ class StreamValue(collections.Sequence):
for i, value in zip(raw_values.keys(), converted_values):
self._bound_blocks[i] = StreamValue.StreamChild(child_block, value)
def __eq__(self, other):
    """
    Two StreamValues are equal when their stream_data is equal; a StreamValue
    never equals an object of another type.
    """
    if not isinstance(other, StreamValue):
        return False

    return self.stream_data == other.stream_data

def __ne__(self, other):
    """
    Inverse of __eq__.
    """
    # Python 2 does not derive != from __eq__; without this, != falls back to
    # comparing identity and reports two equal values as different
    return not self.__eq__(other)
def __len__(self):
    """
    Returns the length of stream_data
    """
    stream_data = self.stream_data
    return len(stream_data)

Wyświetl plik

@ -1527,6 +1527,12 @@ class PageRevision(models.Model):
page.go_live_at.isoformat()
)
def get_previous(self):
    """
    Returns the result of get_previous_by_created_at() limited to revisions
    of this revision's page
    """
    same_page = {'page': self.page}
    return self.get_previous_by_created_at(**same_page)
def get_next(self):
    """
    Returns the result of get_next_by_created_at() limited to revisions
    of this revision's page
    """
    same_page = {'page': self.page}
    return self.get_next_by_created_at(**same_page)
def __str__(self):
    """
    Returns the page's text in double quotes, followed by ' at ' and the
    creation time
    """
    page_text = six.text_type(self.page)
    created_text = six.text_type(self.created_at)
    return '"' + page_text + '" at ' + created_text