Apply proper HTML escaping on StreamField block comparisons

An individual StreamField block in the comparison view may be rendered as a plain value (for blocks that are unchanged, added or deleted) or a diff (for blocks that are changed). In both cases, the output is returned as HTML, but must not contain any unescaped editor-supplied HTML.

For CharBlock, TextBlock and RawHTMLBlock, the block value is escaped so that any HTML tags in the content are shown verbatim.

For RichTextBlock and any other block types that do not override the default comparison behaviour, we take the basic (non-templated) HTML rendering of the block and extract text-only content from it. This is then returned in HTML-escaped form for the plain view, and run through diff_text().to_html() for the diff view (which handles escaping itself).
pull/5952/head
Matt Westcott 2020-04-03 14:10:47 +01:00 zatwierdzone przez Matt Westcott
rodzic 72e8d861b6
commit b5aef74801
4 zmienionych plików z 176 dodań i 20 usunięć

Wyświetl plik

@ -10,6 +10,11 @@ from django.utils.translation import gettext_lazy as _
from wagtail.core import blocks
def text_from_html(val):
# Return the unescaped text content of an HTML string
return BeautifulSoup(force_str(val), 'html5lib').getText()
class FieldComparison:
is_field = True
is_child_relation = False
@ -52,15 +57,18 @@ class TextFieldComparison(FieldComparison):
class RichTextFieldComparison(TextFieldComparison):
def htmldiff(self):
return diff_text(
BeautifulSoup(force_str(self.val_a), 'html5lib').getText(),
BeautifulSoup(force_str(self.val_b), 'html5lib').getText()
text_from_html(self.val_a),
text_from_html(self.val_b)
).to_html()
def get_comparison_class_for_block(block):
if hasattr(block, 'get_comparison_class'):
return block.get_comparison_class()
elif isinstance(block, blocks.CharBlock):
elif isinstance(block, (blocks.CharBlock, blocks.TextBlock)):
return CharBlockComparison
elif isinstance(block, blocks.RawHTMLBlock):
# Compare raw HTML blocks as if they were plain text, so that tags are shown explicitly
return CharBlockComparison
elif isinstance(block, blocks.RichTextBlock):
return RichTextBlockComparison
@ -89,7 +97,19 @@ class BlockComparison:
return self.val_a != self.val_b
def htmlvalue(self, val):
return self.block.render_basic(val)
"""
Return an HTML representation of this block that is safe to be included
in comparison views
"""
return escape(text_from_html(self.block.render_basic(val)))
def htmldiff(self):
html_val_a = self.block.render_basic(self.val_a)
html_val_b = self.block.render_basic(self.val_b)
return diff_text(
text_from_html(html_val_a),
text_from_html(html_val_b)
).to_html()
class CharBlockComparison(BlockComparison):
@ -99,13 +119,12 @@ class CharBlockComparison(BlockComparison):
force_str(self.val_b)
).to_html()
def htmlvalue(self, val):
return escape(val)
class RichTextBlockComparison(BlockComparison):
def htmldiff(self):
return diff_text(
BeautifulSoup(force_str(self.val_a), 'html5lib').getText(),
BeautifulSoup(force_str(self.val_b), 'html5lib').getText()
).to_html()
pass
class StructBlockComparison(BlockComparison):
@ -219,8 +238,8 @@ class StreamFieldComparison(FieldComparison):
else:
# Fall back to diffing the HTML representation
return diff_text(
BeautifulSoup(force_str(self.val_a), 'html5lib').getText(),
BeautifulSoup(force_str(self.val_b), 'html5lib').getText()
text_from_html(self.val_a),
text_from_html(self.val_b)
).to_html()

Wyświetl plik

@ -247,36 +247,151 @@ class TestStreamFieldComparison(TestCase):
self.assertIsInstance(comparison.htmldiff(), SafeString)
self.assertTrue(comparison.has_changed())
def test_htmldiff_escapes_value(self):
def test_htmldiff_escapes_value_on_change(self):
field = StreamPage._meta.get_field('body')
comparison = self.comparison_class(
field,
StreamPage(body=StreamValue(field.stream_block, [
('text', "Original content", '1'),
('text', "I <b>really</b> like original<i>ish</i> content", '1'),
])),
StreamPage(body=StreamValue(field.stream_block, [
('text', '<script type="text/javascript">doSomethingBad();</script>', '1'),
('text', 'I <b>really</b> like evil code <script type="text/javascript">doSomethingBad();</script>', '1'),
])),
)
self.assertEqual(comparison.htmldiff(), '<div class="comparison__child-object"><span class="deletion">Original content</span><span class="addition">&lt;script type=&quot;text/javascript&quot;&gt;doSomethingBad();&lt;/script&gt;</span></div>')
self.assertEqual(comparison.htmldiff(), '<div class="comparison__child-object">I &lt;b&gt;really&lt;/b&gt; like <span class="deletion">original&lt;i&gt;ish&lt;/i&gt; content</span><span class="addition">evil code &lt;script type=&quot;text/javascript&quot;&gt;doSomethingBad();&lt;/script&gt;</span></div>')
self.assertIsInstance(comparison.htmldiff(), SafeString)
def test_htmldiff_escapes_value_richtext(self):
def test_htmldiff_escapes_value_on_addition(self):
field = StreamPage._meta.get_field('body')
comparison = self.comparison_class(
field,
StreamPage(body=StreamValue(field.stream_block, [
('rich_text', "Original content", '1'),
('text', "Original <em>and unchanged</em> content", '1'),
])),
StreamPage(body=StreamValue(field.stream_block, [
('rich_text', '<script type="text/javascript">doSomethingBad();</script>', '1'),
('text', "Original <em>and unchanged</em> content", '1'),
('text', '<script type="text/javascript">doSomethingBad();</script>', '2'),
])),
)
self.assertEqual(comparison.htmldiff(), '<div class="comparison__child-object"><span class="deletion">Original content</span><span class="addition">doSomethingBad();</span></div>')
self.assertEqual(comparison.htmldiff(), '<div class="comparison__child-object">Original &lt;em&gt;and unchanged&lt;/em&gt; content</div>\n<div class="comparison__child-object addition">&lt;script type=&quot;text/javascript&quot;&gt;doSomethingBad();&lt;/script&gt;</div>')
self.assertIsInstance(comparison.htmldiff(), SafeString)
def test_htmldiff_escapes_value_on_deletion(self):
field = StreamPage._meta.get_field('body')
comparison = self.comparison_class(
field,
StreamPage(body=StreamValue(field.stream_block, [
('text', "Original <em>and unchanged</em> content", '1'),
('text', '<script type="text/javascript">doSomethingBad();</script>', '2'),
])),
StreamPage(body=StreamValue(field.stream_block, [
('text', "Original <em>and unchanged</em> content", '1'),
])),
)
self.assertEqual(comparison.htmldiff(), '<div class="comparison__child-object">Original &lt;em&gt;and unchanged&lt;/em&gt; content</div>\n<div class="comparison__child-object deletion">&lt;script type=&quot;text/javascript&quot;&gt;doSomethingBad();&lt;/script&gt;</div>')
self.assertIsInstance(comparison.htmldiff(), SafeString)
def test_htmldiff_richtext_strips_tags_on_change(self):
field = StreamPage._meta.get_field('body')
comparison = self.comparison_class(
field,
StreamPage(body=StreamValue(field.stream_block, [
('rich_text', "I <b>really</b> like Wagtail &lt;3", '1'),
])),
StreamPage(body=StreamValue(field.stream_block, [
('rich_text', 'I <b>really</b> like evil code &gt;_&lt; <script type="text/javascript">doSomethingBad();</script>', '1'),
])),
)
self.assertEqual(comparison.htmldiff(), '<div class="comparison__child-object">I really like <span class="deletion">Wagtail &lt;3</span><span class="addition">evil code &gt;_&lt; doSomethingBad();</span></div>')
self.assertIsInstance(comparison.htmldiff(), SafeString)
def test_htmldiff_richtext_strips_tags_on_addition(self):
field = StreamPage._meta.get_field('body')
comparison = self.comparison_class(
field,
StreamPage(body=StreamValue(field.stream_block, [
('rich_text', "Original <em>and unchanged</em> content", '1'),
])),
StreamPage(body=StreamValue(field.stream_block, [
('rich_text', "Original <em>and unchanged</em> content", '1'),
('rich_text', 'I <b>really</b> like evil code &gt;_&lt; <script type="text/javascript">doSomethingBad();</script>', '2'),
])),
)
self.assertEqual(comparison.htmldiff(), '<div class="comparison__child-object">Original and unchanged content</div>\n<div class="comparison__child-object addition">I really like evil code &gt;_&lt; doSomethingBad();</div>')
self.assertIsInstance(comparison.htmldiff(), SafeString)
def test_htmldiff_richtext_strips_tags_on_deletion(self):
field = StreamPage._meta.get_field('body')
comparison = self.comparison_class(
field,
StreamPage(body=StreamValue(field.stream_block, [
('rich_text', "Original <em>and unchanged</em> content", '1'),
('rich_text', 'I <b>really</b> like evil code &gt;_&lt; <script type="text/javascript">doSomethingBad();</script>', '2'),
])),
StreamPage(body=StreamValue(field.stream_block, [
('rich_text', "Original <em>and unchanged</em> content", '1'),
])),
)
self.assertEqual(comparison.htmldiff(), '<div class="comparison__child-object">Original and unchanged content</div>\n<div class="comparison__child-object deletion">I really like evil code &gt;_&lt; doSomethingBad();</div>')
self.assertIsInstance(comparison.htmldiff(), SafeString)
def test_htmldiff_raw_html_escapes_value_on_change(self):
field = StreamPage._meta.get_field('body')
comparison = self.comparison_class(
field,
StreamPage(body=StreamValue(field.stream_block, [
('raw_html', "Original<i>ish</i> content", '1'),
])),
StreamPage(body=StreamValue(field.stream_block, [
('raw_html', '<script type="text/javascript">doSomethingBad();</script>', '1'),
])),
)
self.assertEqual(comparison.htmldiff(), '<div class="comparison__child-object"><span class="deletion">Original&lt;i&gt;ish&lt;/i&gt; content</span><span class="addition">&lt;script type=&quot;text/javascript&quot;&gt;doSomethingBad();&lt;/script&gt;</span></div>')
self.assertIsInstance(comparison.htmldiff(), SafeString)
def test_htmldiff_raw_html_escapes_value_on_addition(self):
field = StreamPage._meta.get_field('body')
comparison = self.comparison_class(
field,
StreamPage(body=StreamValue(field.stream_block, [
('raw_html', "Original <em>and unchanged</em> content", '1'),
])),
StreamPage(body=StreamValue(field.stream_block, [
('raw_html', "Original <em>and unchanged</em> content", '1'),
('raw_html', '<script type="text/javascript">doSomethingBad();</script>', '2'),
])),
)
self.assertEqual(comparison.htmldiff(), '<div class="comparison__child-object">Original &lt;em&gt;and unchanged&lt;/em&gt; content</div>\n<div class="comparison__child-object addition">&lt;script type=&quot;text/javascript&quot;&gt;doSomethingBad();&lt;/script&gt;</div>')
self.assertIsInstance(comparison.htmldiff(), SafeString)
def test_htmldiff_raw_html_escapes_value_on_deletion(self):
field = StreamPage._meta.get_field('body')
comparison = self.comparison_class(
field,
StreamPage(body=StreamValue(field.stream_block, [
('raw_html', "Original <em>and unchanged</em> content", '1'),
('raw_html', '<script type="text/javascript">doSomethingBad();</script>', '2'),
])),
StreamPage(body=StreamValue(field.stream_block, [
('raw_html', "Original <em>and unchanged</em> content", '1'),
])),
)
self.assertEqual(comparison.htmldiff(), '<div class="comparison__child-object">Original &lt;em&gt;and unchanged&lt;/em&gt; content</div>\n<div class="comparison__child-object deletion">&lt;script type=&quot;text/javascript&quot;&gt;doSomethingBad();&lt;/script&gt;</div>')
self.assertIsInstance(comparison.htmldiff(), SafeString)
def test_compare_structblock(self):

Wyświetl plik

@ -0,0 +1,21 @@
# Generated by Django 3.0.4 on 2020-04-06 09:46
from django.db import migrations
import wagtail.core.blocks
import wagtail.core.fields
import wagtail.tests.testapp.models
class Migration(migrations.Migration):
dependencies = [
('tests', '0047_restaurant_tags'),
]
operations = [
migrations.AlterField(
model_name='streampage',
name='body',
field=wagtail.core.fields.StreamField([('text', wagtail.core.blocks.CharBlock()), ('rich_text', wagtail.core.blocks.RichTextBlock()), ('image', wagtail.tests.testapp.models.ExtendedImageChooserBlock()), ('product', wagtail.core.blocks.StructBlock([('name', wagtail.core.blocks.CharBlock()), ('price', wagtail.core.blocks.CharBlock())])), ('raw_html', wagtail.core.blocks.RawHTMLBlock())]),
),
]

Wyświetl plik

@ -29,7 +29,7 @@ from wagtail.contrib.forms.models import (
from wagtail.contrib.settings.models import BaseSetting, register_setting
from wagtail.contrib.sitemaps import Sitemap
from wagtail.contrib.table_block.blocks import TableBlock
from wagtail.core.blocks import CharBlock, RichTextBlock, StructBlock
from wagtail.core.blocks import CharBlock, RawHTMLBlock, RichTextBlock, StructBlock
from wagtail.core.fields import RichTextField, StreamField
from wagtail.core.models import Orderable, Page, PageManager, PageQuerySet
from wagtail.documents.edit_handlers import DocumentChooserPanel
@ -972,6 +972,7 @@ class StreamPage(Page):
('name', CharBlock()),
('price', CharBlock()),
])),
('raw_html', RawHTMLBlock()),
])
api_fields = ('body',)