From 5fcaa64d81ecd2f3d3da796de9b1ef8b28f3faa2 Mon Sep 17 00:00:00 2001
From: frmdstryr
Date: Wed, 23 Jan 2019 10:20:13 -0500
Subject: [PATCH] Avoid a pop from an empty list in HTMLToContentStateHandler
(#5004)
---
CHANGELOG.txt | 1 +
CONTRIBUTORS.rst | 1 +
docs/releases/2.5.rst | 1 +
.../converters/html_to_contentstate.py | 2 ++
wagtail/admin/tests/test_contentstate.py | 34 +++++++++++++++++++
5 files changed, 39 insertions(+)
diff --git a/CHANGELOG.txt b/CHANGELOG.txt
index 696cfd0a55..74d88776f5 100644
--- a/CHANGELOG.txt
+++ b/CHANGELOG.txt
@@ -9,6 +9,7 @@ Changelog
* Fix: Set `SERVER_PORT` to 443 in `Page.dummy_request()` for HTTPS sites (Sergey Fedoseev)
* Fix: Include port number in `Host` header of `Page.dummy_request()` (Sergey Fedoseev)
* Fix: Validation error messages in `InlinePanel` no longer count towards `max_num` when disabling the 'add' button (Todd Dembrey, Thibaud Colas)
+ * Fix: Rich text to contentstate conversion now ignores stray closing tags (frmdstryr)
2.4 (19.12.2018)
diff --git a/CONTRIBUTORS.rst b/CONTRIBUTORS.rst
index b2c6228a0f..12ab0fba53 100644
--- a/CONTRIBUTORS.rst
+++ b/CONTRIBUTORS.rst
@@ -338,6 +338,7 @@ Contributors
* Maylon Pedroso
* Thijs Walcarius
* mukesh5
+* frmdstryr
Translators
===========
diff --git a/docs/releases/2.5.rst b/docs/releases/2.5.rst
index b858a371a8..507c068d4a 100644
--- a/docs/releases/2.5.rst
+++ b/docs/releases/2.5.rst
@@ -24,6 +24,7 @@ Bug fixes
* Set ``SERVER_PORT`` to 443 in ``Page.dummy_request()`` for HTTPS sites (Sergey Fedoseev)
* Include port number in ``Host`` header of ``Page.dummy_request()`` (Sergey Fedoseev)
* Validation error messages in ``InlinePanel`` no longer count towards ``max_num`` when disabling the 'add' button (Todd Dembrey, Thibaud Colas)
+ * Rich text to contentstate conversion now ignores stray closing tags (frmdstryr)
Upgrade considerations
diff --git a/wagtail/admin/rich_text/converters/html_to_contentstate.py b/wagtail/admin/rich_text/converters/html_to_contentstate.py
index 67fe38afe3..9724c75c41 100644
--- a/wagtail/admin/rich_text/converters/html_to_contentstate.py
+++ b/wagtail/admin/rich_text/converters/html_to_contentstate.py
@@ -306,6 +306,8 @@ class HtmlToContentStateHandler(HTMLParser):
element_handler.handle_starttag(name, attrs, self.state, self.contentstate)
def handle_endtag(self, name):
+ if not self.open_elements:
+ return # avoid a pop from an empty list if we have an extra end tag
expected_name, element_handler = self.open_elements.pop()
assert name == expected_name, "Unmatched tags: expected %s, got %s" % (expected_name, name)
if element_handler:
diff --git a/wagtail/admin/tests/test_contentstate.py b/wagtail/admin/tests/test_contentstate.py
index 3cd6bdef7e..a20e0255cf 100644
--- a/wagtail/admin/tests/test_contentstate.py
+++ b/wagtail/admin/tests/test_contentstate.py
@@ -753,3 +753,37 @@ class TestHtmlToContentState(TestCase):
{'inlineStyleRanges': [], 'text': 'Arthur "two sheds" Jackson & his wife', 'depth': 0, 'type': 'unstyled', 'key': '00000', 'entityRanges': []},
]
})
+
+ def test_extra_end_tag_before(self):
+ converter = ContentstateConverter(features=[])
+ result = json.loads(converter.from_database_format(
+ '''
+ </p>
+ <p>Before</p>
+ '''
+ ))
+ # The leading tag should be ignored instead of blowing up with a
+ # pop from empty list error
+ self.assertContentStateEqual(result, {
+ 'entityMap': {},
+ 'blocks': [
+ {'inlineStyleRanges': [], 'text': 'Before', 'depth': 0, 'type': 'unstyled', 'key': '00000', 'entityRanges': []},
+ ]
+ })
+
+ def test_extra_end_tag_after(self):
+ converter = ContentstateConverter(features=[])
+ result = json.loads(converter.from_database_format(
+ '''
+ <p>After</p>
+ </p>
+ '''
+ ))
+ # The trailing tag should be ignored instead of blowing up with a
+ # pop from empty list error
+ self.assertContentStateEqual(result, {
+ 'entityMap': {},
+ 'blocks': [
+ {'inlineStyleRanges': [], 'text': 'After', 'depth': 0, 'type': 'unstyled', 'key': '00000', 'entityRanges': []},
+ ]
+ })