Move DbWhitelister into wagtail.admin.rich_text.converters.editor_html

2017-11-29 13:42:35 +00:00 · 2017-11-29 13:42:35 +00:00 · e65c5a6106
commit e65c5a6106
--- a/wagtail/admin/rich_text/__init__.py
+++ b/wagtail/admin/rich_text/__init__.py
@@ -7,7 +7,8 @@ from django.utils.module_loading import import_string

 from wagtail.utils.widgets import WidgetWithScript
 from wagtail.admin.edit_handlers import RichTextFieldPanel
-from wagtail.core.rich_text import DbWhitelister, expand_db_html, features
+from wagtail.admin.rich_text.converters.editor_html import DbWhitelister
+from wagtail.core.rich_text import expand_db_html, features


 class HalloPlugin:
--- a/wagtail/admin/rich_text/converters/__init__.py
+++ b/wagtail/admin/rich_text/converters/__init__.py
--- a/wagtail/admin/rich_text/converters/editor_html.py
+++ b/wagtail/admin/rich_text/converters/editor_html.py
@@ -0,0 +1,110 @@
+from django.utils.functional import cached_property
+
+from wagtail.core import hooks
+from wagtail.core.rich_text import features
+from wagtail.core.whitelist import allow_without_attributes, Whitelister, DEFAULT_ELEMENT_RULES
+
+
+class DbWhitelister(Whitelister):
+    """
+    A custom whitelisting engine to convert the HTML as returned by the rich text editor
+    into the pseudo-HTML format stored in the database (in which images, documents and other
+    linked objects are identified by ID rather than URL):
+
+    * implements a 'construct_whitelister_element_rules' hook so that other apps can modify
+      the whitelist ruleset (e.g. to permit additional HTML elements beyond those in the base
+      Whitelister module);
+    * replaces any element with a 'data-embedtype' attribute with an <embed> element, with
+      attributes supplied by the handler for that type as defined in embed_handlers;
+    * rewrites the attributes of any <a> element with a 'data-linktype' attribute, as
+      determined by the handler for that type defined in link_handlers, while keeping the
+      element content intact.
+    """
+    def __init__(self, features=None):
+        self.features = features
+
+    @cached_property
+    def element_rules(self):
+        if self.features is None:
+            # use the legacy construct_whitelister_element_rules hook to build up whitelist rules
+            element_rules = DEFAULT_ELEMENT_RULES.copy()
+            for fn in hooks.get_hooks('construct_whitelister_element_rules'):
+                element_rules.update(fn())
+        else:
+            # use the feature registry to build up whitelist rules
+            element_rules = {
+                '[document]': allow_without_attributes,
+                'p': allow_without_attributes,
+                'div': allow_without_attributes,
+                'br': allow_without_attributes,
+            }
+            for feature_name in self.features:
+                element_rules.update(features.get_whitelister_element_rules(feature_name))
+
+        return element_rules
+
+    @cached_property
+    def embed_handlers(self):
+        if self.features is None:
+            feature_list = features.get_default_features()
+        else:
+            feature_list = self.features
+
+        embed_handlers = {}
+        for feature in feature_list:
+            embed_handlers.update(features.get_embed_handler_rules(feature))
+
+        return embed_handlers
+
+    @cached_property
+    def link_handlers(self):
+        if self.features is None:
+            feature_list = features.get_default_features()
+        else:
+            feature_list = self.features
+
+        link_handlers = {}
+        for feature in feature_list:
+            link_handlers.update(features.get_link_handler_rules(feature))
+
+        return link_handlers
+
+    def clean_tag_node(self, doc, tag):
+        if 'data-embedtype' in tag.attrs:
+            embed_type = tag['data-embedtype']
+            # fetch the appropriate embed handler for this embedtype
+            try:
+                embed_handler = self.embed_handlers[embed_type]
+            except KeyError:
+                # discard embeds with unrecognised embedtypes
+                tag.decompose()
+                return
+
+            embed_attrs = embed_handler.get_db_attributes(tag)
+            embed_attrs['embedtype'] = embed_type
+
+            embed_tag = doc.new_tag('embed', **embed_attrs)
+            embed_tag.can_be_empty_element = True
+            tag.replace_with(embed_tag)
+        elif tag.name == 'a' and 'data-linktype' in tag.attrs:
+            # first, whitelist the contents of this tag
+            for child in tag.contents:
+                self.clean_node(doc, child)
+
+            link_type = tag['data-linktype']
+            try:
+                link_handler = self.link_handlers[link_type]
+            except KeyError:
+                # discard links with unrecognised linktypes
+                tag.unwrap()
+                return
+
+            link_attrs = link_handler.get_db_attributes(tag)
+            link_attrs['linktype'] = link_type
+            tag.attrs.clear()
+            tag.attrs.update(**link_attrs)
+        else:
+            if tag.name == 'div':
+                tag.name = 'p'
+
+            super(DbWhitelister, self).clean_tag_node(doc, tag)
--- a/wagtail/admin/tests/test_dbwhitelister.py
+++ b/wagtail/admin/tests/test_dbwhitelister.py
@@ -1,10 +1,47 @@
 from bs4 import BeautifulSoup
 from django.test import TestCase

-from wagtail.core.rich_text import DbWhitelister
+from wagtail.admin.rich_text.converters.editor_html import DbWhitelister
 from wagtail.core.whitelist import Whitelister


+class TestDbWhitelisterMethods(TestCase):
+    def setUp(self):
+        self.whitelister = DbWhitelister()
+
+    def test_clean_tag_node_div(self):
+        soup = BeautifulSoup('<div>foo</div>', 'html5lib')
+        tag = soup.div
+        self.assertEqual(tag.name, 'div')
+        self.whitelister.clean_tag_node(soup, tag)
+        self.assertEqual(tag.name, 'p')
+
+    def test_clean_tag_node_with_data_embedtype(self):
+        soup = BeautifulSoup(
+            '<p><a data-embedtype="image" data-id=1 data-format="left" data-alt="bar" irrelevant="baz">foo</a></p>',
+            'html5lib'
+        )
+        tag = soup.p
+        self.whitelister.clean_tag_node(soup, tag)
+        self.assertEqual(str(tag),
+                         '<p><embed alt="bar" embedtype="image" format="left" id="1"/></p>')
+
+    def test_clean_tag_node_with_data_linktype(self):
+        soup = BeautifulSoup(
+            '<a data-linktype="document" data-id="1" irrelevant="baz">foo</a>',
+            'html5lib'
+        )
+        tag = soup.a
+        self.whitelister.clean_tag_node(soup, tag)
+        self.assertEqual(str(tag), '<a id="1" linktype="document">foo</a>')
+
+    def test_clean_tag_node(self):
+        soup = BeautifulSoup('<a irrelevant="baz">foo</a>', 'html5lib')
+        tag = soup.a
+        self.whitelister.clean_tag_node(soup, tag)
+        self.assertEqual(str(tag), '<a>foo</a>')
+
+
 class TestDbWhitelister(TestCase):
    def setUp(self):
        self.whitelister = DbWhitelister()
--- a/wagtail/core/rich_text/__init__.py
+++ b/wagtail/core/rich_text/__init__.py
@@ -1,120 +1,12 @@
-from django.utils.functional import cached_property
 from django.utils.safestring import mark_safe

-from wagtail.core import hooks
 from wagtail.core.rich_text.feature_registry import FeatureRegistry
 from wagtail.core.rich_text.rewriters import EmbedRewriter, LinkRewriter, MultiRuleRewriter
-from wagtail.core.whitelist import allow_without_attributes, Whitelister, DEFAULT_ELEMENT_RULES


 features = FeatureRegistry()


-class DbWhitelister(Whitelister):
-    """
-    A custom whitelisting engine to convert the HTML as returned by the rich text editor
-    into the pseudo-HTML format stored in the database (in which images, documents and other
-    linked objects are identified by ID rather than URL):
-
-    * implements a 'construct_whitelister_element_rules' hook so that other apps can modify
-      the whitelist ruleset (e.g. to permit additional HTML elements beyond those in the base
-      Whitelister module);
-    * replaces any element with a 'data-embedtype' attribute with an <embed> element, with
-      attributes supplied by the handler for that type as defined in embed_handlers;
-    * rewrites the attributes of any <a> element with a 'data-linktype' attribute, as
-      determined by the handler for that type defined in link_handlers, while keeping the
-      element content intact.
-    """
-    def __init__(self, features=None):
-        self.features = features
-
-    @cached_property
-    def element_rules(self):
-        if self.features is None:
-            # use the legacy construct_whitelister_element_rules hook to build up whitelist rules
-            element_rules = DEFAULT_ELEMENT_RULES.copy()
-            for fn in hooks.get_hooks('construct_whitelister_element_rules'):
-                element_rules.update(fn())
-        else:
-            # use the feature registry to build up whitelist rules
-            element_rules = {
-                '[document]': allow_without_attributes,
-                'p': allow_without_attributes,
-                'div': allow_without_attributes,
-                'br': allow_without_attributes,
-            }
-            for feature_name in self.features:
-                element_rules.update(features.get_whitelister_element_rules(feature_name))
-
-        return element_rules
-
-    @cached_property
-    def embed_handlers(self):
-        if self.features is None:
-            feature_list = features.get_default_features()
-        else:
-            feature_list = self.features
-
-        embed_handlers = {}
-        for feature in feature_list:
-            embed_handlers.update(features.get_embed_handler_rules(feature))
-
-        return embed_handlers
-
-    @cached_property
-    def link_handlers(self):
-        if self.features is None:
-            feature_list = features.get_default_features()
-        else:
-            feature_list = self.features
-
-        link_handlers = {}
-        for feature in feature_list:
-            link_handlers.update(features.get_link_handler_rules(feature))
-
-        return link_handlers
-
-    def clean_tag_node(self, doc, tag):
-        if 'data-embedtype' in tag.attrs:
-            embed_type = tag['data-embedtype']
-            # fetch the appropriate embed handler for this embedtype
-            try:
-                embed_handler = self.embed_handlers[embed_type]
-            except KeyError:
-                # discard embeds with unrecognised embedtypes
-                tag.decompose()
-                return
-
-            embed_attrs = embed_handler.get_db_attributes(tag)
-            embed_attrs['embedtype'] = embed_type
-
-            embed_tag = doc.new_tag('embed', **embed_attrs)
-            embed_tag.can_be_empty_element = True
-            tag.replace_with(embed_tag)
-        elif tag.name == 'a' and 'data-linktype' in tag.attrs:
-            # first, whitelist the contents of this tag
-            for child in tag.contents:
-                self.clean_node(doc, child)
-
-            link_type = tag['data-linktype']
-            try:
-                link_handler = self.link_handlers[link_type]
-            except KeyError:
-                # discard links with unrecognised linktypes
-                tag.unwrap()
-                return
-
-            link_attrs = link_handler.get_db_attributes(tag)
-            link_attrs['linktype'] = link_type
-            tag.attrs.clear()
-            tag.attrs.update(**link_attrs)
-        else:
-            if tag.name == 'div':
-                tag.name = 'p'
-
-            super(DbWhitelister, self).clean_tag_node(doc, tag)
-
-
 # Rewriter functions to be built up on first call to expand_db_html, using the utility classes
 # from wagtail.core.rich_text.rewriters along with the embed handlers / link handlers registered
 # with the feature registry
--- a/wagtail/core/tests/test_rich_text.py
+++ b/wagtail/core/tests/test_rich_text.py
@@ -3,8 +3,7 @@ from django.test import TestCase
 from mock import patch

 from wagtail.core.models import Page
-from wagtail.core.rich_text import (
-    DbWhitelister, RichText, expand_db_html)
+from wagtail.core.rich_text import RichText, expand_db_html
 from wagtail.core.rich_text.feature_registry import FeatureRegistry
 from wagtail.core.rich_text.pages import PageLinkHandler
 from wagtail.core.rich_text.rewriters import extract_attrs
@@ -43,43 +42,6 @@ class TestPageLinkHandler(TestCase):
        self.assertEqual(result, '<a href="None">')


-class TestDbWhiteLister(TestCase):
-    def setUp(self):
-        self.whitelister = DbWhitelister()
-
-    def test_clean_tag_node_div(self):
-        soup = BeautifulSoup('<div>foo</div>', 'html5lib')
-        tag = soup.div
-        self.assertEqual(tag.name, 'div')
-        self.whitelister.clean_tag_node(soup, tag)
-        self.assertEqual(tag.name, 'p')
-
-    def test_clean_tag_node_with_data_embedtype(self):
-        soup = BeautifulSoup(
-            '<p><a data-embedtype="image" data-id=1 data-format="left" data-alt="bar" irrelevant="baz">foo</a></p>',
-            'html5lib'
-        )
-        tag = soup.p
-        self.whitelister.clean_tag_node(soup, tag)
-        self.assertEqual(str(tag),
-                         '<p><embed alt="bar" embedtype="image" format="left" id="1"/></p>')
-
-    def test_clean_tag_node_with_data_linktype(self):
-        soup = BeautifulSoup(
-            '<a data-linktype="document" data-id="1" irrelevant="baz">foo</a>',
-            'html5lib'
-        )
-        tag = soup.a
-        self.whitelister.clean_tag_node(soup, tag)
-        self.assertEqual(str(tag), '<a id="1" linktype="document">foo</a>')
-
-    def test_clean_tag_node(self):
-        soup = BeautifulSoup('<a irrelevant="baz">foo</a>', 'html5lib')
-        tag = soup.a
-        self.whitelister.clean_tag_node(soup, tag)
-        self.assertEqual(str(tag), '<a>foo</a>')
-
-
 class TestExtractAttrs(TestCase):
    def test_extract_attr(self):
        html = '<a foo="bar" baz="quux">snowman</a>'