Move construct_whitelister_element_rules hook logic into DbWhitelister, as the whitelist module should be Wagtail-agnostic

2014-06-20 15:07:58 +01:00 · 2014-06-20 15:07:58 +01:00 · 505a1291a8
commit 505a1291a8
--- a/wagtail/wagtailcore/rich_text.py
+++ b/wagtail/wagtailcore/rich_text.py
@@ -13,6 +13,8 @@ from wagtail.wagtaildocs.models import Document
 from wagtail.wagtailimages.models import get_image_model
 from wagtail.wagtailimages.formats import get_image_format

+from wagtail.wagtailadmin import hooks
+

 # Define a set of 'embed handlers' and 'link handlers'. These handle the translation
 # of 'special' HTML elements in rich text - ones which we do not want to include
@@ -158,6 +160,18 @@ LINK_HANDLERS = {
 # Prepare a whitelisting engine with custom behaviour:
 # rewrite any elements with a data-embedtype or data-linktype attribute
 class DbWhitelister(Whitelister):
+    has_loaded_custom_whitelist_rules = False
+
+    @classmethod
+    def clean(cls, html):
+        if not cls.has_loaded_custom_whitelist_rules:
+            for fn in hooks.get_hooks('construct_whitelister_element_rules'):
+                cls.element_rules = dict(
+                    cls.element_rules.items() + fn().items())
+            cls.has_loaded_custom_whitelist_rules = True
+
+        return super(DbWhitelister, cls).clean(html)
+
    @classmethod
    def clean_tag_node(cls, doc, tag):
        if 'data-embedtype' in tag.attrs:
--- a/wagtail/wagtailcore/tests/test_dbwhitelister.py
+++ b/wagtail/wagtailcore/tests/test_dbwhitelister.py
@@ -1,5 +1,6 @@
 from django.test import TestCase
 from wagtail.wagtailcore.rich_text import DbWhitelister
+from wagtail.wagtailcore.whitelist import Whitelister

 from bs4 import BeautifulSoup

@@ -37,7 +38,13 @@ class TestDbWhitelister(TestCase):

    def test_whitelist_hooks(self):
        # wagtail.tests.wagtail_hooks overrides the whitelist to permit <blockquote> and <a target="...">
-        input_html = '<blockquote>I would put a tax on all people who <a href="https://twitter.com/DMReporter/status/432914941201223680/photo/1" target="_blank" tea="darjeeling">stand in water</a>.</blockquote><p>- <character>Gumby</character>'
+        input_html = '<blockquote>I would put a tax on all people who <a href="https://twitter.com/DMReporter/status/432914941201223680/photo/1" target="_blank" tea="darjeeling">stand in water</a>.</blockquote><p>- <character>Gumby</character></p>'
        output_html = DbWhitelister.clean(input_html)
-        expected = '<blockquote>I would put a tax on all people who <a href="https://twitter.com/DMReporter/status/432914941201223680/photo/1" target="_blank">stand in water</a>.</blockquote><p>- Gumby'
+        expected = '<blockquote>I would put a tax on all people who <a href="https://twitter.com/DMReporter/status/432914941201223680/photo/1" target="_blank">stand in water</a>.</blockquote><p>- Gumby</p>'
+        self.assertHtmlEqual(expected, output_html)
+
+        # check that the base Whitelister class is unaffected by these custom whitelist rules
+        input_html = '<blockquote>I would put a tax on all people who <a href="https://twitter.com/DMReporter/status/432914941201223680/photo/1" target="_blank" tea="darjeeling">stand in water</a>.</blockquote><p>- <character>Gumby</character></p>'
+        output_html = Whitelister.clean(input_html)
+        expected = 'I would put a tax on all people who <a href="https://twitter.com/DMReporter/status/432914941201223680/photo/1">stand in water</a>.<p>- Gumby</p>'
        self.assertHtmlEqual(expected, output_html)
--- a/wagtail/wagtailcore/whitelist.py
+++ b/wagtail/wagtailcore/whitelist.py
@@ -5,8 +5,6 @@ specific rules.
 from bs4 import BeautifulSoup, NavigableString, Tag
 from urlparse import urlparse

-from wagtail.wagtailadmin import hooks
-

 ALLOWED_URL_SCHEMES = ['', 'http', 'https', 'ftp', 'mailto', 'tel']

@@ -82,10 +80,6 @@ class Whitelister(object):
    def clean(cls, html):
        """Clean up an HTML string to contain just the allowed elements /
        attributes"""
-        for fn in hooks.get_hooks('construct_whitelister_element_rules'):
-            cls.element_rules = dict(
-                cls.element_rules.items() + fn().items())
-
        doc = BeautifulSoup(html, 'lxml')
        cls.clean_node(doc, doc)
        return unicode(doc)