Move DbWhitelister into wagtail.admin.rich_text.converters.editor_html

pull/4079/merge
Matt Westcott 2017-11-29 13:42:35 +00:00
rodzic c60d9b7cc8
commit e65c5a6106
6 zmienionych plików z 151 dodań i 149 usunięć

Wyświetl plik

@@ -7,7 +7,8 @@ from django.utils.module_loading import import_string
from wagtail.utils.widgets import WidgetWithScript
from wagtail.admin.edit_handlers import RichTextFieldPanel
from wagtail.core.rich_text import DbWhitelister, expand_db_html, features
from wagtail.admin.rich_text.converters.editor_html import DbWhitelister
from wagtail.core.rich_text import expand_db_html, features
class HalloPlugin:

Wyświetl plik

@@ -0,0 +1,110 @@
from django.utils.functional import cached_property
from wagtail.core import hooks
from wagtail.core.rich_text import features
from wagtail.core.whitelist import allow_without_attributes, Whitelister, DEFAULT_ELEMENT_RULES
class DbWhitelister(Whitelister):
    """
    A custom whitelisting engine to convert the HTML as returned by the rich text editor
    into the pseudo-HTML format stored in the database (in which images, documents and other
    linked objects are identified by ID rather than URL):

    * implements a 'construct_whitelister_element_rules' hook so that other apps can modify
      the whitelist ruleset (e.g. to permit additional HTML elements beyond those in the base
      Whitelister module);
    * replaces any element with a 'data-embedtype' attribute with an <embed> element, with
      attributes supplied by the handler for that type as defined in embed_handlers;
    * rewrites the attributes of any <a> element with a 'data-linktype' attribute, as
      determined by the handler for that type defined in link_handlers, while keeping the
      element content intact.

    ``features`` is an optional iterable of feature names. When it is None, element rules
    come from DEFAULT_ELEMENT_RULES plus the 'construct_whitelister_element_rules' hook, and
    embed / link handlers come from the feature registry's default feature list.
    """

    def __init__(self, features=None):
        # NB this parameter shadows the module-level ``features`` registry inside
        # __init__ only; the other methods refer to the registry.
        self.features = features

    def _feature_list(self):
        """Return the feature names used to look up embed and link handlers."""
        if self.features is None:
            return features.get_default_features()
        return self.features

    @cached_property
    def element_rules(self):
        """Mapping of element name to whitelisting rule, built once per instance."""
        if self.features is None:
            # use the legacy construct_whitelister_element_rules hook to build up whitelist rules
            element_rules = DEFAULT_ELEMENT_RULES.copy()
            for fn in hooks.get_hooks('construct_whitelister_element_rules'):
                element_rules.update(fn())
        else:
            # use the feature registry to build up whitelist rules
            element_rules = {
                '[document]': allow_without_attributes,
                'p': allow_without_attributes,
                'div': allow_without_attributes,
                'br': allow_without_attributes,
            }
            for feature_name in self.features:
                element_rules.update(features.get_whitelister_element_rules(feature_name))

        return element_rules

    @cached_property
    def embed_handlers(self):
        """Mapping of 'data-embedtype' value to embed handler, built once per instance."""
        embed_handlers = {}
        for feature in self._feature_list():
            embed_handlers.update(features.get_embed_handler_rules(feature))

        return embed_handlers

    @cached_property
    def link_handlers(self):
        """Mapping of 'data-linktype' value to link handler, built once per instance."""
        link_handlers = {}
        for feature in self._feature_list():
            link_handlers.update(features.get_link_handler_rules(feature))

        return link_handlers

    def clean_tag_node(self, doc, tag):
        """
        Whitelist ``tag`` in place within the parsed document ``doc``.

        Elements carrying 'data-embedtype' are replaced by an <embed> element (or removed
        entirely if the embed type is unrecognised); <a> elements carrying 'data-linktype'
        have their attributes rewritten (or are unwrapped if the link type is unrecognised);
        <div> elements are renamed to <p>; everything else is passed to the base class.
        """
        if 'data-embedtype' in tag.attrs:
            embed_type = tag['data-embedtype']
            # fetch the appropriate embed handler for this embedtype
            try:
                embed_handler = self.embed_handlers[embed_type]
            except KeyError:
                # discard embeds with unrecognised embedtypes
                tag.decompose()
                return

            embed_attrs = embed_handler.get_db_attributes(tag)
            embed_attrs['embedtype'] = embed_type

            embed_tag = doc.new_tag('embed', **embed_attrs)
            embed_tag.can_be_empty_element = True
            tag.replace_with(embed_tag)
        elif tag.name == 'a' and 'data-linktype' in tag.attrs:
            # first, whitelist the contents of this tag.
            # Cleaning a child may remove or replace it within tag.contents (see the
            # decompose / replace_with / unwrap calls in this method), so iterate over a
            # snapshot; iterating the live list would skip the sibling following any
            # removed node and leave it un-whitelisted.
            for child in list(tag.contents):
                self.clean_node(doc, child)

            link_type = tag['data-linktype']
            try:
                link_handler = self.link_handlers[link_type]
            except KeyError:
                # discard links with unrecognised linktypes
                tag.unwrap()
                return

            link_attrs = link_handler.get_db_attributes(tag)
            link_attrs['linktype'] = link_type
            tag.attrs.clear()
            tag.attrs.update(**link_attrs)
        else:
            if tag.name == 'div':
                tag.name = 'p'

            super(DbWhitelister, self).clean_tag_node(doc, tag)

Wyświetl plik

@@ -1,10 +1,47 @@
from bs4 import BeautifulSoup
from django.test import TestCase
from wagtail.core.rich_text import DbWhitelister
from wagtail.admin.rich_text.converters.editor_html import DbWhitelister
from wagtail.core.whitelist import Whitelister
class TestDbWhitelisterMethods(TestCase):
    # Direct tests of DbWhitelister.clean_tag_node on small parsed fragments.

    def setUp(self):
        # whitelister constructed without an explicit feature list
        self.whitelister = DbWhitelister()

    @staticmethod
    def _parse(markup):
        # every test here parses with the html5lib tree builder
        return BeautifulSoup(markup, 'html5lib')

    def test_clean_tag_node_div(self):
        document = self._parse('<div>foo</div>')
        node = document.div
        self.assertEqual(node.name, 'div')

        self.whitelister.clean_tag_node(document, node)

        self.assertEqual(node.name, 'p')

    def test_clean_tag_node_with_data_embedtype(self):
        markup = '<p><a data-embedtype="image" data-id=1 data-format="left" data-alt="bar" irrelevant="baz">foo</a></p>'
        expected = '<p><embed alt="bar" embedtype="image" format="left" id="1"/></p>'
        document = self._parse(markup)
        node = document.p

        self.whitelister.clean_tag_node(document, node)

        self.assertEqual(str(node), expected)

    def test_clean_tag_node_with_data_linktype(self):
        markup = '<a data-linktype="document" data-id="1" irrelevant="baz">foo</a>'
        expected = '<a id="1" linktype="document">foo</a>'
        document = self._parse(markup)
        node = document.a

        self.whitelister.clean_tag_node(document, node)

        self.assertEqual(str(node), expected)

    def test_clean_tag_node(self):
        document = self._parse('<a irrelevant="baz">foo</a>')
        node = document.a

        self.whitelister.clean_tag_node(document, node)

        self.assertEqual(str(node), '<a>foo</a>')
class TestDbWhitelister(TestCase):
def setUp(self):
self.whitelister = DbWhitelister()

Wyświetl plik

@@ -1,120 +1,12 @@
from django.utils.functional import cached_property
from django.utils.safestring import mark_safe
from wagtail.core import hooks
from wagtail.core.rich_text.feature_registry import FeatureRegistry
from wagtail.core.rich_text.rewriters import EmbedRewriter, LinkRewriter, MultiRuleRewriter
from wagtail.core.whitelist import allow_without_attributes, Whitelister, DEFAULT_ELEMENT_RULES
features = FeatureRegistry()
class DbWhitelister(Whitelister):
    """
    A custom whitelisting engine to convert the HTML as returned by the rich text editor
    into the pseudo-HTML format stored in the database (in which images, documents and other
    linked objects are identified by ID rather than URL):

    * implements a 'construct_whitelister_element_rules' hook so that other apps can modify
      the whitelist ruleset (e.g. to permit additional HTML elements beyond those in the base
      Whitelister module);
    * replaces any element with a 'data-embedtype' attribute with an <embed> element, with
      attributes supplied by the handler for that type as defined in embed_handlers;
    * rewrites the attributes of any <a> element with a 'data-linktype' attribute, as
      determined by the handler for that type defined in link_handlers, while keeping the
      element content intact.

    ``features`` is an optional iterable of feature names. When it is None, element rules
    come from DEFAULT_ELEMENT_RULES plus the 'construct_whitelister_element_rules' hook, and
    embed / link handlers come from the feature registry's default feature list.
    """

    def __init__(self, features=None):
        # NB this parameter shadows the module-level ``features`` registry inside
        # __init__ only; the other methods refer to the registry.
        self.features = features

    def _feature_list(self):
        """Return the feature names used to look up embed and link handlers."""
        if self.features is None:
            return features.get_default_features()
        return self.features

    @cached_property
    def element_rules(self):
        """Mapping of element name to whitelisting rule, built once per instance."""
        if self.features is None:
            # use the legacy construct_whitelister_element_rules hook to build up whitelist rules
            element_rules = DEFAULT_ELEMENT_RULES.copy()
            for fn in hooks.get_hooks('construct_whitelister_element_rules'):
                element_rules.update(fn())
        else:
            # use the feature registry to build up whitelist rules
            element_rules = {
                '[document]': allow_without_attributes,
                'p': allow_without_attributes,
                'div': allow_without_attributes,
                'br': allow_without_attributes,
            }
            for feature_name in self.features:
                element_rules.update(features.get_whitelister_element_rules(feature_name))

        return element_rules

    @cached_property
    def embed_handlers(self):
        """Mapping of 'data-embedtype' value to embed handler, built once per instance."""
        embed_handlers = {}
        for feature in self._feature_list():
            embed_handlers.update(features.get_embed_handler_rules(feature))

        return embed_handlers

    @cached_property
    def link_handlers(self):
        """Mapping of 'data-linktype' value to link handler, built once per instance."""
        link_handlers = {}
        for feature in self._feature_list():
            link_handlers.update(features.get_link_handler_rules(feature))

        return link_handlers

    def clean_tag_node(self, doc, tag):
        """
        Whitelist ``tag`` in place within the parsed document ``doc``.

        Elements carrying 'data-embedtype' are replaced by an <embed> element (or removed
        entirely if the embed type is unrecognised); <a> elements carrying 'data-linktype'
        have their attributes rewritten (or are unwrapped if the link type is unrecognised);
        <div> elements are renamed to <p>; everything else is passed to the base class.
        """
        if 'data-embedtype' in tag.attrs:
            embed_type = tag['data-embedtype']
            # fetch the appropriate embed handler for this embedtype
            try:
                embed_handler = self.embed_handlers[embed_type]
            except KeyError:
                # discard embeds with unrecognised embedtypes
                tag.decompose()
                return

            embed_attrs = embed_handler.get_db_attributes(tag)
            embed_attrs['embedtype'] = embed_type

            embed_tag = doc.new_tag('embed', **embed_attrs)
            embed_tag.can_be_empty_element = True
            tag.replace_with(embed_tag)
        elif tag.name == 'a' and 'data-linktype' in tag.attrs:
            # first, whitelist the contents of this tag.
            # Cleaning a child may remove or replace it within tag.contents (see the
            # decompose / replace_with / unwrap calls in this method), so iterate over a
            # snapshot; iterating the live list would skip the sibling following any
            # removed node and leave it un-whitelisted.
            for child in list(tag.contents):
                self.clean_node(doc, child)

            link_type = tag['data-linktype']
            try:
                link_handler = self.link_handlers[link_type]
            except KeyError:
                # discard links with unrecognised linktypes
                tag.unwrap()
                return

            link_attrs = link_handler.get_db_attributes(tag)
            link_attrs['linktype'] = link_type
            tag.attrs.clear()
            tag.attrs.update(**link_attrs)
        else:
            if tag.name == 'div':
                tag.name = 'p'

            super(DbWhitelister, self).clean_tag_node(doc, tag)
# Rewriter functions to be built up on first call to expand_db_html, using the utility classes
# from wagtail.core.rich_text.rewriters along with the embed handlers / link handlers registered
# with the feature registry

Wyświetl plik

@@ -3,8 +3,7 @@ from django.test import TestCase
from mock import patch
from wagtail.core.models import Page
from wagtail.core.rich_text import (
DbWhitelister, RichText, expand_db_html)
from wagtail.core.rich_text import RichText, expand_db_html
from wagtail.core.rich_text.feature_registry import FeatureRegistry
from wagtail.core.rich_text.pages import PageLinkHandler
from wagtail.core.rich_text.rewriters import extract_attrs
@@ -43,43 +42,6 @@ class TestPageLinkHandler(TestCase):
self.assertEqual(result, '<a href="None">')
class TestDbWhiteLister(TestCase):
    # Direct tests of DbWhitelister.clean_tag_node on small parsed fragments.

    def setUp(self):
        # whitelister constructed without an explicit feature list
        self.whitelister = DbWhitelister()

    def _clean(self, markup, element_name):
        # Parse markup with html5lib, clean the first element of the given name,
        # and hand back that element for inspection.
        document = BeautifulSoup(markup, 'html5lib')
        element = getattr(document, element_name)
        self.whitelister.clean_tag_node(document, element)
        return element

    def test_clean_tag_node_div(self):
        document = BeautifulSoup('<div>foo</div>', 'html5lib')
        element = document.div
        self.assertEqual(element.name, 'div')

        self.whitelister.clean_tag_node(document, element)

        self.assertEqual(element.name, 'p')

    def test_clean_tag_node_with_data_embedtype(self):
        cleaned = self._clean(
            '<p><a data-embedtype="image" data-id=1 data-format="left" data-alt="bar" irrelevant="baz">foo</a></p>',
            'p',
        )
        self.assertEqual(
            str(cleaned),
            '<p><embed alt="bar" embedtype="image" format="left" id="1"/></p>',
        )

    def test_clean_tag_node_with_data_linktype(self):
        cleaned = self._clean(
            '<a data-linktype="document" data-id="1" irrelevant="baz">foo</a>',
            'a',
        )
        self.assertEqual(str(cleaned), '<a id="1" linktype="document">foo</a>')

    def test_clean_tag_node(self):
        cleaned = self._clean('<a irrelevant="baz">foo</a>', 'a')
        self.assertEqual(str(cleaned), '<a>foo</a>')
class TestExtractAttrs(TestCase):
def test_extract_attr(self):
html = '<a foo="bar" baz="quux">snowman</a>'