Explicitly set html5lib as BeautifulSoup parser

Fixes the warnings BeautifulSoup emits during tests when no parser is specified

I also fixed a few import order issues
pull/1637/head
Karl Hobley 2015-08-24 10:12:56 +01:00
rodzic 868d0fb3e4
commit 5cd957d264
7 zmienionych plików z 32 dodań i 39 usunięć

Wyświetl plik

@ -1,8 +1,10 @@
from bs4 import BeautifulSoup
from django.test import TestCase
from wagtail.wagtailcore.rich_text import DbWhitelister
from wagtail.wagtailcore.whitelist import Whitelister
from bs4 import BeautifulSoup
class TestDbWhitelister(TestCase):
def assertHtmlEqual(self, str1, str2):
@ -10,7 +12,7 @@ class TestDbWhitelister(TestCase):
Assert that two HTML strings are equal at the DOM level
(necessary because we can't guarantee the order that attributes are output in)
"""
self.assertEqual(BeautifulSoup(str1), BeautifulSoup(str2))
self.assertEqual(BeautifulSoup(str1, 'html5lib'), BeautifulSoup(str2, 'html5lib'))
def test_page_link_is_rewritten(self):
input_html = '<p>Look at the <a data-linktype="page" data-id="2" href="/">lovely homepage</a> of my <a href="http://wagtail.io/">Wagtail</a> site</p>'

Wyświetl plik

@ -1,4 +1,5 @@
from mock import patch
from bs4 import BeautifulSoup
from django.test import TestCase
@ -9,16 +10,13 @@ from wagtail.wagtailcore.rich_text import (
expand_db_html,
RichText
)
from bs4 import BeautifulSoup
class TestPageLinkHandler(TestCase):
fixtures = ['test.json']
def test_get_db_attributes(self):
soup = BeautifulSoup(
'<a data-id="test-id">foo</a>'
)
soup = BeautifulSoup('<a data-id="test-id">foo</a>', 'html5lib')
tag = soup.a
result = PageLinkHandler.get_db_attributes(tag)
self.assertEqual(result,
@ -49,9 +47,7 @@ class TestPageLinkHandler(TestCase):
class TestDbWhiteLister(TestCase):
def test_clean_tag_node_div(self):
soup = BeautifulSoup(
'<div>foo</div>'
)
soup = BeautifulSoup('<div>foo</div>', 'html5lib')
tag = soup.div
self.assertEqual(tag.name, 'div')
DbWhitelister.clean_tag_node(soup, tag)
@ -59,7 +55,8 @@ class TestDbWhiteLister(TestCase):
def test_clean_tag_node_with_data_embedtype(self):
soup = BeautifulSoup(
'<p><a data-embedtype="image" data-id=1 data-format="left" data-alt="bar" irrelevant="baz">foo</a></p>'
'<p><a data-embedtype="image" data-id=1 data-format="left" data-alt="bar" irrelevant="baz">foo</a></p>',
'html5lib'
)
tag = soup.p
DbWhitelister.clean_tag_node(soup, tag)
@ -68,16 +65,15 @@ class TestDbWhiteLister(TestCase):
def test_clean_tag_node_with_data_linktype(self):
soup = BeautifulSoup(
'<a data-linktype="document" data-id="1" irrelevant="baz">foo</a>'
'<a data-linktype="document" data-id="1" irrelevant="baz">foo</a>',
'html5lib'
)
tag = soup.a
DbWhitelister.clean_tag_node(soup, tag)
self.assertEqual(str(tag), '<a id="1" linktype="document">foo</a>')
def test_clean_tag_node(self):
soup = BeautifulSoup(
'<a irrelevant="baz">foo</a>'
)
soup = BeautifulSoup('<a irrelevant="baz">foo</a>', 'html5lib')
tag = soup.a
DbWhitelister.clean_tag_node(soup, tag)
self.assertEqual(str(tag), '<a>foo</a>')

Wyświetl plik

@ -1,6 +1,7 @@
from bs4 import BeautifulSoup
from django.test import TestCase
from wagtail.wagtailcore.whitelist import (
check_url,
attribute_rule,
@ -8,6 +9,7 @@ from wagtail.wagtailcore.whitelist import (
Whitelister
)
class TestCheckUrl(TestCase):
def test_allowed_url_schemes(self):
for url_scheme in ['', 'http', 'https', 'ftp', 'mailto', 'tel']:
@ -27,7 +29,7 @@ class TestCheckUrl(TestCase):
class TestAttributeRule(TestCase):
def setUp(self):
self.soup = BeautifulSoup('<b foo="bar">baz</b>')
self.soup = BeautifulSoup('<b foo="bar">baz</b>', 'html5lib')
def test_no_rule_for_attr(self):
"""
@ -86,7 +88,7 @@ class TestAttributeRule(TestCase):
Test that attribute_rule() with will drop all
attributes.
"""
soup = BeautifulSoup('<b foo="bar" baz="quux" snowman="barbecue"></b>')
soup = BeautifulSoup('<b foo="bar" baz="quux" snowman="barbecue"></b>', 'html5lib')
tag = soup.b
allow_without_attributes(tag)
self.assertEqual(str(tag), '<b></b>')
@ -97,7 +99,7 @@ class TestWhitelister(TestCase):
"""
Unknown node should remove a node from the parent document
"""
soup = BeautifulSoup('<foo><bar>baz</bar>quux</foo>')
soup = BeautifulSoup('<foo><bar>baz</bar>quux</foo>', 'html5lib')
tag = soup.foo
Whitelister.clean_unknown_node('', soup.bar)
self.assertEqual(str(tag), '<foo>quux</foo>')
@ -107,7 +109,7 @@ class TestWhitelister(TestCase):
<b> tags are allowed without attributes. This remains true
when tags are nested.
"""
soup = BeautifulSoup('<b><b class="delete me">foo</b></b>')
soup = BeautifulSoup('<b><b class="delete me">foo</b></b>', 'html5lib')
tag = soup.b
Whitelister.clean_tag_node(tag, tag)
self.assertEqual(str(tag), '<b><b>foo</b></b>')
@ -116,19 +118,19 @@ class TestWhitelister(TestCase):
"""
<foo> tags should be removed, even when nested.
"""
soup = BeautifulSoup('<b><foo>bar</foo></b>')
soup = BeautifulSoup('<b><foo>bar</foo></b>', 'html5lib')
tag = soup.b
Whitelister.clean_tag_node(tag, tag)
self.assertEqual(str(tag), '<b>bar</b>')
def test_clean_string_node_does_nothing(self):
soup = BeautifulSoup('<b>bar</b>')
soup = BeautifulSoup('<b>bar</b>', 'html5lib')
string = soup.b.string
Whitelister.clean_string_node(string, string)
self.assertEqual(str(string), 'bar')
def test_clean_node_does_not_change_navigable_strings(self):
soup = BeautifulSoup('<b>bar</b>')
soup = BeautifulSoup('<b>bar</b>', 'html5lib')
string = soup.b.string
Whitelister.clean_node(string, string)
self.assertEqual(str(string), 'bar')

Wyświetl plik

@ -3,8 +3,6 @@ A generic HTML whitelisting engine, designed to accommodate subclassing to overr
specific rules.
"""
import re
from bs4 import BeautifulSoup, NavigableString, Tag

Wyświetl plik

@ -659,9 +659,7 @@ class TestDocumentRichTextLinkHandler(TestCase):
fixtures = ['test.json']
def test_get_db_attributes(self):
soup = BeautifulSoup(
'<a data-id="test-id">foo</a>'
)
soup = BeautifulSoup('<a data-id="test-id">foo</a>', 'html5lib')
tag = soup.a
result = DocumentLinkHandler.get_db_attributes(tag)
self.assertEqual(result,

Wyświetl plik

@ -1,24 +1,23 @@
import django.utils.six.moves.urllib.request
from django.utils.six.moves.urllib.error import URLError
from mock import patch
import unittest
from mock import patch
from bs4 import BeautifulSoup
from wagtail.wagtailembeds.rich_text import MediaEmbedHandler
try:
import embedly # noqa
no_embedly = False
except ImportError:
no_embedly = True
import django.utils.six.moves.urllib.request
from django import template
from django.test import TestCase
from django.core.exceptions import ValidationError
from django.utils.six.moves.urllib.error import URLError
from wagtail.wagtailcore import blocks
from wagtail.tests.utils import WagtailTestUtils
from wagtail.wagtailembeds.rich_text import MediaEmbedHandler
from wagtail.wagtailembeds.embeds import (
EmbedNotFoundException,
EmbedlyException,
@ -27,7 +26,6 @@ from wagtail.wagtailembeds.embeds import (
embedly as wagtail_embedly,
oembed as wagtail_oembed,
)
from wagtail.wagtailcore import blocks
from wagtail.wagtailembeds.templatetags.wagtailembeds_tags import embed as embed_filter
from wagtail.wagtailembeds.blocks import EmbedBlock, EmbedValue
from wagtail.wagtailembeds.models import Embed
@ -435,9 +433,7 @@ class TestEmbedBlock(TestCase):
class TestMediaEmbedHandler(TestCase):
def test_get_db_attributes(self):
soup = BeautifulSoup(
'<b data-url="test-url">foo</b>'
)
soup = BeautifulSoup('<b data-url="test-url">foo</b>', 'html5lib')
tag = soup.b
result = MediaEmbedHandler.get_db_attributes(tag)
self.assertEqual(result,

Wyświetl plik

@ -1,15 +1,16 @@
from django.test import TestCase
from bs4 import BeautifulSoup
from mock import patch
from django.test import TestCase
from wagtail.wagtailimages.rich_text import ImageEmbedHandler
class TestImageEmbedHandler(TestCase):
def test_get_db_attributes(self):
soup = BeautifulSoup(
'<b data-id="test-id" data-format="test-format" data-alt="test-alt">foo</b>'
'<b data-id="test-id" data-format="test-format" data-alt="test-alt">foo</b>',
'html5lib'
)
tag = soup.b
result = ImageEmbedHandler.get_db_attributes(tag)