From 5cd957d2646df6fd07e56b13253bcdc441a56232 Mon Sep 17 00:00:00 2001 From: Karl Hobley Date: Mon, 24 Aug 2015 10:12:56 +0100 Subject: [PATCH] Explicitly set html5lib as BeautifulSoup parser Fixes some warnings during tests I also fixed a few import order issues --- .../wagtailcore/tests/test_dbwhitelister.py | 6 ++++-- wagtail/wagtailcore/tests/test_rich_text.py | 20 ++++++++----------- wagtail/wagtailcore/tests/test_whitelist.py | 16 ++++++++------- wagtail/wagtailcore/whitelist.py | 2 -- wagtail/wagtaildocs/tests.py | 4 +--- wagtail/wagtailembeds/tests.py | 16 ++++++--------- wagtail/wagtailimages/tests/test_rich_text.py | 7 ++++--- 7 files changed, 32 insertions(+), 39 deletions(-) diff --git a/wagtail/wagtailcore/tests/test_dbwhitelister.py b/wagtail/wagtailcore/tests/test_dbwhitelister.py index 3ab7868fa5..dc1f76ceaa 100644 --- a/wagtail/wagtailcore/tests/test_dbwhitelister.py +++ b/wagtail/wagtailcore/tests/test_dbwhitelister.py @@ -1,8 +1,10 @@ +from bs4 import BeautifulSoup + from django.test import TestCase + from wagtail.wagtailcore.rich_text import DbWhitelister from wagtail.wagtailcore.whitelist import Whitelister -from bs4 import BeautifulSoup class TestDbWhitelister(TestCase): def assertHtmlEqual(self, str1, str2): @@ -10,7 +12,7 @@ class TestDbWhitelister(TestCase): Assert that two HTML strings are equal at the DOM level (necessary because we can't guarantee the order that attributes are output in) """ - self.assertEqual(BeautifulSoup(str1), BeautifulSoup(str2)) + self.assertEqual(BeautifulSoup(str1, 'html5lib'), BeautifulSoup(str2, 'html5lib')) def test_page_link_is_rewritten(self): input_html = '

Look at the lovely homepage of my Wagtail site

' diff --git a/wagtail/wagtailcore/tests/test_rich_text.py b/wagtail/wagtailcore/tests/test_rich_text.py index b14b635b4f..850877437e 100644 --- a/wagtail/wagtailcore/tests/test_rich_text.py +++ b/wagtail/wagtailcore/tests/test_rich_text.py @@ -1,4 +1,5 @@ from mock import patch +from bs4 import BeautifulSoup from django.test import TestCase @@ -9,16 +10,13 @@ from wagtail.wagtailcore.rich_text import ( expand_db_html, RichText ) -from bs4 import BeautifulSoup class TestPageLinkHandler(TestCase): fixtures = ['test.json'] def test_get_db_attributes(self): - soup = BeautifulSoup( - 'foo' - ) + soup = BeautifulSoup('foo', 'html5lib') tag = soup.a result = PageLinkHandler.get_db_attributes(tag) self.assertEqual(result, @@ -49,9 +47,7 @@ class TestPageLinkHandler(TestCase): class TestDbWhiteLister(TestCase): def test_clean_tag_node_div(self): - soup = BeautifulSoup( - '
foo
' - ) + soup = BeautifulSoup('
foo
', 'html5lib') tag = soup.div self.assertEqual(tag.name, 'div') DbWhitelister.clean_tag_node(soup, tag) @@ -59,7 +55,8 @@ class TestDbWhiteLister(TestCase): def test_clean_tag_node_with_data_embedtype(self): soup = BeautifulSoup( - '

foo

' + '

foo

', + 'html5lib' ) tag = soup.p DbWhitelister.clean_tag_node(soup, tag) @@ -68,16 +65,15 @@ class TestDbWhiteLister(TestCase): def test_clean_tag_node_with_data_linktype(self): soup = BeautifulSoup( - 'foo' + 'foo', + 'html5lib' ) tag = soup.a DbWhitelister.clean_tag_node(soup, tag) self.assertEqual(str(tag), 'foo') def test_clean_tag_node(self): - soup = BeautifulSoup( - 'foo' - ) + soup = BeautifulSoup('foo', 'html5lib') tag = soup.a DbWhitelister.clean_tag_node(soup, tag) self.assertEqual(str(tag), 'foo') diff --git a/wagtail/wagtailcore/tests/test_whitelist.py b/wagtail/wagtailcore/tests/test_whitelist.py index 6589ecab5d..ce2ff44b84 100644 --- a/wagtail/wagtailcore/tests/test_whitelist.py +++ b/wagtail/wagtailcore/tests/test_whitelist.py @@ -1,6 +1,7 @@ from bs4 import BeautifulSoup from django.test import TestCase + from wagtail.wagtailcore.whitelist import ( check_url, attribute_rule, @@ -8,6 +9,7 @@ from wagtail.wagtailcore.whitelist import ( Whitelister ) + class TestCheckUrl(TestCase): def test_allowed_url_schemes(self): for url_scheme in ['', 'http', 'https', 'ftp', 'mailto', 'tel']: @@ -27,7 +29,7 @@ class TestCheckUrl(TestCase): class TestAttributeRule(TestCase): def setUp(self): - self.soup = BeautifulSoup('baz') + self.soup = BeautifulSoup('baz', 'html5lib') def test_no_rule_for_attr(self): """ @@ -86,7 +88,7 @@ class TestAttributeRule(TestCase): Test that attribute_rule() with will drop all attributes. """ - soup = BeautifulSoup('') + soup = BeautifulSoup('', 'html5lib') tag = soup.b allow_without_attributes(tag) self.assertEqual(str(tag), '') @@ -97,7 +99,7 @@ class TestWhitelister(TestCase): """ Unknown node should remove a node from the parent document """ - soup = BeautifulSoup('bazquux') + soup = BeautifulSoup('bazquux', 'html5lib') tag = soup.foo Whitelister.clean_unknown_node('', soup.bar) self.assertEqual(str(tag), 'quux') @@ -107,7 +109,7 @@ class TestWhitelister(TestCase): tags are allowed without attributes. 
This remains true when tags are nested. """ - soup = BeautifulSoup('foo') + soup = BeautifulSoup('foo', 'html5lib') tag = soup.b Whitelister.clean_tag_node(tag, tag) self.assertEqual(str(tag), 'foo') @@ -116,19 +118,19 @@ class TestWhitelister(TestCase): """ tags should be removed, even when nested. """ - soup = BeautifulSoup('bar') + soup = BeautifulSoup('bar', 'html5lib') tag = soup.b Whitelister.clean_tag_node(tag, tag) self.assertEqual(str(tag), 'bar') def test_clean_string_node_does_nothing(self): - soup = BeautifulSoup('bar') + soup = BeautifulSoup('bar', 'html5lib') string = soup.b.string Whitelister.clean_string_node(string, string) self.assertEqual(str(string), 'bar') def test_clean_node_does_not_change_navigable_strings(self): - soup = BeautifulSoup('bar') + soup = BeautifulSoup('bar', 'html5lib') string = soup.b.string Whitelister.clean_node(string, string) self.assertEqual(str(string), 'bar') diff --git a/wagtail/wagtailcore/whitelist.py b/wagtail/wagtailcore/whitelist.py index 356be9ecb9..3f521275ac 100644 --- a/wagtail/wagtailcore/whitelist.py +++ b/wagtail/wagtailcore/whitelist.py @@ -3,8 +3,6 @@ A generic HTML whitelisting engine, designed to accommodate subclassing to overr specific rules. 
""" import re - - from bs4 import BeautifulSoup, NavigableString, Tag diff --git a/wagtail/wagtaildocs/tests.py b/wagtail/wagtaildocs/tests.py index 6a8ca3dcaf..c1f0381307 100644 --- a/wagtail/wagtaildocs/tests.py +++ b/wagtail/wagtaildocs/tests.py @@ -659,9 +659,7 @@ class TestDocumentRichTextLinkHandler(TestCase): fixtures = ['test.json'] def test_get_db_attributes(self): - soup = BeautifulSoup( - 'foo' - ) + soup = BeautifulSoup('foo', 'html5lib') tag = soup.a result = DocumentLinkHandler.get_db_attributes(tag) self.assertEqual(result, diff --git a/wagtail/wagtailembeds/tests.py b/wagtail/wagtailembeds/tests.py index a477e8ea12..59a8fd0f09 100644 --- a/wagtail/wagtailembeds/tests.py +++ b/wagtail/wagtailembeds/tests.py @@ -1,24 +1,23 @@ -import django.utils.six.moves.urllib.request -from django.utils.six.moves.urllib.error import URLError - -from mock import patch import unittest +from mock import patch from bs4 import BeautifulSoup -from wagtail.wagtailembeds.rich_text import MediaEmbedHandler - try: import embedly # noqa no_embedly = False except ImportError: no_embedly = True +import django.utils.six.moves.urllib.request from django import template from django.test import TestCase from django.core.exceptions import ValidationError +from django.utils.six.moves.urllib.error import URLError +from wagtail.wagtailcore import blocks from wagtail.tests.utils import WagtailTestUtils +from wagtail.wagtailembeds.rich_text import MediaEmbedHandler from wagtail.wagtailembeds.embeds import ( EmbedNotFoundException, EmbedlyException, @@ -27,7 +26,6 @@ from wagtail.wagtailembeds.embeds import ( embedly as wagtail_embedly, oembed as wagtail_oembed, ) -from wagtail.wagtailcore import blocks from wagtail.wagtailembeds.templatetags.wagtailembeds_tags import embed as embed_filter from wagtail.wagtailembeds.blocks import EmbedBlock, EmbedValue from wagtail.wagtailembeds.models import Embed @@ -435,9 +433,7 @@ class TestEmbedBlock(TestCase): class TestMediaEmbedHandler(TestCase): 
def test_get_db_attributes(self): - soup = BeautifulSoup( - 'foo' - ) + soup = BeautifulSoup('foo', 'html5lib') tag = soup.b result = MediaEmbedHandler.get_db_attributes(tag) self.assertEqual(result, diff --git a/wagtail/wagtailimages/tests/test_rich_text.py b/wagtail/wagtailimages/tests/test_rich_text.py index 473a6faab5..b0ed55ce85 100644 --- a/wagtail/wagtailimages/tests/test_rich_text.py +++ b/wagtail/wagtailimages/tests/test_rich_text.py @@ -1,15 +1,16 @@ -from django.test import TestCase - from bs4 import BeautifulSoup from mock import patch +from django.test import TestCase + from wagtail.wagtailimages.rich_text import ImageEmbedHandler class TestImageEmbedHandler(TestCase): def test_get_db_attributes(self): soup = BeautifulSoup( - 'foo' + 'foo', + 'html5lib' ) tag = soup.b result = ImageEmbedHandler.get_db_attributes(tag)