From 00f707a4389403ac9f0a0769d3afd8c71f4966c6 Mon Sep 17 00:00:00 2001 From: Matt Westcott Date: Tue, 14 Jul 2015 16:14:45 +0100 Subject: [PATCH] Ensure that tabs in non-Latin languages are given non-blank IDs - fixes #1428 --- .../edit_handlers/tabbed_interface.html | 5 ++- .../templatetags/wagtailadmin_tags.py | 8 ++++ wagtail/wagtailcore/tests/test_utils.py | 38 +++++++++++++++++++ wagtail/wagtailcore/utils.py | 38 +++++++++++++++++++ 4 files changed, 87 insertions(+), 2 deletions(-) create mode 100644 wagtail/wagtailcore/tests/test_utils.py diff --git a/wagtail/wagtailadmin/templates/wagtailadmin/edit_handlers/tabbed_interface.html b/wagtail/wagtailadmin/templates/wagtailadmin/edit_handlers/tabbed_interface.html index b70c1d47af..761f04c0ee 100644 --- a/wagtail/wagtailadmin/templates/wagtailadmin/edit_handlers/tabbed_interface.html +++ b/wagtail/wagtailadmin/templates/wagtailadmin/edit_handlers/tabbed_interface.html @@ -1,12 +1,13 @@ +{% load wagtailadmin_tags %}
{% for child in self.children %} -
+
{{ child.render_as_object }}
{% endfor %} diff --git a/wagtail/wagtailadmin/templatetags/wagtailadmin_tags.py b/wagtail/wagtailadmin/templatetags/wagtailadmin_tags.py index 3a4a21d41b..f66d77bc19 100644 --- a/wagtail/wagtailadmin/templatetags/wagtailadmin_tags.py +++ b/wagtail/wagtailadmin/templatetags/wagtailadmin_tags.py @@ -3,10 +3,12 @@ from __future__ import unicode_literals from django.conf import settings from django import template from django.contrib.humanize.templatetags.humanize import intcomma +from django.template.defaultfilters import stringfilter from wagtail.wagtailcore import hooks from wagtail.wagtailcore.models import get_navigation_menu_items, UserPagePermissionsProxy, PageViewRestriction from wagtail.wagtailcore.utils import camelcase_to_underscore, escape_script +from wagtail.wagtailcore.utils import cautious_slugify as _cautious_slugify from wagtail.wagtailadmin.menu import admin_menu @@ -183,3 +185,9 @@ def has_unrendered_errors(bound_field): the widget does not support the render_with_errors method """ return bound_field.errors and not hasattr(bound_field.field.widget, 'render_with_errors') + + +@register.filter(is_safe=True) +@stringfilter +def cautious_slugify(value): + return _cautious_slugify(value) diff --git a/wagtail/wagtailcore/tests/test_utils.py b/wagtail/wagtailcore/tests/test_utils.py new file mode 100644 index 0000000000..581f3271a9 --- /dev/null +++ b/wagtail/wagtailcore/tests/test_utils.py @@ -0,0 +1,38 @@ +# -*- coding: utf-8 -* +from __future__ import unicode_literals + +from django.test import TestCase + +from django.utils.text import slugify +from wagtail.wagtailcore.utils import cautious_slugify + + +class TestCautiousSlugify(TestCase): + + def test_behaves_same_as_slugify_for_latin_chars(self): + test_cases = [ + ('', ''), + ('???', ''), + ('Hello world', 'hello-world'), + ('Hello_world', 'hello_world'), + ('Hellö wörld', 'hello-world'), + ('Hello world', 'hello-world'), + (' Hello world ', 'hello-world'), + ('Hello, world!', 'hello-world'), + ('Hello*world', 'helloworld'), + ('Hello☃world', 'helloworld'), + ] + + for (original, expected_result) in test_cases: + self.assertEqual(slugify(original), expected_result) + self.assertEqual(cautious_slugify(original), expected_result) + + def test_escapes_non_latin_chars(self): + test_cases = [ + ('Straßenbahn', 'straxdfenbahn'), + ('Спорт!', 'u0421u043fu043eu0440u0442'), + ('〔山脈〕', 'u5c71u8108'), + ] + + for (original, expected_result) in test_cases: + self.assertEqual(cautious_slugify(original), expected_result) diff --git a/wagtail/wagtailcore/utils.py b/wagtail/wagtailcore/utils.py index e8ad697817..5b848705f5 100644 --- a/wagtail/wagtailcore/utils.py +++ b/wagtail/wagtailcore/utils.py @@ -1,7 +1,10 @@ import re +import unicodedata from django.db.models import Model from django.apps import apps +from django.utils.encoding import force_text +from django.utils.text import slugify from django.utils.six import string_types @@ -45,3 +48,38 @@ def escape_script(text): `<-/script>`, `<--/script>` etc. """ return SCRIPT_RE.sub(r'<-\1/script>', text) + + +SLUGIFY_RE = re.compile(r'[^\w\s-]', re.UNICODE) + + +def cautious_slugify(value): + """ + Convert a string to ASCII exactly as Django's slugify does, with the exception + that any non-ASCII alphanumeric characters (that cannot be ASCIIfied under Unicode + normalisation) are escaped into codes like 'u0421' instead of being deleted entirely. + + This ensures that the result of slugifying e.g. Cyrillic text will not be an empty + string, and can thus be safely used as an identifier (albeit not a human-readable one). + """ + value = force_text(value) + + # Normalize the string to decomposed unicode form. This causes accented Latin + # characters to be split into 'base character' + 'accent modifier'; the latter will + # be stripped out by the regexp, resulting in an ASCII-clean character that doesn't + # need to be escaped + value = unicodedata.normalize('NFKD', value) + + # Strip out characters that aren't letterlike, underscores or hyphens, + # using the same regexp that slugify uses. This ensures that non-ASCII non-letters + # (e.g. accent modifiers, fancy punctuation) get stripped rather than escaped + value = SLUGIFY_RE.sub('', value) + + # Encode as ASCII, escaping non-ASCII characters with backslashreplace, then convert + # back to a unicode string (which is what slugify expects) + value = value.encode('ascii', 'backslashreplace').decode('ascii') + + # Pass to slugify to perform final conversion (whitespace stripping, applying + # mark_safe); this will also strip out the backslashes from the 'backslashreplace' + # conversion + return slugify(value)