Add core util for converting Regex to JS RegExp params

pull/12598/head
LB 2024-11-17 00:08:48 +10:00
rodzic 2ea0b8b6f2
commit 50ca1f9675
2 zmienionych plików z 149 dodań i 1 usunięć

Wyświetl plik

@ -5,7 +5,7 @@ import re
import unicodedata import unicodedata
from collections.abc import Iterable from collections.abc import Iterable
from hashlib import md5 from hashlib import md5
from typing import TYPE_CHECKING, Any, Union from typing import TYPE_CHECKING, Any, List, Optional, Union
from anyascii import anyascii from anyascii import anyascii
from django.apps import apps from django.apps import apps
@ -560,3 +560,59 @@ def make_wagtail_template_fragment_key(fragment_name, page, site, vary_on=None):
vary_on = [] vary_on = []
vary_on.extend([page.cache_key, site.id]) vary_on.extend([page.cache_key, site.id])
return make_template_fragment_key(fragment_name, vary_on) return make_template_fragment_key(fragment_name, vary_on)
def get_js_regex(
    regex: Optional[Union[re.Pattern, str, bytes]] = None,
    base_js_flags: Optional[str] = "gu",
) -> List[str]:
    """
    Convert a Python regex (compiled, or a pattern string) into a list of
    arguments that can be spread into JavaScript's ``new RegExp(...)``.

    Any provided pattern string is assumed to be correctly escaped already.

    JavaScript has no implicit flag comparable to ``re.UNICODE``, and its
    global flag is required for finding all occurrences, so ``base_js_flags``
    supplies the flags that are always included. It defaults to ``"gu"``
    (global, unicode); ``""`` or ``None`` means no base flags.

    Returns ``[]`` when ``regex`` is empty or ``None``, otherwise
    ``[pattern, flags]`` where ``flags`` is a de-duplicated, alphabetically
    sorted string of JavaScript flag characters.

    Raises ``ValueError`` if the regex uses ``re.LOCALE`` or ``re.VERBOSE``,
    neither of which has a JavaScript equivalent. A pattern string that does
    not compile raises ``re.error`` from ``re.compile``.
    """
    if not regex:
        # Nothing to convert, so no params are returned.
        # NOTE(review): in JavaScript a bare ``new RegExp()`` is the empty
        # pattern ``/(?:)/``, which matches the empty string; callers should
        # treat an empty list as "no regex" rather than "matches nothing".
        return []

    if not isinstance(regex, re.Pattern):
        # Compile so that inline flags such as ``(?i)`` are reflected in
        # ``.flags``. The value is assumed to be a string (or a bytestring
        # when attempting to use re.LOCALE).
        # NOTE(review): a bytes pattern without re.LOCALE gets past the flag
        # checks and then fails with TypeError in the ``re.sub`` call below.
        regex = re.compile(regex)
    flags = regex.flags

    # Throw an error if unsupported flags are provided.
    if flags & re.LOCALE:
        # re.LOCALE is not supported in JavaScript and is discouraged in the
        # Python docs.
        raise ValueError("Python re.LOCALE flag is not supported in JavaScript.")
    if flags & re.VERBOSE:
        # re.VERBOSE is not supported in JavaScript, unless custom cleaning
        # logic is added in the future.
        raise ValueError("Python re.VERBOSE flag is not supported in JavaScript.")

    flag_map = {
        re.IGNORECASE: "i",  # Ignore case
        re.MULTILINE: "m",  # Multiline mode
        re.DOTALL: "s",  # Dot (.) matches newlines
    }
    js_flags = [js_flag for py_flag, js_flag in flag_map.items() if flags & py_flag]

    # Merge with the base flags (None is treated as no base flags), remove
    # duplicates, and sort to ensure a consistent order.
    final_flags = "".join(sorted(set(base_js_flags or "").union(js_flags)))

    # Clean the pattern of any inline flag groups such as ``(?i)``; these are
    # not supported in JavaScript and are already captured in ``flags``.
    pattern = re.sub(r"(?i)(\(\?[a-z]+\))", "", regex.pattern)

    return [pattern, final_flags]

Wyświetl plik

@ -1,4 +1,5 @@
import json import json
import re
from django import template from django import template
from django.core.cache import cache from django.core.cache import cache
@ -13,6 +14,7 @@ from django.utils.translation import gettext_lazy
from wagtail.coreutils import ( from wagtail.coreutils import (
get_dummy_request, get_dummy_request,
get_js_regex,
make_wagtail_template_fragment_key, make_wagtail_template_fragment_key,
resolve_model_string, resolve_model_string,
) )
@ -810,3 +812,93 @@ class TestWagtailPageCacheTag(TestCase):
self.assertEqual( self.assertEqual(
e.exception.args[0], "'wagtailpagecache' tag requires at least 2 arguments." e.exception.args[0], "'wagtailpagecache' tag requires at least 2 arguments."
) )
class TestRegexJavaScriptConversion(TestCase):
    """
    Checks how get_js_regex turns Python regexes into JavaScript RegExp params.
    """

    def _assert_rejected(self, regex, message):
        # Shared check: converting ``regex`` must raise ValueError whose text
        # is exactly ``message``.
        with self.assertRaises(ValueError) as caught:
            get_js_regex(regex)
        self.assertEqual(str(caught.exception), message)

    def test_empty_value(self):
        # No argument, None and the empty string all produce no params.
        for call_args in ((), (None,), ("",)):
            self.assertEqual(get_js_regex(*call_args), [])

    def test_compiled_regex(self):
        compiled = re.compile(r"\D")
        self.assertEqual(get_js_regex(compiled), ["\\D", "gu"])

    def test_string(self):
        result = get_js_regex(r"\D")
        self.assertEqual(result, ["\\D", "gu"])

    def test_regex_with_flags(self):
        ignore_case = re.compile(r"w{1,3}", re.IGNORECASE)
        self.assertEqual(get_js_regex(ignore_case), ["w{1,3}", "giu"])

        multiline_dotall = re.compile(r".*", re.MULTILINE | re.S)
        self.assertEqual(get_js_regex(multiline_dotall), [".*", "gmsu"])

    def test_custom_base_js_flags(self):
        self.assertEqual(get_js_regex(r"\D", base_js_flags="g"), ["\\D", "g"])

        multiline = re.compile(r"\D", re.MULTILINE)
        self.assertEqual(get_js_regex(multiline, base_js_flags="g"), ["\\D", "gm"])

        self.assertEqual(get_js_regex(r"\D", base_js_flags=""), ["\\D", ""])

    def test_regex_with_inline_flags(self):
        # Inline flags are removed from the pattern and expressed as JS flags.
        self.assertEqual(get_js_regex(r"(?i)\D"), ["\\D", "giu"])

        inline_and_compiled = re.compile(r"(?i)\D", re.MULTILINE)
        self.assertEqual(get_js_regex(inline_and_compiled), ["\\D", "gimu"])

    def test_regex_with_invalid_flag_locale(self):
        """
        re.LOCALE is discouraged in the Python docs and has no JavaScript
        equivalent, so it is rejected both as a compile flag and inline.
        """
        message = "Python re.LOCALE flag is not supported in JavaScript."
        self._assert_rejected(re.compile(rb"\w", re.LOCALE), message)
        self._assert_rejected(rb"(?L)\w", message)

    def test_regex_with_invalid_flag_verbose(self):
        """
        Verbose-style regexes are rejected until advanced cleaning of them
        is implemented for JavaScript.
        """
        message = "Python re.VERBOSE flag is not supported in JavaScript."
        self._assert_rejected(r"(?x)\d{ 3 }", message)

        verbose_regex = re.compile(
            r"""
            \d+ # match one or more digits
            \s* # match zero or more whitespace characters
            \w+ # match one or more word characters
            """,
            re.X,
        )
        self._assert_rejected(verbose_regex, message)