From 0373045505defae1d18dc3694e5a7ee8c1a56b33 Mon Sep 17 00:00:00 2001
From: Paul Sokolovsky
Date: Sun, 17 May 2015 00:42:26 +0300
Subject: [PATCH] html.parser: PCRE cannot handle literal NULs, requires quoted hex repr.

---
 html.parser/html/parser.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/html.parser/html/parser.py b/html.parser/html/parser.py
index 60a322a9..c31320e2 100644
--- a/html.parser/html/parser.py
+++ b/html.parser/html/parser.py
@@ -26,7 +26,7 @@ commentclose = re.compile(r'--\s*>')
 tagfind = re.compile('([a-zA-Z][-.a-zA-Z0-9:_]*)(?:\s|/(?!>))*')
 # see http://www.w3.org/TR/html5/tokenization.html#tag-open-state
 # and http://www.w3.org/TR/html5/tokenization.html#tag-name-state
-tagfind_tolerant = re.compile('[a-zA-Z][^\t\n\r\f />\x00]*')
+tagfind_tolerant = re.compile('[a-zA-Z][^\t\n\r\f />\\x00]*')
 # Note:
 # 1) the strict attrfind isn't really strict, but we can't make it
 # correctly strict without breaking backward compatibility;