kopia lustrzana https://github.com/micropython/micropython-lib
unix-ffi/re: Convert to PCRE2.
PCRE is marked as EOL and won't receive any new security update. Convert the re module to PCRE2 API to enforce security. Additional dependency is now needed with uctypes due to changes in how PCRE2 return the match_data in a pointer and require special handling. The converted module is tested with the test_re.py with no regression. Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>pull/737/head
rodzic
0620d02290
commit
d8e163bb5f
|
@ -1,36 +1,55 @@
|
||||||
import sys
|
import sys
|
||||||
import ffilib
|
import ffilib
|
||||||
import array
|
import array
|
||||||
|
import uctypes
|
||||||
|
|
||||||
|
pcre2 = ffilib.open("libpcre2-8")
|
||||||
|
|
||||||
|
# pcre2_code *pcre2_compile(PCRE2_SPTR pattern, PCRE2_SIZE length,
|
||||||
|
# uint32_t options, int *errorcode, PCRE2_SIZE *erroroffset,
|
||||||
|
# pcre2_compile_context *ccontext);
|
||||||
|
pcre2_compile = pcre2.func("p", "pcre2_compile_8", "siippp")
|
||||||
|
|
||||||
|
# int pcre2_match(const pcre2_code *code, PCRE2_SPTR subject,
|
||||||
|
# PCRE2_SIZE length, PCRE2_SIZE startoffset, uint32_t options,
|
||||||
|
# pcre2_match_data *match_data, pcre2_match_context *mcontext);
|
||||||
|
pcre2_match = pcre2.func("i", "pcre2_match_8", "Psiiipp")
|
||||||
|
|
||||||
|
# int pcre2_pattern_info(const pcre2_code *code, uint32_t what,
|
||||||
|
# void *where);
|
||||||
|
pcre2_pattern_info = pcre2.func("i", "pcre2_pattern_info_8", "Pip")
|
||||||
|
|
||||||
|
# PCRE2_SIZE *pcre2_get_ovector_pointer(pcre2_match_data *match_data);
|
||||||
|
pcre2_get_ovector_pointer = pcre2.func("p", "pcre2_get_ovector_pointer_8", "p")
|
||||||
|
|
||||||
|
# pcre2_match_data *pcre2_match_data_create_from_pattern(const pcre2_code *code,
|
||||||
|
# pcre2_general_context *gcontext);
|
||||||
|
pcre2_match_data_create_from_pattern = pcre2.func(
|
||||||
|
"p", "pcre2_match_data_create_from_pattern_8", "Pp"
|
||||||
|
)
|
||||||
|
|
||||||
|
# PCRE2_SIZE that is of type size_t.
|
||||||
|
# Use ULONG as type to support both 32bit and 64bit.
|
||||||
|
PCRE2_SIZE_SIZE = uctypes.sizeof({"field": 0 | uctypes.ULONG})
|
||||||
|
PCRE2_SIZE_TYPE = "L"
|
||||||
|
|
||||||
|
# Real value in pcre2.h is 0xFFFFFFFF for 32bit and
|
||||||
|
# 0x0xFFFFFFFFFFFFFFFF for 64bit that is equivalent
|
||||||
|
# to -1
|
||||||
|
PCRE2_ZERO_TERMINATED = -1
|
||||||
|
|
||||||
|
|
||||||
pcre = ffilib.open("libpcre")
|
IGNORECASE = I = 0x8
|
||||||
|
MULTILINE = M = 0x400
|
||||||
# pcre *pcre_compile(const char *pattern, int options,
|
DOTALL = S = 0x20
|
||||||
# const char **errptr, int *erroffset,
|
VERBOSE = X = 0x80
|
||||||
# const unsigned char *tableptr);
|
PCRE2_ANCHORED = 0x80000000
|
||||||
pcre_compile = pcre.func("p", "pcre_compile", "sipps")
|
|
||||||
|
|
||||||
# int pcre_exec(const pcre *code, const pcre_extra *extra,
|
|
||||||
# const char *subject, int length, int startoffset,
|
|
||||||
# int options, int *ovector, int ovecsize);
|
|
||||||
pcre_exec = pcre.func("i", "pcre_exec", "PPsiiipi")
|
|
||||||
|
|
||||||
# int pcre_fullinfo(const pcre *code, const pcre_extra *extra,
|
|
||||||
# int what, void *where);
|
|
||||||
pcre_fullinfo = pcre.func("i", "pcre_fullinfo", "PPip")
|
|
||||||
|
|
||||||
|
|
||||||
IGNORECASE = I = 1
|
|
||||||
MULTILINE = M = 2
|
|
||||||
DOTALL = S = 4
|
|
||||||
VERBOSE = X = 8
|
|
||||||
PCRE_ANCHORED = 0x10
|
|
||||||
|
|
||||||
# TODO. Note that Python3 has unicode by default
|
# TODO. Note that Python3 has unicode by default
|
||||||
ASCII = A = 0
|
ASCII = A = 0
|
||||||
UNICODE = U = 0
|
UNICODE = U = 0
|
||||||
|
|
||||||
PCRE_INFO_CAPTURECOUNT = 2
|
PCRE2_INFO_CAPTURECOUNT = 0x4
|
||||||
|
|
||||||
|
|
||||||
class PCREMatch:
|
class PCREMatch:
|
||||||
|
@ -67,19 +86,23 @@ class PCREPattern:
|
||||||
def search(self, s, pos=0, endpos=-1, _flags=0):
|
def search(self, s, pos=0, endpos=-1, _flags=0):
|
||||||
assert endpos == -1, "pos: %d, endpos: %d" % (pos, endpos)
|
assert endpos == -1, "pos: %d, endpos: %d" % (pos, endpos)
|
||||||
buf = array.array("i", [0])
|
buf = array.array("i", [0])
|
||||||
pcre_fullinfo(self.obj, None, PCRE_INFO_CAPTURECOUNT, buf)
|
pcre2_pattern_info(self.obj, PCRE2_INFO_CAPTURECOUNT, buf)
|
||||||
cap_count = buf[0]
|
cap_count = buf[0]
|
||||||
ov = array.array("i", [0, 0, 0] * (cap_count + 1))
|
match_data = pcre2_match_data_create_from_pattern(self.obj, None)
|
||||||
num = pcre_exec(self.obj, None, s, len(s), pos, _flags, ov, len(ov))
|
num = pcre2_match(self.obj, s, len(s), pos, _flags, match_data, None)
|
||||||
if num == -1:
|
if num == -1:
|
||||||
# No match
|
# No match
|
||||||
return None
|
return None
|
||||||
|
ov_ptr = pcre2_get_ovector_pointer(match_data)
|
||||||
|
# pcre2_get_ovector_pointer return PCRE2_SIZE
|
||||||
|
ov_buf = uctypes.bytearray_at(ov_ptr, PCRE2_SIZE_SIZE * (cap_count + 1) * 2)
|
||||||
|
ov = array.array(PCRE2_SIZE_TYPE, ov_buf)
|
||||||
# We don't care how many matching subexpressions we got, we
|
# We don't care how many matching subexpressions we got, we
|
||||||
# care only about total # of capturing ones (including empty)
|
# care only about total # of capturing ones (including empty)
|
||||||
return PCREMatch(s, cap_count + 1, ov)
|
return PCREMatch(s, cap_count + 1, ov)
|
||||||
|
|
||||||
def match(self, s, pos=0, endpos=-1):
|
def match(self, s, pos=0, endpos=-1):
|
||||||
return self.search(s, pos, endpos, PCRE_ANCHORED)
|
return self.search(s, pos, endpos, PCRE2_ANCHORED)
|
||||||
|
|
||||||
def sub(self, repl, s, count=0):
|
def sub(self, repl, s, count=0):
|
||||||
if not callable(repl):
|
if not callable(repl):
|
||||||
|
@ -141,9 +164,9 @@ class PCREPattern:
|
||||||
|
|
||||||
|
|
||||||
def compile(pattern, flags=0):
|
def compile(pattern, flags=0):
|
||||||
errptr = bytes(4)
|
errcode = bytes(4)
|
||||||
erroffset = bytes(4)
|
erroffset = bytes(4)
|
||||||
regex = pcre_compile(pattern, flags, errptr, erroffset, None)
|
regex = pcre2_compile(pattern, PCRE2_ZERO_TERMINATED, flags, errcode, erroffset, None)
|
||||||
assert regex
|
assert regex
|
||||||
return PCREPattern(regex)
|
return PCREPattern(regex)
|
||||||
|
|
||||||
|
@ -154,7 +177,7 @@ def search(pattern, string, flags=0):
|
||||||
|
|
||||||
|
|
||||||
def match(pattern, string, flags=0):
|
def match(pattern, string, flags=0):
|
||||||
r = compile(pattern, flags | PCRE_ANCHORED)
|
r = compile(pattern, flags | PCRE2_ANCHORED)
|
||||||
return r.search(string)
|
return r.search(string)
|
||||||
|
|
||||||
|
|
||||||
|
|
Ładowanie…
Reference in New Issue