micropython-lib/unix-ffi/re/re.py

207 wiersze
6.0 KiB
Python

import sys
import ffilib
import array
import uctypes
pcre2 = ffilib.open("libpcre2-8")
# pcre2_code *pcre2_compile(PCRE2_SPTR pattern, PCRE2_SIZE length,
# uint32_t options, int *errorcode, PCRE2_SIZE *erroroffset,
# pcre2_compile_context *ccontext);
pcre2_compile = pcre2.func("p", "pcre2_compile_8", "siippp")
# int pcre2_match(const pcre2_code *code, PCRE2_SPTR subject,
# PCRE2_SIZE length, PCRE2_SIZE startoffset, uint32_t options,
# pcre2_match_data *match_data, pcre2_match_context *mcontext);
pcre2_match = pcre2.func("i", "pcre2_match_8", "Psiiipp")
# int pcre2_pattern_info(const pcre2_code *code, uint32_t what,
# void *where);
pcre2_pattern_info = pcre2.func("i", "pcre2_pattern_info_8", "Pip")
# PCRE2_SIZE *pcre2_get_ovector_pointer(pcre2_match_data *match_data);
pcre2_get_ovector_pointer = pcre2.func("p", "pcre2_get_ovector_pointer_8", "p")
# pcre2_match_data *pcre2_match_data_create_from_pattern(const pcre2_code *code,
# pcre2_general_context *gcontext);
pcre2_match_data_create_from_pattern = pcre2.func(
"p", "pcre2_match_data_create_from_pattern_8", "Pp"
)
# PCRE2_SIZE that is of type size_t.
# Use ULONG as type to support both 32bit and 64bit.
PCRE2_SIZE_SIZE = uctypes.sizeof({"field": 0 | uctypes.ULONG})
PCRE2_SIZE_TYPE = "L"
# Real value in pcre2.h is 0xFFFFFFFF for 32bit and
# 0x0xFFFFFFFFFFFFFFFF for 64bit that is equivalent
# to -1
PCRE2_ZERO_TERMINATED = -1
IGNORECASE = I = 0x8
MULTILINE = M = 0x400
DOTALL = S = 0x20
VERBOSE = X = 0x80
PCRE2_ANCHORED = 0x80000000
# TODO. Note that Python3 has unicode by default
ASCII = A = 0
UNICODE = U = 0
PCRE2_INFO_CAPTURECOUNT = 0x4
class PCREMatch:
def __init__(self, s, num_matches, offsets):
self.s = s
self.num = num_matches
self.offsets = offsets
def group(self, *n):
if not n:
return self.s[self.offsets[0] : self.offsets[1]]
if len(n) == 1:
return self.s[self.offsets[n[0] * 2] : self.offsets[n[0] * 2 + 1]]
return tuple(self.s[self.offsets[i * 2] : self.offsets[i * 2 + 1]] for i in n)
def groups(self, default=None):
assert default is None
return tuple(self.group(i + 1) for i in range(self.num - 1))
def start(self, n=0):
return self.offsets[n * 2]
def end(self, n=0):
return self.offsets[n * 2 + 1]
def span(self, n=0):
return self.offsets[n * 2], self.offsets[n * 2 + 1]
class PCREPattern:
def __init__(self, compiled_ptn):
self.obj = compiled_ptn
def search(self, s, pos=0, endpos=-1, _flags=0):
assert endpos == -1, "pos: %d, endpos: %d" % (pos, endpos)
buf = array.array("i", [0])
pcre2_pattern_info(self.obj, PCRE2_INFO_CAPTURECOUNT, buf)
cap_count = buf[0]
match_data = pcre2_match_data_create_from_pattern(self.obj, None)
num = pcre2_match(self.obj, s, len(s), pos, _flags, match_data, None)
if num == -1:
# No match
return None
ov_ptr = pcre2_get_ovector_pointer(match_data)
# pcre2_get_ovector_pointer return PCRE2_SIZE
ov_buf = uctypes.bytearray_at(ov_ptr, PCRE2_SIZE_SIZE * (cap_count + 1) * 2)
ov = array.array(PCRE2_SIZE_TYPE, ov_buf)
# We don't care how many matching subexpressions we got, we
# care only about total # of capturing ones (including empty)
return PCREMatch(s, cap_count + 1, ov)
def match(self, s, pos=0, endpos=-1):
return self.search(s, pos, endpos, PCRE2_ANCHORED)
def sub(self, repl, s, count=0):
if not callable(repl):
assert "\\" not in repl, "Backrefs not implemented"
res = ""
while s:
m = self.search(s)
if not m:
return res + s
beg, end = m.span()
res += s[:beg]
if callable(repl):
res += repl(m)
else:
res += repl
s = s[end:]
if count != 0:
count -= 1
if count == 0:
return res + s
return res
def split(self, s, maxsplit=0):
res = []
while True:
m = self.search(s)
g = None
if m:
g = m.group(0)
if not m or not g:
res.append(s)
return res
beg, end = m.span(0)
res.append(s[:beg])
if m.num > 1:
res.extend(m.groups())
s = s[end:]
if maxsplit > 0:
maxsplit -= 1
if maxsplit == 0:
res.append(s)
return res
def findall(self, s):
res = []
start = 0
while True:
m = self.search(s, start)
if not m:
return res
if m.num == 1:
res.append(m.group(0))
elif m.num == 2:
res.append(m.group(1))
else:
res.append(m.groups())
beg, end = m.span(0)
start = end
def compile(pattern, flags=0):
errcode = bytes(4)
erroffset = bytes(4)
regex = pcre2_compile(pattern, PCRE2_ZERO_TERMINATED, flags, errcode, erroffset, None)
assert regex
return PCREPattern(regex)
def search(pattern, string, flags=0):
r = compile(pattern, flags)
return r.search(string)
def match(pattern, string, flags=0):
r = compile(pattern, flags | PCRE2_ANCHORED)
return r.search(string)
def sub(pattern, repl, s, count=0, flags=0):
r = compile(pattern, flags)
return r.sub(repl, s, count)
def split(pattern, s, maxsplit=0, flags=0):
r = compile(pattern, flags)
return r.split(s, maxsplit)
def findall(pattern, s, flags=0):
r = compile(pattern, flags)
return r.findall(s)
def escape(s):
res = ""
for c in s:
if "0" <= c <= "9" or "A" <= c <= "Z" or "a" <= c <= "z" or c == "_":
res += c
else:
res += "\\" + c
return res