kopia lustrzana https://github.com/micropython/micropython-lib
182 wiersze
4.8 KiB
Python
182 wiersze
4.8 KiB
Python
import ffilib
|
|
import array
|
|
|
|
|
|
pcre = ffilib.open("libpcre")
|
|
|
|
# pcre *pcre_compile(const char *pattern, int options,
|
|
# const char **errptr, int *erroffset,
|
|
# const unsigned char *tableptr);
|
|
pcre_compile = pcre.func("p", "pcre_compile", "sipps")
|
|
|
|
# int pcre_exec(const pcre *code, const pcre_extra *extra,
|
|
# const char *subject, int length, int startoffset,
|
|
# int options, int *ovector, int ovecsize);
|
|
pcre_exec = pcre.func("i", "pcre_exec", "PPsiiipi")
|
|
|
|
# int pcre_fullinfo(const pcre *code, const pcre_extra *extra,
|
|
# int what, void *where);
|
|
pcre_fullinfo = pcre.func("i", "pcre_fullinfo", "PPip")
|
|
|
|
|
|
IGNORECASE = I = 1
|
|
MULTILINE = M = 2
|
|
DOTALL = S = 4
|
|
VERBOSE = X = 8
|
|
PCRE_ANCHORED = 0x10
|
|
|
|
# TODO. Note that Python3 has unicode by default
|
|
ASCII = A = 0
|
|
UNICODE = U = 0
|
|
|
|
PCRE_INFO_CAPTURECOUNT = 2
|
|
|
|
|
|
class PCREMatch:
|
|
|
|
def __init__(self, s, num_matches, offsets):
|
|
self.s = s
|
|
self.num = num_matches
|
|
self.offsets = offsets
|
|
|
|
def group(self, *n):
|
|
if len(n) == 1:
|
|
return self.s[self.offsets[n[0]*2]:self.offsets[n[0]*2+1]]
|
|
return tuple(self.s[self.offsets[i*2]:self.offsets[i*2+1]] for i in n)
|
|
|
|
def groups(self, default=None):
|
|
assert default is None
|
|
return tuple(self.group(i + 1) for i in range(self.num - 1))
|
|
|
|
def start(self, n=0):
|
|
return self.offsets[n*2]
|
|
|
|
def end(self, n=0):
|
|
return self.offsets[n*2+1]
|
|
|
|
def span(self, n=0):
|
|
return self.offsets[n*2], self.offsets[n*2+1]
|
|
|
|
|
|
class PCREPattern:
|
|
|
|
def __init__(self, compiled_ptn):
|
|
self.obj = compiled_ptn
|
|
|
|
def search(self, s, pos=0, endpos=-1, _flags=0):
|
|
assert endpos == -1, "pos: %d, endpos: %d" % (pos, endpos)
|
|
buf = bytes(4)
|
|
pcre_fullinfo(self.obj, None, PCRE_INFO_CAPTURECOUNT, buf)
|
|
cap_count = int.from_bytes(buf)
|
|
ov = array.array('i', [0, 0, 0] * (cap_count + 1))
|
|
num = pcre_exec(self.obj, None, s, len(s), pos, _flags, ov, len(ov))
|
|
if num == -1:
|
|
# No match
|
|
return None
|
|
# We don't care how many matching subexpressions we got, we
|
|
# care only about total # of capturing ones (including empty)
|
|
return PCREMatch(s, cap_count + 1, ov)
|
|
|
|
def match(self, s, pos=0, endpos=-1):
|
|
return self.search(s, pos, endpos, PCRE_ANCHORED)
|
|
|
|
def sub(self, repl, s, count=0):
|
|
if not callable(repl):
|
|
assert "\\" not in repl, "Backrefs not implemented"
|
|
res = ""
|
|
while s:
|
|
m = self.search(s)
|
|
if not m:
|
|
return res + s
|
|
beg, end = m.span()
|
|
res += s[:beg]
|
|
if callable(repl):
|
|
res += repl(m)
|
|
else:
|
|
res += repl
|
|
s = s[end:]
|
|
if count != 0:
|
|
count -= 1
|
|
if count == 0:
|
|
return res + s
|
|
return res
|
|
|
|
def split(self, s, maxsplit=0):
|
|
res = []
|
|
while True:
|
|
m = self.search(s)
|
|
g = None
|
|
if m:
|
|
g = m.group(0)
|
|
if not m or not g:
|
|
res.append(s)
|
|
return res
|
|
beg, end = m.span(0)
|
|
res.append(s[:beg])
|
|
if m.num > 1:
|
|
res.extend(m.groups())
|
|
s = s[end:]
|
|
if maxsplit > 0:
|
|
maxsplit -= 1
|
|
if maxsplit == 0:
|
|
res.append(s)
|
|
return res
|
|
|
|
def findall(self, s):
|
|
res = []
|
|
start = 0
|
|
while True:
|
|
m = self.search(s, start)
|
|
if not m:
|
|
return res
|
|
if m.num == 1:
|
|
res.append(m.group(0))
|
|
elif m.num == 2:
|
|
res.append(m.group(1))
|
|
else:
|
|
res.append(m.groups())
|
|
beg, end = m.span(0)
|
|
start = end
|
|
|
|
|
|
def compile(pattern, flags=0):
|
|
errptr = bytes(4)
|
|
erroffset = bytes(4)
|
|
regex = pcre_compile(pattern, flags, errptr, erroffset, None)
|
|
assert regex
|
|
return PCREPattern(regex)
|
|
|
|
|
|
def search(pattern, string, flags=0):
|
|
r = compile(pattern, flags)
|
|
return r.search(string)
|
|
|
|
|
|
def match(pattern, string, flags=0):
|
|
r = compile(pattern, flags | PCRE_ANCHORED)
|
|
return r.search(string)
|
|
|
|
|
|
def sub(pattern, repl, s, count=0, flags=0):
|
|
r = compile(pattern, flags)
|
|
return r.sub(repl, s, count)
|
|
|
|
|
|
def split(pattern, s, maxsplit=0, flags=0):
|
|
r = compile(pattern, flags)
|
|
return r.split(s, maxsplit)
|
|
|
|
def findall(pattern, s, flags=0):
|
|
r = compile(pattern, flags)
|
|
return r.findall(s)
|
|
|
|
|
|
def escape(s):
|
|
res = ""
|
|
for c in s:
|
|
if '0' <= c <= '9' or 'A' <= c <= 'Z' or 'a' <= c <= 'z' or c == '_':
|
|
res += c
|
|
else:
|
|
res += "\\" + c
|
|
return res
|