kopia lustrzana https://github.com/micropython/micropython
lib/re1.5: Allow null characters in regex patterns.
Signed-off-by: Andrew Leech <andrew@alelec.net>
pull/8152/head
rodzic
01c758e26a
commit
1fc5e17987
|
@ -21,19 +21,20 @@ static void _emit_checked(int at, char *code, int val, bool *err) {
|
|||
}
|
||||
}
|
||||
|
||||
static const char *_compilecode(const char *re, ByteProg *prog, int sizecode)
|
||||
static const char *_compilecode(const char *re, size_t len, ByteProg *prog, int sizecode)
|
||||
{
|
||||
char *code = sizecode ? NULL : prog->insts;
|
||||
bool err = false;
|
||||
int start = PC;
|
||||
int term = PC;
|
||||
int alt_label = 0;
|
||||
|
||||
for (; *re && *re != ')'; re++) {
|
||||
const char *re_top = re + len;
|
||||
|
||||
while (re < re_top && *re != ')') {
|
||||
switch (*re) {
|
||||
case '\\':
|
||||
re++;
|
||||
if (!*re) return NULL; // Trailing backslash
|
||||
if (re >= re_top) return NULL; // Trailing backslash
|
||||
if (MATCH_NAMED_CLASS_CHAR(*re)) {
|
||||
term = PC;
|
||||
EMIT(PC++, NamedClass);
|
||||
|
@ -57,18 +58,22 @@ static const char *_compilecode(const char *re, ByteProg *prog, int sizecode)
|
|||
int cnt;
|
||||
term = PC;
|
||||
re++;
|
||||
if (re >= re_top) return NULL; // Trailing bracket
|
||||
if (*re == '^') {
|
||||
EMIT(PC++, ClassNot);
|
||||
re++;
|
||||
if (re >= re_top) return NULL; // Trailing ^
|
||||
} else {
|
||||
EMIT(PC++, Class);
|
||||
}
|
||||
PC++; // Skip # of pair byte
|
||||
prog->len++;
|
||||
for (cnt = 0; *re != ']'; re++, cnt++) {
|
||||
if (re >= re_top) return NULL; // Missing closing bracket
|
||||
char c = *re;
|
||||
if (c == '\\') {
|
||||
++re;
|
||||
if (re >= re_top) return NULL; // Trailing backslash
|
||||
c = *re;
|
||||
if (MATCH_NAMED_CLASS_CHAR(c)) {
|
||||
c = RE15_CLASS_NAMED_CLASS_INDICATOR;
|
||||
|
@ -76,7 +81,7 @@ static const char *_compilecode(const char *re, ByteProg *prog, int sizecode)
|
|||
}
|
||||
}
|
||||
if (!c) return NULL;
|
||||
if (re[1] == '-' && re[2] != ']') {
|
||||
if (re_top - re > 2 && re[1] == '-' && re[2] != ']') {
|
||||
re += 2;
|
||||
}
|
||||
emit_char_pair:
|
||||
|
@ -89,7 +94,7 @@ static const char *_compilecode(const char *re, ByteProg *prog, int sizecode)
|
|||
case '(': {
|
||||
term = PC;
|
||||
int sub = 0;
|
||||
int capture = re[1] != '?' || re[2] != ':';
|
||||
int capture = re_top - re > 2 && (re[1] != '?' || re[2] != ':');
|
||||
|
||||
if (capture) {
|
||||
sub = ++prog->sub;
|
||||
|
@ -97,11 +102,13 @@ static const char *_compilecode(const char *re, ByteProg *prog, int sizecode)
|
|||
EMIT_CHECKED(PC++, 2 * sub);
|
||||
prog->len++;
|
||||
} else {
|
||||
re += 2;
|
||||
re += 2;
|
||||
}
|
||||
|
||||
re = _compilecode(re + 1, prog, sizecode);
|
||||
if (re == NULL || *re != ')') return NULL; // error, or no matching paren
|
||||
re++;
|
||||
if (re >= re_top) return NULL; // Trailing bracket
|
||||
re = _compilecode(re, re_top - re, prog, sizecode);
|
||||
if (re == NULL || re >= re_top || *re != ')') return NULL; // error, or no matching paren
|
||||
|
||||
if (capture) {
|
||||
EMIT(PC++, Save);
|
||||
|
@ -114,7 +121,7 @@ static const char *_compilecode(const char *re, ByteProg *prog, int sizecode)
|
|||
case '?':
|
||||
if (PC == term) return NULL; // nothing to repeat
|
||||
INSERT_CODE(term, 2, PC);
|
||||
if (re[1] == '?') {
|
||||
if (re_top - re > 1 && re[1] == '?') {
|
||||
EMIT(term, RSplit);
|
||||
re++;
|
||||
} else {
|
||||
|
@ -130,7 +137,7 @@ static const char *_compilecode(const char *re, ByteProg *prog, int sizecode)
|
|||
EMIT(PC, Jmp);
|
||||
EMIT_CHECKED(PC + 1, REL(PC, term));
|
||||
PC += 2;
|
||||
if (re[1] == '?') {
|
||||
if (re_top - re > 1 && re[1] == '?') {
|
||||
EMIT(term, RSplit);
|
||||
re++;
|
||||
} else {
|
||||
|
@ -142,7 +149,7 @@ static const char *_compilecode(const char *re, ByteProg *prog, int sizecode)
|
|||
break;
|
||||
case '+':
|
||||
if (PC == term) return NULL; // nothing to repeat
|
||||
if (re[1] == '?') {
|
||||
if (re_top - re > 1 && re[1] == '?') {
|
||||
EMIT(PC, Split);
|
||||
re++;
|
||||
} else {
|
||||
|
@ -176,27 +183,31 @@ static const char *_compilecode(const char *re, ByteProg *prog, int sizecode)
|
|||
term = PC;
|
||||
break;
|
||||
}
|
||||
re++;
|
||||
}
|
||||
|
||||
if (alt_label) {
|
||||
EMIT_CHECKED(alt_label, REL(alt_label, PC) + 1);
|
||||
}
|
||||
return err ? NULL : re;
|
||||
if (err) {
|
||||
return NULL;
|
||||
}
|
||||
return re;
|
||||
}
|
||||
|
||||
/*
 * Returns the value of dummyprog.bytelen after running _compilecode() over
 * the pattern with sizecode set to 1, or -1 if _compilecode() returns NULL.
 * dummyprog.bytelen starts at 5 + NON_ANCHORED_PREFIX.
 *
 * NOTE(review): this span comes from a rendered diff, so it shows two
 * variants of the signature and of the _compilecode() call -- one without
 * and one with the `len` argument. Presumably the `len` variants are the
 * post-change lines; confirm against the repository before compiling.
 */
int re1_5_sizecode(const char *re)
|
||||
int re1_5_sizecode(const char *re, size_t len)
|
||||
{
|
||||
ByteProg dummyprog = {
|
||||
// Save 0, Save 1, Match; more bytes for "search" (vs "match") prefix code
|
||||
.bytelen = 5 + NON_ANCHORED_PREFIX
|
||||
};
|
||||

||||
// A NULL result from _compilecode() is reported to the caller as -1.
if (_compilecode(re, &dummyprog, /*sizecode*/1) == NULL) return -1;
|
||||
if (_compilecode(re, len, &dummyprog, /*sizecode*/1) == NULL) return -1;
|
||||

||||
// bytelen as left in dummyprog after the _compilecode() call above.
return dummyprog.bytelen;
|
||||
}
|
||||
|
||||
int re1_5_compilecode(ByteProg *prog, const char *re)
|
||||
int re1_5_compilecode(ByteProg *prog, const char *re, size_t len)
|
||||
{
|
||||
prog->len = 0;
|
||||
prog->bytelen = 0;
|
||||
|
@ -216,7 +227,7 @@ int re1_5_compilecode(ByteProg *prog, const char *re)
|
|||
prog->insts[prog->bytelen++] = 0;
|
||||
prog->len++;
|
||||
|
||||
re = _compilecode(re, prog, /*sizecode*/0);
|
||||
re = _compilecode(re, len, prog, /*sizecode*/0);
|
||||
if (re == NULL || *re) return 1;
|
||||
|
||||
prog->insts[prog->bytelen++] = Save;
|
||||
|
|
|
@ -146,8 +146,8 @@ int re1_5_recursiveloopprog(ByteProg*, Subject*, const char**, int, int);
|
|||
int re1_5_recursiveprog(ByteProg*, Subject*, const char**, int, int);
|
||||
int re1_5_thompsonvm(ByteProg*, Subject*, const char**, int, int);
|
||||
|
||||
int re1_5_sizecode(const char *re);
|
||||
int re1_5_compilecode(ByteProg *prog, const char *re);
|
||||
int re1_5_sizecode(const char *re, size_t len);
|
||||
int re1_5_compilecode(ByteProg *prog, const char *re, size_t len);
|
||||
void re1_5_dumpcode(ByteProg *prog);
|
||||
void cleanmarks(ByteProg *prog);
|
||||
int _re1_5_classmatch(const char *pc, const char *sp);
|
||||
|
|
Ładowanie…
Reference in New Issue