kopia lustrzana https://github.com/micropython/micropython
Merge 5ad16c3678
into e60e8079a7
commit
4bcaf88c22
|
@ -140,6 +140,12 @@ Functions
|
||||||
|
|
||||||
Note: availability of this function depends on :term:`MicroPython port`.
|
Note: availability of this function depends on :term:`MicroPython port`.
|
||||||
|
|
||||||
|
.. function:: finditer(regex_str, string)
|
||||||
|
|
||||||
|
Return an iterator yielding ``Match`` objects over all non-overlapping
|
||||||
|
matches for the RE *regex_str* in *string*. The string is scanned
|
||||||
|
left-to-right, and matches are returned in the order found.
|
||||||
|
|
||||||
.. data:: DEBUG
|
.. data:: DEBUG
|
||||||
|
|
||||||
Flag value, display debug information about compiled expression.
|
Flag value, display debug information about compiled expression.
|
||||||
|
@ -154,15 +160,26 @@ Regex objects
|
||||||
Compiled regular expression. Instances of this class are created using
|
Compiled regular expression. Instances of this class are created using
|
||||||
`re.compile()`.
|
`re.compile()`.
|
||||||
|
|
||||||
.. method:: regex.match(string)
|
.. method:: regex.match(string, [pos, [endpos]])
|
||||||
regex.search(string)
|
regex.search(string, [pos, [endpos]])
|
||||||
|
regex.finditer(string, [pos, [endpos]])
|
||||||
regex.sub(replace, string, count=0, flags=0, /)
|
regex.sub(replace, string, count=0, flags=0, /)
|
||||||
|
|
||||||
Similar to the module-level functions :meth:`match`, :meth:`search`
|
Similar to the module-level functions :meth:`match`, :meth:`search`,
|
||||||
and :meth:`sub`.
|
:meth:`finditer`, and :meth:`sub`.
|
||||||
Using methods is (much) more efficient if the same regex is applied to
|
Using methods is (much) more efficient if the same regex is applied to
|
||||||
multiple strings.
|
multiple strings.
|
||||||
|
|
||||||
|
The optional second parameter *pos* gives an index in the string where the
|
||||||
|
search is to start; it defaults to ``0``. This is not completely equivalent
|
||||||
|
to slicing the string; the ``'^'`` pattern character matches at the real
|
||||||
|
beginning of the string and at positions just after a newline, but not
|
||||||
|
necessarily at the index where the search is to start.
|
||||||
|
|
||||||
|
The optional parameter *endpos* limits how far the string will be searched;
|
||||||
|
it will be as if the string is *endpos* characters long, so only the
|
||||||
|
characters from *pos* to ``endpos - 1`` will be searched for a match.
|
||||||
|
|
||||||
.. method:: regex.split(string, max_split=-1, /)
|
.. method:: regex.split(string, max_split=-1, /)
|
||||||
|
|
||||||
Split a *string* using regex. If *max_split* is given, it specifies
|
Split a *string* using regex. If *max_split* is given, it specifies
|
||||||
|
|
|
@ -195,10 +195,11 @@ static void re_print(const mp_print_t *print, mp_obj_t self_in, mp_print_kind_t
|
||||||
}
|
}
|
||||||
|
|
||||||
static mp_obj_t re_exec(bool is_anchored, uint n_args, const mp_obj_t *args) {
|
static mp_obj_t re_exec(bool is_anchored, uint n_args, const mp_obj_t *args) {
|
||||||
(void)n_args;
|
|
||||||
mp_obj_re_t *self;
|
mp_obj_re_t *self;
|
||||||
|
bool was_compiled = false;
|
||||||
if (mp_obj_is_type(args[0], (mp_obj_type_t *)&re_type)) {
|
if (mp_obj_is_type(args[0], (mp_obj_type_t *)&re_type)) {
|
||||||
self = MP_OBJ_TO_PTR(args[0]);
|
self = MP_OBJ_TO_PTR(args[0]);
|
||||||
|
was_compiled = true;
|
||||||
} else {
|
} else {
|
||||||
self = MP_OBJ_TO_PTR(mod_re_compile(1, args));
|
self = MP_OBJ_TO_PTR(mod_re_compile(1, args));
|
||||||
}
|
}
|
||||||
|
@ -206,6 +207,30 @@ static mp_obj_t re_exec(bool is_anchored, uint n_args, const mp_obj_t *args) {
|
||||||
size_t len;
|
size_t len;
|
||||||
subj.begin_line = subj.begin = mp_obj_str_get_data(args[1], &len);
|
subj.begin_line = subj.begin = mp_obj_str_get_data(args[1], &len);
|
||||||
subj.end = subj.begin + len;
|
subj.end = subj.begin + len;
|
||||||
|
|
||||||
|
if (was_compiled && n_args > 2) {
|
||||||
|
// Arg #2 is starting-pos
|
||||||
|
mp_int_t startpos = mp_obj_get_int(args[2]);
|
||||||
|
if (startpos > (mp_int_t) len) {
|
||||||
|
startpos = len;
|
||||||
|
}
|
||||||
|
else if (startpos < 0) {
|
||||||
|
startpos = 0;
|
||||||
|
}
|
||||||
|
subj.begin += startpos;
|
||||||
|
if (n_args > 3) {
|
||||||
|
// Arg #3 is ending-pos
|
||||||
|
mp_int_t endpos = mp_obj_get_int(args[3]);
|
||||||
|
if (endpos > (mp_int_t) len) {
|
||||||
|
endpos = len;
|
||||||
|
}
|
||||||
|
else if (endpos < startpos) {
|
||||||
|
endpos = startpos;
|
||||||
|
}
|
||||||
|
subj.end = subj.begin_line + endpos;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
int caps_num = (self->re.sub + 1) * 2;
|
int caps_num = (self->re.sub + 1) * 2;
|
||||||
mp_obj_match_t *match = m_new_obj_var(mp_obj_match_t, caps, char *, caps_num);
|
mp_obj_match_t *match = m_new_obj_var(mp_obj_match_t, caps, char *, caps_num);
|
||||||
// cast is a workaround for a bug in msvc: it treats const char** as a const pointer instead of a pointer to pointer to const char
|
// cast is a workaround for a bug in msvc: it treats const char** as a const pointer instead of a pointer to pointer to const char
|
||||||
|
@ -400,11 +425,75 @@ MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(re_sub_obj, 3, 5, re_sub_helper);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if MICROPY_PY_RE_FINDITER
|
||||||
|
|
||||||
|
// Iterator state for finditer(): the four object fields below are passed, in
// order, as the argument vector to re_exec() on every step.
typedef struct _mp_re_finditer_it_t {
|
||||||
|
mp_obj_base_t base; // object header; re_finditer() allocates it with mp_type_polymorph_iter
|
||||||
|
mp_fun_1_t iternext; // step function; re_finditer() sets it to mp_re_finditer_it_iternext
|
||||||
|
mp_obj_t pattern; // compiled regex: either the caller's regex object or the result of mod_re_compile()
|
||||||
|
mp_obj_t str; // subject string being scanned
|
||||||
|
mp_obj_t start; // offset where the next search begins; initially 0 or the caller's pos
|
||||||
|
mp_obj_t end; // caller's endpos, or mp_const_none when no endpos was given
|
||||||
|
} mp_re_finditer_it_t;
|
||||||
|
|
||||||
|
|
||||||
|
// Produce the next Match object for a finditer() iterator, or
// MP_OBJ_STOP_ITERATION once no further match exists.
//
// Each step runs an unanchored re_exec() over (pattern, str, start[, end]) and
// then moves `start` to the end of the match that was just found.
//
// A zero-length match does not advance the end offset, so without special
// handling a pattern such as "a*" would return the same empty match forever.
// After an empty match the next search therefore starts one byte further on,
// and when the empty match already sits at the end of the searchable range the
// iterator is marked exhausted by clearing `str`.
static mp_obj_t mp_re_finditer_it_iternext(mp_obj_t self_in) {
    mp_re_finditer_it_t *self = MP_OBJ_TO_PTR(self_in);

    // A cleared subject string marks an iterator that has already finished.
    if (self->str == MP_OBJ_NULL) {
        return MP_OBJ_STOP_ITERATION;
    }

    mp_obj_t args[4] = {
        self->pattern,
        self->str,
        self->start,
        self->end
    };
    // Only pass endpos on to re_exec() when the caller supplied one.
    int n_args = (self->end == mp_const_none) ? 3 : 4;

    mp_obj_t obj_match = re_exec(false, n_args, args);
    if (obj_match == mp_const_none) {
        self->str = MP_OBJ_NULL;
        return MP_OBJ_STOP_ITERATION;
    }

    mp_obj_match_t *match = MP_OBJ_TO_PTR(obj_match);
    size_t len;
    const char *begin = mp_obj_str_get_data(self->str, &len);

    // caps[0]/caps[1] delimit the whole match; offsets are relative to the
    // real start of the subject, which is what re_exec() expects as pos.
    mp_int_t next_start = match->caps[1] - begin;

    if (match->caps[0] == match->caps[1]) {
        // Empty match: work out where the searchable range ends.
        mp_int_t limit = (mp_int_t)len;
        if (self->end != mp_const_none) {
            mp_int_t endpos = mp_obj_get_int(self->end);
            if (endpos < limit) {
                limit = endpos;
            }
        }
        if (next_start >= limit) {
            // Nothing left to scan; this match is the last one.
            self->str = MP_OBJ_NULL;
        } else {
            // Step over one byte so the same empty match is not found again.
            next_start += 1;
        }
    }

    self->start = MP_OBJ_NEW_SMALL_INT(next_start);
    return obj_match;
}
|
||||||
|
|
||||||
|
// Create the iterator behind finditer().
//
// args[0] is either a compiled regex object or a pattern string, and args[1]
// is the subject string.  The optional pos (args[2]) and endpos (args[3]) are
// only taken into account when args[0] is already a compiled regex; otherwise
// the pattern is compiled here and the whole string is scanned from offset 0.
static mp_obj_t re_finditer(size_t n_args, const mp_obj_t *args) {
    mp_re_finditer_it_t *it = mp_obj_malloc(mp_re_finditer_it_t, &mp_type_polymorph_iter);
    const bool precompiled = mp_obj_is_type(args[0], (mp_obj_type_t *)&re_type);

    it->iternext = mp_re_finditer_it_iternext;
    it->str = args[1];
    it->start = (precompiled && n_args > 2) ? args[2] : MP_OBJ_NEW_SMALL_INT(0);
    it->end = (precompiled && n_args > 3) ? args[3] : mp_const_none;
    it->pattern = precompiled ? args[0] : mod_re_compile(1, args);

    return MP_OBJ_FROM_PTR(it);
}
|
||||||
|
|
||||||
|
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(re_finditer_obj, 2, 4, re_finditer);
|
||||||
|
|
||||||
|
#endif // MICROPY_PY_RE_FINDITER
|
||||||
|
|
||||||
#if !MICROPY_ENABLE_DYNRUNTIME
|
#if !MICROPY_ENABLE_DYNRUNTIME
|
||||||
static const mp_rom_map_elem_t re_locals_dict_table[] = {
|
static const mp_rom_map_elem_t re_locals_dict_table[] = {
|
||||||
{ MP_ROM_QSTR(MP_QSTR_match), MP_ROM_PTR(&re_match_obj) },
|
{ MP_ROM_QSTR(MP_QSTR_match), MP_ROM_PTR(&re_match_obj) },
|
||||||
{ MP_ROM_QSTR(MP_QSTR_search), MP_ROM_PTR(&re_search_obj) },
|
{ MP_ROM_QSTR(MP_QSTR_search), MP_ROM_PTR(&re_search_obj) },
|
||||||
{ MP_ROM_QSTR(MP_QSTR_split), MP_ROM_PTR(&re_split_obj) },
|
{ MP_ROM_QSTR(MP_QSTR_split), MP_ROM_PTR(&re_split_obj) },
|
||||||
|
#if MICROPY_PY_RE_FINDITER
|
||||||
|
{ MP_ROM_QSTR(MP_QSTR_finditer), MP_ROM_PTR(&re_finditer_obj) },
|
||||||
|
#endif
|
||||||
#if MICROPY_PY_RE_SUB
|
#if MICROPY_PY_RE_SUB
|
||||||
{ MP_ROM_QSTR(MP_QSTR_sub), MP_ROM_PTR(&re_sub_obj) },
|
{ MP_ROM_QSTR(MP_QSTR_sub), MP_ROM_PTR(&re_sub_obj) },
|
||||||
#endif
|
#endif
|
||||||
|
@ -455,6 +544,9 @@ static const mp_rom_map_elem_t mp_module_re_globals_table[] = {
|
||||||
{ MP_ROM_QSTR(MP_QSTR_compile), MP_ROM_PTR(&mod_re_compile_obj) },
|
{ MP_ROM_QSTR(MP_QSTR_compile), MP_ROM_PTR(&mod_re_compile_obj) },
|
||||||
{ MP_ROM_QSTR(MP_QSTR_match), MP_ROM_PTR(&re_match_obj) },
|
{ MP_ROM_QSTR(MP_QSTR_match), MP_ROM_PTR(&re_match_obj) },
|
||||||
{ MP_ROM_QSTR(MP_QSTR_search), MP_ROM_PTR(&re_search_obj) },
|
{ MP_ROM_QSTR(MP_QSTR_search), MP_ROM_PTR(&re_search_obj) },
|
||||||
|
#if MICROPY_PY_RE_FINDITER
|
||||||
|
{ MP_ROM_QSTR(MP_QSTR_finditer), MP_ROM_PTR(&re_finditer_obj) },
|
||||||
|
#endif
|
||||||
#if MICROPY_PY_RE_SUB
|
#if MICROPY_PY_RE_SUB
|
||||||
{ MP_ROM_QSTR(MP_QSTR_sub), MP_ROM_PTR(&re_sub_obj) },
|
{ MP_ROM_QSTR(MP_QSTR_sub), MP_ROM_PTR(&re_sub_obj) },
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1663,6 +1663,10 @@ typedef double mp_float_t;
|
||||||
#define MICROPY_PY_RE_MATCH_SPAN_START_END (MICROPY_CONFIG_ROM_LEVEL_AT_LEAST_EVERYTHING)
|
#define MICROPY_PY_RE_MATCH_SPAN_START_END (MICROPY_CONFIG_ROM_LEVEL_AT_LEAST_EVERYTHING)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// Whether to provide finditer() in the "re" module and on compiled regex objects
#ifndef MICROPY_PY_RE_FINDITER
|
||||||
|
#define MICROPY_PY_RE_FINDITER (MICROPY_CONFIG_ROM_LEVEL_AT_LEAST_EXTRA_FEATURES)
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifndef MICROPY_PY_RE_SUB
|
#ifndef MICROPY_PY_RE_SUB
|
||||||
#define MICROPY_PY_RE_SUB (MICROPY_CONFIG_ROM_LEVEL_AT_LEAST_EXTRA_FEATURES)
|
#define MICROPY_PY_RE_SUB (MICROPY_CONFIG_ROM_LEVEL_AT_LEAST_EXTRA_FEATURES)
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -0,0 +1,25 @@
|
||||||
|
try:
|
||||||
|
import re
|
||||||
|
from re import finditer
|
||||||
|
except ImportError:
|
||||||
|
print("SKIP")
|
||||||
|
raise SystemExit
|
||||||
|
|
||||||
|
ms = re.finditer(r"f[a-z]*", "which foot or hand fell fastest")
|
||||||
|
print(list(x.group(0) for x in ms))
|
||||||
|
|
||||||
|
p = re.compile(r"f[a-z]*")
|
||||||
|
ms = p.finditer("which foot or hand fell fastest")
|
||||||
|
print(list(x.group(0) for x in ms))
|
||||||
|
|
||||||
|
ms = p.finditer("which foot or hand fell fastest", 10)
|
||||||
|
print(list(x.group(0) for x in ms))
|
||||||
|
|
||||||
|
ms = p.finditer("which foot or hand fell fastest", 10, 21)
|
||||||
|
print(list(x.group(0) for x in ms))
|
||||||
|
|
||||||
|
ms = re.finditer(r"\s+", "which foot or hand fell fastest")
|
||||||
|
print(list(x.group(0) for x in ms))
|
||||||
|
|
||||||
|
ms = re.finditer(r"zz", "which foot or hand fell fastest")
|
||||||
|
print(list(x.group(0) for x in ms))
|
|
@ -0,0 +1,78 @@
|
||||||
|
# test start and end pos specification
|
||||||
|
|
||||||
|
try:
|
||||||
|
import re
|
||||||
|
except ImportError:
|
||||||
|
print("SKIP")
|
||||||
|
raise SystemExit
|
||||||
|
|
||||||
|
|
||||||
|
def print_groups(match):
    """Print a "----" separator, then each group of *match* from group 0 upwards.

    Nothing beyond the separator is printed when *match* is None.  Groups are
    read until ``match.group()`` raises IndexError.
    """
    print("----")
    if match is None:
        return
    index = 0
    try:
        while True:
            print(match.group(index))
            index += 1
    except IndexError:
        # Stepped past the last group of the match.
        pass
|
||||||
|
|
||||||
|
|
||||||
|
p = re.compile(r"o")
|
||||||
|
m = p.match("dog")
|
||||||
|
print_groups(m)
|
||||||
|
|
||||||
|
m = p.match("dog", 1)
|
||||||
|
print_groups(m)
|
||||||
|
|
||||||
|
m = p.match("dog", 2)
|
||||||
|
print_groups(m)
|
||||||
|
|
||||||
|
# No match past end of input
|
||||||
|
m = p.match("dog", 5)
|
||||||
|
print_groups(m)
|
||||||
|
|
||||||
|
m = p.match("dog", 0, 1)
|
||||||
|
print_groups(m)
|
||||||
|
|
||||||
|
# Caret only matches the actual beginning
|
||||||
|
p = re.compile(r"^o")
|
||||||
|
m = p.match("dog", 1)
|
||||||
|
print_groups(m)
|
||||||
|
|
||||||
|
# End at beginning means searching empty string
|
||||||
|
p = re.compile(r"o")
|
||||||
|
m = p.match("dog", 1, 1)
|
||||||
|
print_groups(m)
|
||||||
|
|
||||||
|
# End before the beginning doesn't match anything
|
||||||
|
m = p.match("dog", 2, 1)
|
||||||
|
print_groups(m)
|
||||||
|
|
||||||
|
# Negative starting values don't crash
|
||||||
|
m = p.search("dog", -2)
|
||||||
|
print_groups(m)
|
||||||
|
|
||||||
|
m = p.search("dog", -2, -5)
|
||||||
|
print_groups(m)
|
||||||
|
|
||||||
|
# Search also works
|
||||||
|
print("--search")
|
||||||
|
|
||||||
|
p = re.compile(r"o")
|
||||||
|
m = p.search("dog")
|
||||||
|
print_groups(m)
|
||||||
|
|
||||||
|
m = p.search("dog", 1)
|
||||||
|
print_groups(m)
|
||||||
|
|
||||||
|
m = p.search("dog", 2)
|
||||||
|
print_groups(m)
|
||||||
|
|
||||||
|
# Negative starting values don't crash
|
||||||
|
m = p.search("dog", -2)
|
||||||
|
print_groups(m)
|
||||||
|
|
||||||
|
m = p.search("dog", -2, -5)
|
||||||
|
print_groups(m)
|
Ładowanie…
Reference in New Issue