Jared Hancock 2024-04-26 17:56:50 -05:00 zatwierdzone przez GitHub
commit 4bcaf88c22
Nie znaleziono w bazie danych klucza dla tego podpisu
ID klucza GPG: B5690EEEBB952194
5 zmienionych plików z 221 dodań i 5 usunięć

Wyświetl plik

@ -140,6 +140,12 @@ Functions
Note: availability of this function depends on :term:`MicroPython port`. Note: availability of this function depends on :term:`MicroPython port`.
.. function:: finditer(regex_str, string)
Return an iterator yielding ``Match`` objects over all non-overlapping
matches for the RE *regex_str* in *string*. The string is scanned
left-to-right, and matches are returned in the order found.
.. data:: DEBUG .. data:: DEBUG
Flag value, display debug information about compiled expression. Flag value, display debug information about compiled expression.
@ -154,15 +160,26 @@ Regex objects
Compiled regular expression. Instances of this class are created using Compiled regular expression. Instances of this class are created using
`re.compile()`. `re.compile()`.
.. method:: regex.match(string) .. method:: regex.match(string, [pos, [endpos]])
regex.search(string) regex.search(string, [pos, [endpos]])
regex.finditer(string, [pos, [endpos]])
regex.sub(replace, string, count=0, flags=0, /) regex.sub(replace, string, count=0, flags=0, /)
Similar to the module-level functions :meth:`match`, :meth:`search` Similar to the module-level functions :meth:`match`, :meth:`search`,
and :meth:`sub`. :meth:`finditer`, and :meth:`sub`.
Using methods is (much) more efficient if the same regex is applied to Using methods is (much) more efficient if the same regex is applied to
multiple strings. multiple strings.
The optional second parameter *pos* gives an index in the string where the
search is to start; it defaults to ``0``. This is not completely equivalent
to slicing the string; the ``'^'`` pattern character matches at the real
beginning of the string and at positions just after a newline, but not
necessarily at the index where the search is to start.
The optional parameter *endpos* limits how far the string will be searched;
it will be as if the string is *endpos* characters long, so only the
characters from *pos* to ``endpos - 1`` will be searched for a match.
.. method:: regex.split(string, max_split=-1, /) .. method:: regex.split(string, max_split=-1, /)
Split a *string* using regex. If *max_split* is given, it specifies Split a *string* using regex. If *max_split* is given, it specifies

Wyświetl plik

@ -195,10 +195,11 @@ static void re_print(const mp_print_t *print, mp_obj_t self_in, mp_print_kind_t
} }
static mp_obj_t re_exec(bool is_anchored, uint n_args, const mp_obj_t *args) { static mp_obj_t re_exec(bool is_anchored, uint n_args, const mp_obj_t *args) {
(void)n_args;
mp_obj_re_t *self; mp_obj_re_t *self;
bool was_compiled = false;
if (mp_obj_is_type(args[0], (mp_obj_type_t *)&re_type)) { if (mp_obj_is_type(args[0], (mp_obj_type_t *)&re_type)) {
self = MP_OBJ_TO_PTR(args[0]); self = MP_OBJ_TO_PTR(args[0]);
was_compiled = true;
} else { } else {
self = MP_OBJ_TO_PTR(mod_re_compile(1, args)); self = MP_OBJ_TO_PTR(mod_re_compile(1, args));
} }
@ -206,6 +207,30 @@ static mp_obj_t re_exec(bool is_anchored, uint n_args, const mp_obj_t *args) {
size_t len; size_t len;
subj.begin_line = subj.begin = mp_obj_str_get_data(args[1], &len); subj.begin_line = subj.begin = mp_obj_str_get_data(args[1], &len);
subj.end = subj.begin + len; subj.end = subj.begin + len;
if (was_compiled && n_args > 2) {
// Arg #2 is starting-pos
mp_int_t startpos = mp_obj_get_int(args[2]);
if (startpos > (mp_int_t) len) {
startpos = len;
}
else if (startpos < 0) {
startpos = 0;
}
subj.begin += startpos;
if (n_args > 3) {
// Arg #3 is ending-pos
mp_int_t endpos = mp_obj_get_int(args[3]);
if (endpos > (mp_int_t) len) {
endpos = len;
}
else if (endpos < startpos) {
endpos = startpos;
}
subj.end = subj.begin_line + endpos;
}
}
int caps_num = (self->re.sub + 1) * 2; int caps_num = (self->re.sub + 1) * 2;
mp_obj_match_t *match = m_new_obj_var(mp_obj_match_t, caps, char *, caps_num); mp_obj_match_t *match = m_new_obj_var(mp_obj_match_t, caps, char *, caps_num);
// cast is a workaround for a bug in msvc: it treats const char** as a const pointer instead of a pointer to pointer to const char // cast is a workaround for a bug in msvc: it treats const char** as a const pointer instead of a pointer to pointer to const char
@ -400,11 +425,75 @@ MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(re_sub_obj, 3, 5, re_sub_helper);
#endif #endif
#if MICROPY_PY_RE_FINDITER
// State carried by the iterator object that finditer() returns.
// re_finditer() allocates it with mp_type_polymorph_iter and fills in every field.
typedef struct _mp_re_finditer_it_t {
mp_obj_base_t base;
// Function called to produce the next item; re_finditer() sets it to
// mp_re_finditer_it_iternext.
mp_fun_1_t iternext;
// Compiled regex object (either passed in by the caller or produced by
// mod_re_compile() from a pattern string).
mp_obj_t pattern;
// Subject string being scanned.
mp_obj_t str;
// Position at which the next search begins: initially 0 or the caller's
// *pos* argument, then advanced by the iternext function after each match.
mp_obj_t start;
// Caller's *endpos* argument, or mp_const_none when no end limit was given.
mp_obj_t end;
} mp_re_finditer_it_t;
// Produce the next match of a finditer() iterator.
//
// self_in is an mp_re_finditer_it_t. Each call runs an unanchored search of
// self->str with self->pattern, starting at self->start and (when self->end is
// not None) limited by self->end.
//
// Returns the match object, or MP_OBJ_STOP_ITERATION when no further match
// exists.
//
// After a non-empty match the next search starts where that match ended. After
// a zero-length match the start is moved one position further, otherwise the
// same empty match would be returned forever (e.g. for patterns such as "a*"
// or "x?"). Once that step would pass the end of the subject (or the caller's
// endpos) the iterator is marked exhausted by storing MP_OBJ_NULL in
// self->start; MP_OBJ_NULL can never be supplied by Python code, so it cannot
// be confused with a caller-provided position.
//
// NOTE(review): positions here are the same offsets re_exec() adds to the
// string data pointer, i.e. byte offsets. Stepping one position past an empty
// match may therefore land inside a multi-byte character of a unicode str --
// confirm whether that needs special handling.
static mp_obj_t mp_re_finditer_it_iternext(mp_obj_t self_in) {
    mp_re_finditer_it_t *self = MP_OBJ_TO_PTR(self_in);

    // The final zero-length match was already handed out by an earlier call.
    if (self->start == MP_OBJ_NULL) {
        return MP_OBJ_STOP_ITERATION;
    }

    mp_obj_t args[4] = {
        self->pattern,
        self->str,
        self->start,
        self->end
    };
    // Only forward the end position when the caller supplied one.
    uint n_args = (self->end == mp_const_none) ? 3 : 4;

    mp_obj_t obj_match = re_exec(false, n_args, args);
    if (obj_match == mp_const_none) {
        return MP_OBJ_STOP_ITERATION;
    }

    mp_obj_match_t *match = MP_OBJ_TO_PTR(obj_match);
    size_t len;
    const char *begin = mp_obj_str_get_data(self->str, &len);

    // caps[0]/caps[1] delimit the whole match; resume from its end.
    mp_int_t next = match->caps[1] - begin;

    if (match->caps[0] == match->caps[1]) {
        // Zero-length match: force progress so the scan cannot loop forever.
        ++next;
        if (next > (mp_int_t)len
            || (self->end != mp_const_none && next > mp_obj_get_int(self->end))) {
            // Nothing left to scan; still return this match, then stop.
            self->start = MP_OBJ_NULL;
            return obj_match;
        }
    }

    self->start = MP_OBJ_NEW_SMALL_INT(next);
    return obj_match;
}
// Create the iterator returned by finditer().
//
// args[0] is either a compiled regex object or a pattern to be compiled with
// mod_re_compile(); args[1] is the subject string. When args[0] is a compiled
// regex, the optional args[2] and args[3] give the start and end positions;
// for an uncompiled pattern any extra arguments are not used.
static mp_obj_t re_finditer(size_t n_args, const mp_obj_t *args) {
    const bool precompiled = mp_obj_is_type(args[0], (mp_obj_type_t *)&re_type);

    mp_re_finditer_it_t *it = mp_obj_malloc(mp_re_finditer_it_t, &mp_type_polymorph_iter);
    it->iternext = mp_re_finditer_it_iternext;
    it->str = args[1];

    // Positions are only honoured for an already-compiled regex object.
    it->start = (precompiled && n_args > 2) ? args[2] : MP_OBJ_NEW_SMALL_INT(0);
    it->end = (precompiled && n_args > 3) ? args[3] : mp_const_none;

    it->pattern = precompiled ? args[0] : mod_re_compile(1, args);

    return MP_OBJ_FROM_PTR(it);
}
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(re_finditer_obj, 2, 4, re_finditer);
#endif // MICROPY_PY_RE_FINDITER
#if !MICROPY_ENABLE_DYNRUNTIME #if !MICROPY_ENABLE_DYNRUNTIME
static const mp_rom_map_elem_t re_locals_dict_table[] = { static const mp_rom_map_elem_t re_locals_dict_table[] = {
{ MP_ROM_QSTR(MP_QSTR_match), MP_ROM_PTR(&re_match_obj) }, { MP_ROM_QSTR(MP_QSTR_match), MP_ROM_PTR(&re_match_obj) },
{ MP_ROM_QSTR(MP_QSTR_search), MP_ROM_PTR(&re_search_obj) }, { MP_ROM_QSTR(MP_QSTR_search), MP_ROM_PTR(&re_search_obj) },
{ MP_ROM_QSTR(MP_QSTR_split), MP_ROM_PTR(&re_split_obj) }, { MP_ROM_QSTR(MP_QSTR_split), MP_ROM_PTR(&re_split_obj) },
#if MICROPY_PY_RE_FINDITER
{ MP_ROM_QSTR(MP_QSTR_finditer), MP_ROM_PTR(&re_finditer_obj) },
#endif
#if MICROPY_PY_RE_SUB #if MICROPY_PY_RE_SUB
{ MP_ROM_QSTR(MP_QSTR_sub), MP_ROM_PTR(&re_sub_obj) }, { MP_ROM_QSTR(MP_QSTR_sub), MP_ROM_PTR(&re_sub_obj) },
#endif #endif
@ -455,6 +544,9 @@ static const mp_rom_map_elem_t mp_module_re_globals_table[] = {
{ MP_ROM_QSTR(MP_QSTR_compile), MP_ROM_PTR(&mod_re_compile_obj) }, { MP_ROM_QSTR(MP_QSTR_compile), MP_ROM_PTR(&mod_re_compile_obj) },
{ MP_ROM_QSTR(MP_QSTR_match), MP_ROM_PTR(&re_match_obj) }, { MP_ROM_QSTR(MP_QSTR_match), MP_ROM_PTR(&re_match_obj) },
{ MP_ROM_QSTR(MP_QSTR_search), MP_ROM_PTR(&re_search_obj) }, { MP_ROM_QSTR(MP_QSTR_search), MP_ROM_PTR(&re_search_obj) },
#if MICROPY_PY_RE_FINDITER
{ MP_ROM_QSTR(MP_QSTR_finditer), MP_ROM_PTR(&re_finditer_obj) },
#endif
#if MICROPY_PY_RE_SUB #if MICROPY_PY_RE_SUB
{ MP_ROM_QSTR(MP_QSTR_sub), MP_ROM_PTR(&re_sub_obj) }, { MP_ROM_QSTR(MP_QSTR_sub), MP_ROM_PTR(&re_sub_obj) },
#endif #endif

Wyświetl plik

@ -1663,6 +1663,10 @@ typedef double mp_float_t;
#define MICROPY_PY_RE_MATCH_SPAN_START_END (MICROPY_CONFIG_ROM_LEVEL_AT_LEAST_EVERYTHING) #define MICROPY_PY_RE_MATCH_SPAN_START_END (MICROPY_CONFIG_ROM_LEVEL_AT_LEAST_EVERYTHING)
#endif #endif
// Whether to provide finditer() in the "re" module and on compiled regex objects
#ifndef MICROPY_PY_RE_FINDITER
#define MICROPY_PY_RE_FINDITER (MICROPY_CONFIG_ROM_LEVEL_AT_LEAST_EXTRA_FEATURES)
#endif
#ifndef MICROPY_PY_RE_SUB #ifndef MICROPY_PY_RE_SUB
#define MICROPY_PY_RE_SUB (MICROPY_CONFIG_ROM_LEVEL_AT_LEAST_EXTRA_FEATURES) #define MICROPY_PY_RE_SUB (MICROPY_CONFIG_ROM_LEVEL_AT_LEAST_EXTRA_FEATURES)
#endif #endif

Wyświetl plik

@ -0,0 +1,25 @@
try:
import re
from re import finditer
except ImportError:
print("SKIP")
raise SystemExit
ms = re.finditer(r"f[a-z]*", "which foot or hand fell fastest")
print(list(x.group(0) for x in ms))
p = re.compile(r"f[a-z]*")
ms = p.finditer("which foot or hand fell fastest")
print(list(x.group(0) for x in ms))
ms = p.finditer("which foot or hand fell fastest", 10)
print(list(x.group(0) for x in ms))
ms = p.finditer("which foot or hand fell fastest", 10, 21)
print(list(x.group(0) for x in ms))
ms = re.finditer(r"\s+", "which foot or hand fell fastest")
print(list(x.group(0) for x in ms))
ms = re.finditer(r"zz", "which foot or hand fell fastest")
print(list(x.group(0) for x in ms))

Wyświetl plik

@ -0,0 +1,78 @@
# test start and end pos specification

try:
    import re
except ImportError:
    print("SKIP")
    raise SystemExit


def print_groups(match):
    # Print a separator, then every group of the match (group 0 upwards).
    # Prints only the separator when there was no match.
    print("----")
    try:
        if match is not None:
            i = 0
            while True:
                # group() raises IndexError past the last group, ending the loop.
                print(match.group(i))
                i += 1
    except IndexError:
        pass


p = re.compile(r"o")
m = p.match("dog")
print_groups(m)

m = p.match("dog", 1)
print_groups(m)

m = p.match("dog", 2)
print_groups(m)

# No match past end of input
m = p.match("dog", 5)
print_groups(m)

m = p.match("dog", 0, 1)
print_groups(m)

# Caret only matches the actual beginning
p = re.compile(r"^o")
m = p.match("dog", 1)
print_groups(m)

# End at beginning means searching empty string
p = re.compile(r"o")
m = p.match("dog", 1, 1)
print_groups(m)

# End before the beginning doesn't match anything
m = p.match("dog", 2, 1)
print_groups(m)

# Negative starting values don't crash
# (uses match() here; the search() variant is covered in the section below)
m = p.match("dog", -2)
print_groups(m)

m = p.match("dog", -2, -5)
print_groups(m)

# Search also works
print("--search")

p = re.compile(r"o")
m = p.search("dog")
print_groups(m)

m = p.search("dog", 1)
print_groups(m)

m = p.search("dog", 2)
print_groups(m)

# Negative starting values don't crash
m = p.search("dog", -2)
print_groups(m)

m = p.search("dog", -2, -5)
print_groups(m)