From 4c93955b7b4d3d860aed1551ca6231ac4e388e69 Mon Sep 17 00:00:00 2001 From: Nicko van Someren Date: Sat, 16 Nov 2019 17:07:11 -0700 Subject: [PATCH] py/objslice: Add support for indices() method on slice objects. Instances of the slice class are passed to __getitem__() on objects when the user indexes them with a slice. In practice, what is done with the slice the majority of the time (other than passing it on untouched) is to work out what it means in the context of an array dimension of a particular length. Since Python 2.3 there has been a method on the slice class, indices(), that takes a dimension length and returns the real start, stop and step, accounting for missing or negative values in the slice spec. This commit implements such an indices() method on the slice class. It is configurable at compile-time via MICROPY_PY_BUILTINS_SLICE_INDICES, disabled by default, enabled on unix, stm32 and esp32 ports. This commit also adds new tests for slice indices and for slicing unicode strings. --- ports/esp32/mpconfigport.h | 1 + ports/stm32/mpconfigport.h | 1 + ports/unix/mpconfigport.h | 1 + py/mpconfig.h | 5 ++ py/obj.h | 15 +++--- py/objslice.c | 98 +++++++++++++++++++++++++++++++++- py/sequence.c | 74 +++---------------------- tests/basics/slice_indices.py | 27 ++++++++++ tests/unicode/unicode_slice.py | 12 +++++ 9 files changed, 160 insertions(+), 74 deletions(-) create mode 100644 tests/basics/slice_indices.py create mode 100644 tests/unicode/unicode_slice.py diff --git a/ports/esp32/mpconfigport.h b/ports/esp32/mpconfigport.h index 1924cf2186..983c882ae8 100644 --- a/ports/esp32/mpconfigport.h +++ b/ports/esp32/mpconfigport.h @@ -73,6 +73,7 @@ #define MICROPY_PY_BUILTINS_SET (1) #define MICROPY_PY_BUILTINS_SLICE (1) #define MICROPY_PY_BUILTINS_SLICE_ATTRS (1) +#define MICROPY_PY_BUILTINS_SLICE_INDICES (1) #define MICROPY_PY_BUILTINS_FROZENSET (1) #define MICROPY_PY_BUILTINS_PROPERTY (1) #define MICROPY_PY_BUILTINS_RANGE_ATTRS (1) diff --git a/ports/stm32/mpconfigport.h 
b/ports/stm32/mpconfigport.h index 7ae8ee7d72..55f09f81f0 100644 --- a/ports/stm32/mpconfigport.h +++ b/ports/stm32/mpconfigport.h @@ -97,6 +97,7 @@ #define MICROPY_PY_BUILTINS_MEMORYVIEW (1) #define MICROPY_PY_BUILTINS_FROZENSET (1) #define MICROPY_PY_BUILTINS_SLICE_ATTRS (1) +#define MICROPY_PY_BUILTINS_SLICE_INDICES (1) #define MICROPY_PY_BUILTINS_ROUND_INT (1) #define MICROPY_PY_ALL_SPECIAL_METHODS (1) #define MICROPY_PY_BUILTINS_COMPILE (MICROPY_ENABLE_COMPILER) diff --git a/ports/unix/mpconfigport.h b/ports/unix/mpconfigport.h index 40cd1f5702..d633726a16 100644 --- a/ports/unix/mpconfigport.h +++ b/ports/unix/mpconfigport.h @@ -90,6 +90,7 @@ #define MICROPY_PY_REVERSE_SPECIAL_METHODS (1) #define MICROPY_PY_ARRAY_SLICE_ASSIGN (1) #define MICROPY_PY_BUILTINS_SLICE_ATTRS (1) +#define MICROPY_PY_BUILTINS_SLICE_INDICES (1) #define MICROPY_PY_SYS_EXIT (1) #define MICROPY_PY_SYS_ATEXIT (1) #if MICROPY_PY_SYS_SETTRACE diff --git a/py/mpconfig.h b/py/mpconfig.h index 93d67accdb..d6f4d92328 100644 --- a/py/mpconfig.h +++ b/py/mpconfig.h @@ -894,6 +894,11 @@ typedef double mp_float_t; #define MICROPY_PY_BUILTINS_SLICE_ATTRS (0) #endif +// Whether to support the .indices(len) method on slice objects +#ifndef MICROPY_PY_BUILTINS_SLICE_INDICES +#define MICROPY_PY_BUILTINS_SLICE_INDICES (0) +#endif + // Whether to support frozenset object #ifndef MICROPY_PY_BUILTINS_FROZENSET #define MICROPY_PY_BUILTINS_FROZENSET (0) diff --git a/py/obj.h b/py/obj.h index efeb14b433..ab5b1e6ec0 100644 --- a/py/obj.h +++ b/py/obj.h @@ -778,8 +778,16 @@ static inline mp_map_t *mp_obj_dict_get_map(mp_obj_t dict) { // set void mp_obj_set_store(mp_obj_t self_in, mp_obj_t item); +// slice indexes resolved to particular sequence +typedef struct { + mp_int_t start; + mp_int_t stop; + mp_int_t step; +} mp_bound_slice_t; + // slice void mp_obj_slice_get(mp_obj_t self_in, mp_obj_t *start, mp_obj_t *stop, mp_obj_t *step); +void mp_obj_slice_indices(mp_obj_t self_in, mp_int_t length, mp_bound_slice_t 
*result); // functions @@ -836,13 +844,6 @@ const mp_obj_t *mp_obj_property_get(mp_obj_t self_in); // sequence helpers -// slice indexes resolved to particular sequence -typedef struct { - mp_uint_t start; - mp_uint_t stop; - mp_int_t step; -} mp_bound_slice_t; - void mp_seq_multiply(const void *items, size_t item_sz, size_t len, size_t times, void *dest); #if MICROPY_PY_BUILTINS_SLICE bool mp_seq_get_fast_slice_indexes(mp_uint_t len, mp_obj_t slice, mp_bound_slice_t *indexes); diff --git a/py/objslice.c b/py/objslice.c index cfc819edcc..d17dbf6057 100644 --- a/py/objslice.c +++ b/py/objslice.c @@ -27,7 +27,7 @@ #include #include -#include "py/obj.h" +#include "py/runtime.h" /******************************************************************************/ /* slice object */ @@ -53,6 +53,22 @@ STATIC void slice_print(const mp_print_t *print, mp_obj_t o_in, mp_print_kind_t mp_print_str(print, ")"); } +#if MICROPY_PY_BUILTINS_SLICE_INDICES +STATIC mp_obj_t slice_indices(mp_obj_t self_in, mp_obj_t length_obj) { + mp_int_t length = mp_obj_int_get_checked(length_obj); + mp_bound_slice_t bound_indices; + mp_obj_slice_indices(self_in, length, &bound_indices); + + mp_obj_t results[3] = { + MP_OBJ_NEW_SMALL_INT(bound_indices.start), + MP_OBJ_NEW_SMALL_INT(bound_indices.stop), + MP_OBJ_NEW_SMALL_INT(bound_indices.step), + }; + return mp_obj_new_tuple(3, results); +} +STATIC MP_DEFINE_CONST_FUN_OBJ_2(slice_indices_obj, slice_indices); +#endif + #if MICROPY_PY_BUILTINS_SLICE_ATTRS STATIC void slice_attr(mp_obj_t self_in, qstr attr, mp_obj_t *dest) { if (dest[0] != MP_OBJ_NULL) { @@ -60,22 +76,37 @@ STATIC void slice_attr(mp_obj_t self_in, qstr attr, mp_obj_t *dest) { return; } mp_obj_slice_t *self = MP_OBJ_TO_PTR(self_in); + if (attr == MP_QSTR_start) { dest[0] = self->start; } else if (attr == MP_QSTR_stop) { dest[0] = self->stop; } else if (attr == MP_QSTR_step) { dest[0] = self->step; + #if MICROPY_PY_BUILTINS_SLICE_INDICES + } else if (attr == MP_QSTR_indices) { + dest[0] = 
MP_OBJ_FROM_PTR(&slice_indices_obj); + dest[1] = self_in; + #endif } } #endif +#if MICROPY_PY_BUILTINS_SLICE_INDICES && !MICROPY_PY_BUILTINS_SLICE_ATTRS +STATIC const mp_rom_map_elem_t slice_locals_dict_table[] = { + { MP_ROM_QSTR(MP_QSTR_indices), MP_ROM_PTR(&slice_indices_obj) }, +}; +STATIC MP_DEFINE_CONST_DICT(slice_locals_dict, slice_locals_dict_table); +#endif + const mp_obj_type_t mp_type_slice = { { &mp_type_type }, .name = MP_QSTR_slice, .print = slice_print, #if MICROPY_PY_BUILTINS_SLICE_ATTRS .attr = slice_attr, +#elif MICROPY_PY_BUILTINS_SLICE_INDICES + .locals_dict = (mp_obj_dict_t*)&slice_locals_dict, #endif }; @@ -96,4 +127,69 @@ void mp_obj_slice_get(mp_obj_t self_in, mp_obj_t *start, mp_obj_t *stop, mp_obj_ *step = self->step; } +// Return the real index and step values for a slice when applied to a sequence of +// the given length, resolving missing components, negative values and values off +// the end of the sequence. +void mp_obj_slice_indices(mp_obj_t self_in, mp_int_t length, mp_bound_slice_t *result) { + mp_obj_slice_t *self = MP_OBJ_TO_PTR(self_in); + mp_int_t start, stop, step; + + if (self->step == mp_const_none) { + step = 1; + } else { + step = mp_obj_get_int(self->step); + if (step == 0) { + mp_raise_ValueError("slice step cannot be zero"); + } + } + + if (step > 0) { + // Positive step + if (self->start == mp_const_none) { + start = 0; + } else { + start = mp_obj_get_int(self->start); + if (start < 0) { + start += length; + } + start = MIN(length, MAX(start, 0)); + } + + if (self->stop == mp_const_none) { + stop = length; + } else { + stop = mp_obj_get_int(self->stop); + if (stop < 0) { + stop += length; + } + stop = MIN(length, MAX(stop, 0)); + } + } else { + // Negative step + if (self->start == mp_const_none) { + start = length - 1; + } else { + start = mp_obj_get_int(self->start); + if (start < 0) { + start += length; + } + start = MIN(length - 1, MAX(start, -1)); + } + + if (self->stop == mp_const_none) { + stop = -1; + } else { 
+ stop = mp_obj_get_int(self->stop); + if (stop < 0) { + stop += length; + } + stop = MIN(length - 1, MAX(stop, -1)); + } + } + + result->start = start; + result->stop = stop; + result->step = step; +} + #endif diff --git a/py/sequence.c b/py/sequence.c index 4c19fc69ea..15e925000a 100644 --- a/py/sequence.c +++ b/py/sequence.c @@ -46,78 +46,20 @@ void mp_seq_multiply(const void *items, size_t item_sz, size_t len, size_t times #if MICROPY_PY_BUILTINS_SLICE bool mp_seq_get_fast_slice_indexes(mp_uint_t len, mp_obj_t slice, mp_bound_slice_t *indexes) { - mp_obj_t ostart, ostop, ostep; - mp_int_t start, stop; - mp_obj_slice_get(slice, &ostart, &ostop, &ostep); + mp_obj_slice_indices(slice, len, indexes); - if (ostep != mp_const_none && ostep != MP_OBJ_NEW_SMALL_INT(1)) { - indexes->step = mp_obj_get_int(ostep); - if (indexes->step == 0) { - mp_raise_ValueError("slice step cannot be zero"); - } - } else { - indexes->step = 1; - } - - if (ostart == mp_const_none) { - if (indexes->step > 0) { - start = 0; - } else { - start = len - 1; - } - } else { - start = mp_obj_get_int(ostart); - } - if (ostop == mp_const_none) { - if (indexes->step > 0) { - stop = len; - } else { - stop = 0; - } - } else { - stop = mp_obj_get_int(ostop); - if (stop >= 0 && indexes->step < 0) { - stop += 1; - } - } - - // Unlike subscription, out-of-bounds slice indexes are never error - if (start < 0) { - start = len + start; - if (start < 0) { - if (indexes->step < 0) { - start = -1; - } else { - start = 0; - } - } - } else if (indexes->step > 0 && (mp_uint_t)start > len) { - start = len; - } else if (indexes->step < 0 && (mp_uint_t)start >= len) { - start = len - 1; - } - if (stop < 0) { - stop = len + stop; - if (stop < 0) { - stop = -1; - } - if (indexes->step < 0) { - stop += 1; - } - } else if ((mp_uint_t)stop > len) { - stop = len; + // If the step is negative then stop points to the last item, not after it + if (indexes->step < 0) { + indexes->stop++; } // CPython returns empty sequence in 
such case, or point for assignment is at start - if (indexes->step > 0 && start > stop) { - stop = start; - } else if (indexes->step < 0 && start < stop) { - stop = start + 1; + if (indexes->step > 0 && indexes->start > indexes->stop) { + indexes->stop = indexes->start; + } else if (indexes->step < 0 && indexes->start < indexes->stop) { + indexes->stop = indexes->start + 1; } - indexes->start = start; - indexes->stop = stop; - return indexes->step == 1; } diff --git a/tests/basics/slice_indices.py b/tests/basics/slice_indices.py new file mode 100644 index 0000000000..b7f439ccca --- /dev/null +++ b/tests/basics/slice_indices.py @@ -0,0 +1,27 @@ +# Test builtin slice indices resolution + +# A class that returns an item key +class A: + def __getitem__(self, idx): + return idx + +# Make sure that we have slices and .indices() +try: + A()[2:5].indices(10) +except: + print("SKIP") + raise SystemExit + +print(A()[:].indices(10)) +print(A()[2:].indices(10)) +print(A()[:7].indices(10)) +print(A()[2:7].indices(10)) +print(A()[2:7:2].indices(10)) +print(A()[2:7:-2].indices(10)) +print(A()[7:2:2].indices(10)) +print(A()[7:2:-2].indices(10)) + +print(A()[2:7:2].indices(5)) +print(A()[2:7:-2].indices(5)) +print(A()[7:2:2].indices(5)) +print(A()[7:2:-2].indices(5)) diff --git a/tests/unicode/unicode_slice.py b/tests/unicode/unicode_slice.py new file mode 100644 index 0000000000..d9237088f8 --- /dev/null +++ b/tests/unicode/unicode_slice.py @@ -0,0 +1,12 @@ +# Test slicing of Unicode strings + +s = "Привет" + +print(s[:]) +print(s[2:]) +print(s[:5]) +print(s[2:5]) +print(s[2:5:1]) +print(s[2:10]) +print(s[-3:10]) +print(s[-4:10])