From 0d339a143e2b6442366145e7f3d64aada293eaa0 Mon Sep 17 00:00:00 2001 From: Chris Angelico Date: Tue, 10 Jun 2014 02:24:11 +1000 Subject: [PATCH] Support slicing in str_index_to_ptr, and fix a bounds error --- py/objstr.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/py/objstr.c b/py/objstr.c index e3dc904c5a..6ffb7dd4a3 100644 --- a/py/objstr.c +++ b/py/objstr.c @@ -360,8 +360,9 @@ uncomparable: return MP_OBJ_NULL; // op not supported } -// Convert an index into a pointer to its lead byte, or raise IndexError if out of bounds -STATIC const char *str_index_to_ptr(const char *self_data, uint self_len, mp_obj_t index) { +// Convert an index into a pointer to its lead byte. Out of bounds indexing will raise IndexError or +// be capped to the first/last character of the string, depending on is_slice. +STATIC const char *str_index_to_ptr(const char *self_data, uint self_len, mp_obj_t index, bool is_slice) { machine_int_t i; // Copied from mp_get_index; I don't want bounds checking, just give me // the integer as-is. (I can't bounds-check without scanning the whole @@ -377,6 +378,9 @@ STATIC const char *str_index_to_ptr(const char *self_data, uint self_len, mp_obj // Negative indexing is performed by counting from the end of the string. for (s = top - 1; i; --s) { if (s < self_data) { + if (is_slice) { + return self_data; + } nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_IndexError, "string index out of range")); } if (!UTF8_IS_CONT(*s)) { @@ -384,22 +388,27 @@ STATIC const char *str_index_to_ptr(const char *self_data, uint self_len, mp_obj } } ++s; + } else if (!i) { + return self_data; // Shortcut - str[0] is its base pointer } else { // Positive indexing, correspondingly, counts from the start of the string. // It's assumed that negative indexing will generally be used with small // absolute values (eg str[-1], not str[-1000000]), which means it'll be // more efficient this way. - for (s = self_data; i; ++s) { + for (s = self_data; true; ++s) { if (s >= top) { + if (is_slice) { + while (UTF8_IS_CONT(*--s)); + return s; + } nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_IndexError, "string index out of range")); } - if (!UTF8_IS_CONT(*s)) { - --i; + while (UTF8_IS_CONT(*s)) { + ++s; + } + if (!i--) { + return s; } - } - // Skip continuation bytes after the last lead byte - while (UTF8_IS_CONT(*s)) { - ++s; } } return s; @@ -424,7 +433,7 @@ STATIC mp_obj_t str_subscr(mp_obj_t self_in, mp_obj_t index, mp_obj_t value) { uint index_val = mp_get_index(type, self_len, index, false); return MP_OBJ_NEW_SMALL_INT((mp_small_int_t)self_data[index_val]); } - const char *s = str_index_to_ptr((const char *)self_data, self_len, index); + const char *s = str_index_to_ptr((const char *)self_data, self_len, index, false); int len = 1; if (UTF8_IS_NONASCII(*s)) { // Count the number of 1 bits (after the first)