Support slicing in str_index_to_ptr, and fix a bounds error

pull/671/head
Chris Angelico 2014-06-10 02:24:11 +10:00
rodzic 24371c7267
commit 0d339a143e
1 zmienionych plików z 19 dodań i 10 usunięć

Wyświetl plik

@ -360,8 +360,9 @@ uncomparable:
return MP_OBJ_NULL; // op not supported
}
// Convert an index into a pointer to its lead byte, or raise IndexError if out of bounds
STATIC const char *str_index_to_ptr(const char *self_data, uint self_len, mp_obj_t index) {
// Convert an index into a pointer to its lead byte. Out of bounds indexing will raise IndexError or
// be capped to the first/last character of the string, depending on is_slice.
STATIC const char *str_index_to_ptr(const char *self_data, uint self_len, mp_obj_t index, bool is_slice) {
machine_int_t i;
// Copied from mp_get_index; I don't want bounds checking, just give me
// the integer as-is. (I can't bounds-check without scanning the whole
@ -377,6 +378,9 @@ STATIC const char *str_index_to_ptr(const char *self_data, uint self_len, mp_obj
// Negative indexing is performed by counting from the end of the string.
for (s = top - 1; i; --s) {
if (s < self_data) {
if (is_slice) {
return self_data;
}
nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_IndexError, "string index out of range"));
}
if (!UTF8_IS_CONT(*s)) {
@ -384,22 +388,27 @@ STATIC const char *str_index_to_ptr(const char *self_data, uint self_len, mp_obj
}
}
++s;
} else if (!i) {
return self_data; // Shortcut - str[0] is its base pointer
} else {
// Positive indexing, correspondingly, counts from the start of the string.
// It's assumed that negative indexing will generally be used with small
// absolute values (eg str[-1], not str[-1000000]), which means it'll be
// more efficient this way.
for (s = self_data; i; ++s) {
for (s = self_data; true; ++s) {
if (s >= top) {
if (is_slice) {
while (UTF8_IS_CONT(*--s));
return s;
}
nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_IndexError, "string index out of range"));
}
if (!UTF8_IS_CONT(*s)) {
--i;
while (UTF8_IS_CONT(*s)) {
++s;
}
if (!i--) {
return s;
}
}
// Skip continuation bytes after the last lead byte
while (UTF8_IS_CONT(*s)) {
++s;
}
}
return s;
@ -424,7 +433,7 @@ STATIC mp_obj_t str_subscr(mp_obj_t self_in, mp_obj_t index, mp_obj_t value) {
uint index_val = mp_get_index(type, self_len, index, false);
return MP_OBJ_NEW_SMALL_INT((mp_small_int_t)self_data[index_val]);
}
const char *s = str_index_to_ptr((const char *)self_data, self_len, index);
const char *s = str_index_to_ptr((const char *)self_data, self_len, index, false);
int len = 1;
if (UTF8_IS_NONASCII(*s)) {
// Count the number of 1 bits (after the first)