diff --git a/py/obj.h b/py/obj.h index 05ccb27574..68d9588b4d 100644 --- a/py/obj.h +++ b/py/obj.h @@ -208,6 +208,7 @@ mp_obj_t mp_obj_new_int(machine_int_t value); mp_obj_t mp_obj_new_int_from_uint(machine_uint_t value); mp_obj_t mp_obj_new_int_from_long_str(const char *s); mp_obj_t mp_obj_new_str(const byte* data, uint len, bool make_qstr_if_not_already); +mp_obj_t mp_obj_new_bytes(const byte* data, uint len); #if MICROPY_ENABLE_FLOAT mp_obj_t mp_obj_new_float(mp_float_t val); mp_obj_t mp_obj_new_complex(mp_float_t real, mp_float_t imag); @@ -280,7 +281,7 @@ void mp_obj_exception_get_traceback(mp_obj_t self_in, machine_uint_t *n, machine // str extern const mp_obj_type_t str_type; -mp_obj_t mp_obj_str_builder_start(uint len, byte **data); +mp_obj_t mp_obj_str_builder_start(const mp_obj_type_t *type, uint len, byte **data); mp_obj_t mp_obj_str_builder_end(mp_obj_t o_in); bool mp_obj_str_equal(mp_obj_t s1, mp_obj_t s2); uint mp_obj_str_get_hash(mp_obj_t self_in); @@ -288,6 +289,9 @@ uint mp_obj_str_get_len(mp_obj_t self_in); const char *mp_obj_str_get_str(mp_obj_t self_in); // use this only if you need the string to be null terminated const byte *mp_obj_str_get_data(mp_obj_t self_in, uint *len); +// bytes +extern const mp_obj_type_t bytes_type; + #if MICROPY_ENABLE_FLOAT // float extern const mp_obj_type_t float_type; diff --git a/py/objstr.c b/py/objstr.c index 82e97c0aff..723eebd614 100644 --- a/py/objstr.c +++ b/py/objstr.c @@ -28,16 +28,21 @@ typedef struct _mp_obj_str_t { // use this macro to extract the string data and length #define GET_STR_DATA_LEN(str_obj_in, str_data, str_len) const byte *str_data; uint str_len; if (MP_OBJ_IS_QSTR(str_obj_in)) { str_data = qstr_data(MP_OBJ_QSTR_VALUE(str_obj_in), &str_len); } else { str_len = ((mp_obj_str_t*)str_obj_in)->len; str_data = ((mp_obj_str_t*)str_obj_in)->data; } -static mp_obj_t mp_obj_new_str_iterator(mp_obj_t str, int cur); +static mp_obj_t mp_obj_new_str_iterator(mp_obj_t str); +static mp_obj_t mp_obj_new_bytes_iterator(mp_obj_t str); /******************************************************************************/ /* str */ void str_print(void (*print)(void *env, const char *fmt, ...), void *env, mp_obj_t self_in, mp_print_kind_t kind) { GET_STR_DATA_LEN(self_in, str_data, str_len); - if (kind == PRINT_STR) { + bool is_bytes = MP_OBJ_IS_TYPE(self_in, &bytes_type); + if (kind == PRINT_STR && !is_bytes) { print(env, "%.*s", str_len, str_data); } else { + if (is_bytes) { + print(env, "b"); + } // TODO need to escape chars etc print(env, "'%.*s'", str_len, str_data); } @@ -71,7 +76,11 @@ mp_obj_t str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) { // ["no", "yes"][1 == 2] is common idiom if (MP_OBJ_IS_SMALL_INT(rhs_in)) { uint index = mp_get_index(mp_obj_get_type(lhs_in), lhs_len, rhs_in); - return mp_obj_new_str(lhs_data + index, 1, true); + if (MP_OBJ_IS_TYPE(lhs_in, &bytes_type)) { + return MP_OBJ_NEW_SMALL_INT(lhs_data[index]); + } else { + return mp_obj_new_str(lhs_data + index, 1, true); + } #if MICROPY_ENABLE_SLICE } else if (MP_OBJ_IS_TYPE(rhs_in, &slice_type)) { machine_int_t start, stop, step; @@ -120,7 +129,7 @@ mp_obj_t str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) { // code for non-qstr byte *data; - mp_obj_t s = mp_obj_str_builder_start(alloc_len, &data); + mp_obj_t s = mp_obj_str_builder_start(mp_obj_get_type(lhs_in), alloc_len, &data); memcpy(data, lhs_data, lhs_len); memcpy(data + lhs_len, rhs_data, rhs_len); return mp_obj_str_builder_end(s); @@ -143,7 +152,7 @@ mp_obj_t str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) { } int n = MP_OBJ_SMALL_INT_VALUE(rhs_in); byte *data; - mp_obj_t s = mp_obj_str_builder_start(lhs_len * n, &data); + mp_obj_t s = mp_obj_str_builder_start(mp_obj_get_type(lhs_in), lhs_len * n, &data); mp_seq_multiply(lhs_data, sizeof(*lhs_data), lhs_len, n, data); return mp_obj_str_builder_end(s); } @@ -152,10 +161,6 @@ mp_obj_t str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) { return MP_OBJ_NULL; // op not supported } -static mp_obj_t str_getiter(mp_obj_t o_in) { - return mp_obj_new_str_iterator(o_in, 0); -} - mp_obj_t str_join(mp_obj_t self_in, mp_obj_t arg) { assert(MP_OBJ_IS_STR(self_in)); @@ -188,7 +193,7 @@ mp_obj_t str_join(mp_obj_t self_in, mp_obj_t arg) { // make joined string byte *data; - mp_obj_t joined_str = mp_obj_str_builder_start(required_len, &data); + mp_obj_t joined_str = mp_obj_str_builder_start(mp_obj_get_type(self_in), required_len, &data); for (int i = 0; i < seq_len; i++) { if (i > 0) { memcpy(data, sep_str, sep_len); @@ -393,13 +398,23 @@ const mp_obj_type_t str_type = { "str", .print = str_print, .binary_op = str_binary_op, - .getiter = str_getiter, + .getiter = mp_obj_new_str_iterator, .methods = str_type_methods, }; -mp_obj_t mp_obj_str_builder_start(uint len, byte **data) { +// Reuses most of methods from str +const mp_obj_type_t bytes_type = { + { &mp_const_type }, + "bytes", + .print = str_print, + .binary_op = str_binary_op, + .getiter = mp_obj_new_bytes_iterator, + .methods = str_type_methods, +}; + +mp_obj_t mp_obj_str_builder_start(const mp_obj_type_t *type, uint len, byte **data) { mp_obj_str_t *o = m_new_obj_var(mp_obj_str_t, byte, len + 1); - o->base.type = &str_type; + o->base.type = type; o->len = len; *data = o->data; return o; @@ -413,6 +428,16 @@ mp_obj_t mp_obj_str_builder_end(mp_obj_t o_in) { return o; } +static mp_obj_t str_new(const mp_obj_type_t *type, const byte* data, uint len) { + mp_obj_str_t *o = m_new_obj_var(mp_obj_str_t, byte, len + 1); + o->base.type = type; + o->hash = qstr_compute_hash(data, len); + o->len = len; + memcpy(o->data, data, len * sizeof(byte)); + o->data[len] = '\0'; // for now we add null for compatibility with C ASCIIZ strings + return o; +} + mp_obj_t mp_obj_new_str(const byte* data, uint len, bool make_qstr_if_not_already) { qstr q = qstr_find_strn(data, len); if (q != MP_QSTR_NULL) { @@ -423,16 +448,14 @@ mp_obj_t mp_obj_new_str(const byte* data, uint len, bool make_qstr_if_not_alread return MP_OBJ_NEW_QSTR(qstr_from_strn((const char*)data, len)); } else { // no existing qstr, don't make one - mp_obj_str_t *o = m_new_obj_var(mp_obj_str_t, byte, len + 1); - o->base.type = &str_type; - o->hash = qstr_compute_hash(data, len); - o->len = len; - memcpy(o->data, data, len * sizeof(byte)); - o->data[len] = '\0'; // for now we add null for compatibility with C ASCIIZ strings - return o; + return str_new(&str_type, data, len); } } +mp_obj_t mp_obj_new_bytes(const byte* data, uint len) { + return str_new(&bytes_type, data, len); +} + bool mp_obj_str_equal(mp_obj_t s1, mp_obj_t s2) { if (MP_OBJ_IS_QSTR(s1) && MP_OBJ_IS_QSTR(s2)) { return s1 == s2; @@ -522,10 +545,36 @@ static const mp_obj_type_t str_it_type = { .iternext = str_it_iternext, }; -mp_obj_t mp_obj_new_str_iterator(mp_obj_t str, int cur) { +mp_obj_t bytes_it_iternext(mp_obj_t self_in) { + mp_obj_str_it_t *self = self_in; + GET_STR_DATA_LEN(self->str, str, len); + if (self->cur < len) { + mp_obj_t o_out = MP_OBJ_NEW_SMALL_INT(str[self->cur]); + self->cur += 1; + return o_out; + } else { + return mp_const_stop_iteration; + } +} + +static const mp_obj_type_t bytes_it_type = { + { &mp_const_type }, + "bytes_iterator", + .iternext = bytes_it_iternext, +}; + +mp_obj_t mp_obj_new_str_iterator(mp_obj_t str) { mp_obj_str_it_t *o = m_new_obj(mp_obj_str_it_t); o->base.type = &str_it_type; o->str = str; - o->cur = cur; + o->cur = 0; + return o; +} + +mp_obj_t mp_obj_new_bytes_iterator(mp_obj_t str) { + mp_obj_str_it_t *o = m_new_obj(mp_obj_str_it_t); + o->base.type = &bytes_it_type; + o->str = str; + o->cur = 0; return o; } diff --git a/py/runtime.c b/py/runtime.c index 0d9906ea60..3d56cc87ba 100644 --- a/py/runtime.c +++ b/py/runtime.c @@ -408,6 +408,13 @@ mp_obj_t rt_load_const_str(qstr qstr) { return MP_OBJ_NEW_QSTR(qstr); } +mp_obj_t rt_load_const_bytes(qstr qstr) { + DEBUG_OP_printf("load b'%s'\n", qstr_str(qstr)); + uint len; + const byte *data = qstr_data(qstr, &len); + return mp_obj_new_bytes(data, len); +} + mp_obj_t rt_load_name(qstr qstr) { // logic: search locals, globals, builtins DEBUG_OP_printf("load name %s\n", qstr_str(qstr)); diff --git a/py/runtime.h b/py/runtime.h index d62d1f86d6..c8113f4a7d 100644 --- a/py/runtime.h +++ b/py/runtime.h @@ -2,6 +2,7 @@ int rt_is_true(mp_obj_t arg); mp_obj_t rt_load_const_dec(qstr qstr); mp_obj_t rt_load_const_str(qstr qstr); +mp_obj_t rt_load_const_bytes(qstr qstr); mp_obj_t rt_load_name(qstr qstr); mp_obj_t rt_load_global(qstr qstr); mp_obj_t rt_load_build_class(void); diff --git a/py/vm.c b/py/vm.c index b6db0bb87f..affa5943bd 100644 --- a/py/vm.c +++ b/py/vm.c @@ -138,7 +138,7 @@ bool mp_execute_byte_code_2(const byte *code_info, const byte **ip_in_out, mp_ob case MP_BC_LOAD_CONST_BYTES: DECODE_QSTR; - PUSH(rt_load_const_str(qst)); // TODO + PUSH(rt_load_const_bytes(qst)); break; case MP_BC_LOAD_CONST_STRING: diff --git a/tests/basics/bytes.py b/tests/basics/bytes.py new file mode 100644 index 0000000000..7d0cf22d44 --- /dev/null +++ b/tests/basics/bytes.py @@ -0,0 +1,11 @@ +a = b"123" +print(a) +print(str(a)) +print(repr(a)) +print(a[0], a[2]) +print(a[-1]) + +s = 0 +for i in a: + s += i +print(s)