From 1ecea7c7539e73f105fef25da8a3bde7783da755 Mon Sep 17 00:00:00 2001 From: Paul Sokolovsky Date: Fri, 21 Mar 2014 23:46:59 +0200 Subject: [PATCH] py: Make 'bytes' be a proper type, support standard constructor args. --- py/builtin.c | 12 ------- py/objstr.c | 77 ++++++++++++++++++++++++++++++++++++++++++- py/runtime.c | 2 +- tests/basics/bytes.py | 28 ++++++++++++++++ 4 files changed, 105 insertions(+), 14 deletions(-) diff --git a/py/builtin.c b/py/builtin.c index 11b86111ec..93e91072c4 100644 --- a/py/builtin.c +++ b/py/builtin.c @@ -375,18 +375,6 @@ STATIC mp_obj_t mp_builtin_sorted(uint n_args, const mp_obj_t *args, mp_map_t *k MP_DEFINE_CONST_FUN_OBJ_KW(mp_builtin_sorted_obj, 1, mp_builtin_sorted); -// TODO: This should be type, this is just quick CPython compat hack -STATIC mp_obj_t mp_builtin_bytes(uint n_args, const mp_obj_t *args) { - if (!MP_OBJ_IS_QSTR(args[0]) && !MP_OBJ_IS_TYPE(args[0], &str_type)) { - assert(0); - } - // Currently, MicroPython strings are mix between CPython byte and unicode - // strings. So, conversion is null so far. - return args[0]; -} - -MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(mp_builtin_bytes_obj, 1, 3, mp_builtin_bytes); - STATIC mp_obj_t mp_builtin_id(mp_obj_t o_in) { return mp_obj_new_int((machine_int_t)o_in); } diff --git a/py/objstr.c b/py/objstr.c index 44e84d7090..35a948700c 100644 --- a/py/objstr.c +++ b/py/objstr.c @@ -17,6 +17,8 @@ typedef struct _mp_obj_str_t { const byte *data; } mp_obj_str_t; +const mp_obj_t mp_const_empty_bytes; + // use this macro to extract the string hash #define GET_STR_HASH(str_obj_in, str_hash) uint str_hash; if (MP_OBJ_IS_QSTR(str_obj_in)) { str_hash = qstr_hash(MP_OBJ_QSTR_VALUE(str_obj_in)); } else { str_hash = ((mp_obj_str_t*)str_obj_in)->hash; } @@ -113,6 +115,75 @@ STATIC mp_obj_t str_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const mp_ } } +STATIC mp_obj_t bytes_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const mp_obj_t *args) { + if (n_args == 0) { + return mp_const_empty_bytes; + } + + if (MP_OBJ_IS_STR(args[0])) { + if (n_args < 2 || n_args > 3) { + goto wrong_args; + } + GET_STR_DATA_LEN(args[0], str_data, str_len); + GET_STR_HASH(args[0], str_hash); + mp_obj_str_t *o = str_new(&bytes_type, NULL, str_len); + o->data = str_data; + o->hash = str_hash; + return o; + } + + if (n_args > 1) { + goto wrong_args; + } + + if (MP_OBJ_IS_SMALL_INT(args[0])) { + uint len = MP_OBJ_SMALL_INT_VALUE(args[0]); + byte *data; + + mp_obj_t o = mp_obj_str_builder_start(&bytes_type, len, &data); + memset(data, 0, len); + return mp_obj_str_builder_end(o); + } + + int len; + byte *data; + vstr_t *vstr = NULL; + mp_obj_t o = NULL; + // Try to create array of exact len if initializer len is known + mp_obj_t len_in = mp_obj_len_maybe(args[0]); + if (len_in == MP_OBJ_NULL) { + len = -1; + vstr = vstr_new(); + } else { + len = MP_OBJ_SMALL_INT_VALUE(len_in); + o = mp_obj_str_builder_start(&bytes_type, len, &data); + } + + mp_obj_t iterable = rt_getiter(args[0]); + mp_obj_t item; + while ((item = rt_iternext(iterable)) != mp_const_stop_iteration) { + if (len == -1) { + vstr_add_char(vstr, MP_OBJ_SMALL_INT_VALUE(item)); + } else { + *data++ = MP_OBJ_SMALL_INT_VALUE(item); + } + } + + if (len == -1) { + vstr_shrink(vstr); + // TODO: Optimize, borrow buffer from vstr + len = vstr_len(vstr); + o = mp_obj_str_builder_start(&bytes_type, len, &data); + memcpy(data, vstr_str(vstr), len); + vstr_free(vstr); + } + + return mp_obj_str_builder_end(o); + +wrong_args: + nlr_jump(mp_obj_new_exception_msg(&mp_type_TypeError, "wrong number of arguments")); +} + // like strstr but with specified length and allows \0 bytes // TODO replace with something more efficient/standard STATIC const byte *find_subbytes(const byte *haystack, uint hlen, const byte *needle, uint nlen) { @@ -666,11 +737,16 @@ const mp_obj_type_t bytes_type = { { &mp_type_type }, .name = MP_QSTR_bytes, .print = str_print, + .make_new = bytes_make_new, .binary_op = str_binary_op, .getiter = mp_obj_new_bytes_iterator, .methods = str_type_methods, }; +// the zero-length bytes +STATIC const mp_obj_str_t empty_bytes_obj = {{&bytes_type}, 0, 0, NULL}; +const mp_obj_t mp_const_empty_bytes = (mp_obj_t)&empty_bytes_obj; + mp_obj_t mp_obj_str_builder_start(const mp_obj_type_t *type, uint len, byte **data) { mp_obj_str_t *o = m_new_obj(mp_obj_str_t); o->base.type = type; @@ -682,7 +758,6 @@ mp_obj_t mp_obj_str_builder_start(const mp_obj_type_t *type, uint len, byte **da } mp_obj_t mp_obj_str_builder_end(mp_obj_t o_in) { - assert(MP_OBJ_IS_STR(o_in)); mp_obj_str_t *o = o_in; o->hash = qstr_compute_hash(o->data, o->len); byte *p = (byte*)o->data; diff --git a/py/runtime.c b/py/runtime.c index 2ab97ed182..4bcb91c547 100644 --- a/py/runtime.c +++ b/py/runtime.c @@ -89,6 +89,7 @@ STATIC const mp_builtin_elem_t builtin_table[] = { // built-in types { MP_QSTR_bool, (mp_obj_t)&bool_type }, + { MP_QSTR_bytes, (mp_obj_t)&bytes_type }, #if MICROPY_ENABLE_FLOAT { MP_QSTR_complex, (mp_obj_t)&mp_type_complex }, #endif @@ -115,7 +116,6 @@ STATIC const mp_builtin_elem_t builtin_table[] = { { MP_QSTR_abs, (mp_obj_t)&mp_builtin_abs_obj }, { MP_QSTR_all, (mp_obj_t)&mp_builtin_all_obj }, { MP_QSTR_any, (mp_obj_t)&mp_builtin_any_obj }, - { MP_QSTR_bytes, (mp_obj_t)&mp_builtin_bytes_obj }, { MP_QSTR_callable, (mp_obj_t)&mp_builtin_callable_obj }, { MP_QSTR_chr, (mp_obj_t)&mp_builtin_chr_obj }, { MP_QSTR_dir, (mp_obj_t)&mp_builtin_dir_obj }, diff --git a/tests/basics/bytes.py b/tests/basics/bytes.py index 7d0cf22d44..a084bc3994 100644 --- a/tests/basics/bytes.py +++ b/tests/basics/bytes.py @@ -4,8 +4,36 @@ print(str(a)) print(repr(a)) print(a[0], a[2]) print(a[-1]) +print(str(a, "utf-8")) +print(str(a, "utf-8", "ignore")) +try: + str(a, "utf-8", "ignore", "toomuch") +except TypeError: + print("TypeError") s = 0 for i in a: s += i print(s) + + +print(bytes("abc", "utf-8")) +print(bytes("abc", "utf-8", "replace")) +try: + bytes("abc") +except TypeError: + print("TypeError") +try: + bytes("abc", "utf-8", "replace", "toomuch") +except TypeError: + print("TypeError") + +print(bytes(3)) + +print(bytes([3, 2, 1])) +print(bytes(range(5))) + +def gen(): + for i in range(4): + yield i +print(bytes(gen()))