From cde0ca21bf79420386c7cb31de29b0f799f16655 Mon Sep 17 00:00:00 2001 From: Damien George Date: Thu, 25 Sep 2014 17:35:56 +0100 Subject: [PATCH] py: Simplify JSON str printing (while still conforming to JSON spec). The JSON specs are relatively flexible and allow us to use one function to print strings, be they ascii, bytes or utf-8 encoded. --- py/objstr.c | 18 +++++++++--------- py/objstr.h | 1 + py/objstrunicode.c | 32 +------------------------------- tests/extmod/ujson_dumps.py | 2 +- 4 files changed, 12 insertions(+), 41 deletions(-) diff --git a/py/objstr.c b/py/objstr.c index 130af8a6af..dd44b0784d 100644 --- a/py/objstr.c +++ b/py/objstr.c @@ -93,17 +93,16 @@ void mp_str_print_quoted(void (*print)(void *env, const char *fmt, ...), void *e } #if MICROPY_PY_UJSON -STATIC void str_print_json(void (*print)(void *env, const char *fmt, ...), void *env, const byte *str_data, mp_uint_t str_len) { +void mp_str_print_json(void (*print)(void *env, const char *fmt, ...), void *env, const byte *str_data, mp_uint_t str_len) { + // for JSON spec, see http://www.ietf.org/rfc/rfc4627.txt + // if we are given a valid utf8-encoded string, we will print it in a JSON-conforming way print(env, "\""); for (const byte *s = str_data, *top = str_data + str_len; s < top; s++) { - if (*s == '"' || *s == '\\' || *s == '/') { + if (*s == '"' || *s == '\\') { print(env, "\\%c", *s); - } else if (32 <= *s && *s <= 126) { + } else if (*s >= 32) { + // this will handle normal and utf-8 encoded chars print(env, "%c", *s); - } else if (*s == '\b') { - print(env, "\\b"); - } else if (*s == '\f') { - print(env, "\\f"); } else if (*s == '\n') { print(env, "\\n"); } else if (*s == '\r') { @@ -111,6 +110,7 @@ STATIC void str_print_json(void (*print)(void *env, const char *fmt, ...), void } else if (*s == '\t') { print(env, "\\t"); } else { + // this will handle control chars print(env, "\\u%04x", *s); } } @@ -120,13 +120,13 @@ STATIC void str_print_json(void (*print)(void *env, const char *fmt, ...), void STATIC void str_print(void (*print)(void *env, const char *fmt, ...), void *env, mp_obj_t self_in, mp_print_kind_t kind) { GET_STR_DATA_LEN(self_in, str_data, str_len); - bool is_bytes = MP_OBJ_IS_TYPE(self_in, &mp_type_bytes); #if MICROPY_PY_UJSON if (kind == PRINT_JSON) { - str_print_json(print, env, str_data, str_len); + mp_str_print_json(print, env, str_data, str_len); return; } #endif + bool is_bytes = MP_OBJ_IS_TYPE(self_in, &mp_type_bytes); if (kind == PRINT_STR && !is_bytes) { print(env, "%.*s", str_len, str_data); } else { diff --git a/py/objstr.h b/py/objstr.h index 3d20680d34..8cd4852943 100644 --- a/py/objstr.h +++ b/py/objstr.h @@ -50,6 +50,7 @@ typedef struct _mp_obj_str_t { { str_data = qstr_data(MP_OBJ_QSTR_VALUE(str_obj_in), &str_len); } \ else { str_len = ((mp_obj_str_t*)str_obj_in)->len; str_data = ((mp_obj_str_t*)str_obj_in)->data; } +void mp_str_print_json(void (*print)(void *env, const char *fmt, ...), void *env, const byte *str_data, mp_uint_t str_len); mp_obj_t mp_obj_str_format(mp_uint_t n_args, const mp_obj_t *args); mp_obj_t mp_obj_new_str_of_type(const mp_obj_type_t *type, const byte* data, mp_uint_t len); diff --git a/py/objstrunicode.c b/py/objstrunicode.c index 0ee7f1dc9a..062e011fb1 100644 --- a/py/objstrunicode.c +++ b/py/objstrunicode.c @@ -91,41 +91,11 @@ STATIC void uni_print_quoted(void (*print)(void *env, const char *fmt, ...), voi print(env, "%c", quote_char); } -#if MICROPY_PY_UJSON -STATIC void uni_print_json(void (*print)(void *env, const char *fmt, ...), void *env, const byte *str_data, uint str_len) { - print(env, "\""); - const byte *s = str_data, *top = str_data + str_len; - while (s < top) { - unichar ch; - ch = utf8_get_char(s); - s = utf8_next_char(s); - if (ch == '"' || ch == '\\' || ch == '/') { - print(env, "\\%c", ch); - } else if (32 <= ch && ch <= 126) { - print(env, "%c", ch); - } else if (*s == '\b') { - print(env, "\\b"); - } else if (*s == '\f') { - print(env, "\\f"); - } else if (*s == '\n') { - print(env, "\\n"); - } else if (*s == '\r') { - print(env, "\\r"); - } else if (*s == '\t') { - print(env, "\\t"); - } else { - print(env, "\\u%04x", ch); - } - } - print(env, "\""); -} -#endif - STATIC void uni_print(void (*print)(void *env, const char *fmt, ...), void *env, mp_obj_t self_in, mp_print_kind_t kind) { GET_STR_DATA_LEN(self_in, str_data, str_len); #if MICROPY_PY_UJSON if (kind == PRINT_JSON) { - uni_print_json(print, env, str_data, str_len); + mp_str_print_json(print, env, str_data, str_len); return; } #endif diff --git a/tests/extmod/ujson_dumps.py b/tests/extmod/ujson_dumps.py index 6e858fd3f9..0b8d239a06 100644 --- a/tests/extmod/ujson_dumps.py +++ b/tests/extmod/ujson_dumps.py @@ -9,7 +9,7 @@ print(json.dumps(None)) print(json.dumps(1)) print(json.dumps(1.2)) print(json.dumps('abc')) -print(json.dumps('\x01\x7e\x7f\x80\u1234')) +print(json.dumps('\x00\x01\x7e')) print(json.dumps([])) print(json.dumps([1])) print(json.dumps([1, 2]))