py: Be more precise about unicode type and disabled unicode behaviour.

2015-01-28 14:07:11 +00:00 · 2015-01-28 14:07:11 +00:00 · 16677ce311
commit 16677ce311
--- a/py/lexer.c
+++ b/py/lexer.c
@ -492,11 +492,19 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, bool first_token) {
                        }
                    }
                    if (c != MP_LEXER_EOF) {
+                        #if MICROPY_PY_BUILTINS_STR_UNICODE
                        if (c < 0x110000 && !is_bytes) {
                            vstr_add_char(&lex->vstr, c);
                        } else if (c < 0x100 && is_bytes) {
                            vstr_add_byte(&lex->vstr, c);
-                        } else {
+                        }
+                        #else
+                        // without unicode everything is just added as an 8-bit byte
+                        if (c < 0x100) {
+                            vstr_add_byte(&lex->vstr, c);
+                        }
+                        #endif
+                        else {
                            assert(!"TODO: Throw an error, invalid escape code probably");
                        }
                    }
--- a/py/misc.h
+++ b/py/misc.h
@ -92,7 +92,15 @@ size_t m_get_peak_bytes_allocated(void);

 /** unichar / UTF-8 *********************************************/

-typedef int unichar; // TODO
+#if MICROPY_PY_BUILTINS_STR_UNICODE
+#include <stdint.h> // only include if we need it
+// with unicode enabled we need a type which can fit chars up to 0x10ffff
+typedef uint32_t unichar;
+#else
+// without unicode enabled we only need to fit chars up to 0xff
+// (on 16-bit archs uint is 16 bits wide and more efficient than uint32_t)
+typedef uint unichar;
+#endif

 unichar utf8_get_char(const byte *s);
 const byte *utf8_next_char(const byte *s);
--- a/py/modbuiltins.c
+++ b/py/modbuiltins.c
@ -182,11 +182,11 @@ STATIC mp_obj_t mp_builtin_chr(mp_obj_t o_in) {
    return mp_obj_new_str(str, len, true);
    #else
    mp_int_t ord = mp_obj_get_int(o_in);
-    if (0 <= ord && ord <= 0x10ffff) {
+    if (0 <= ord && ord <= 0xff) {
        char str[1] = {ord};
        return mp_obj_new_str(str, 1, true);
    } else {
-        nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "chr() arg not in range(0x110000)"));
+        nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "chr() arg not in range(256)"));
    }
    #endif
 }