Formatting/layout improvements - introduce macros for UTF-8 byte detection, add braces. No functional changes.

2014-06-07 15:28:35 +10:00 · 2014-06-07 15:28:35 +10:00 · 279de0c8eb
commit 279de0c8eb
--- a/py/builtin.c
+++ b/py/builtin.c
@ -360,12 +360,12 @@ STATIC mp_obj_t mp_builtin_ord(mp_obj_t o_in) {
    uint len, charlen;
    const char *str = mp_obj_str_get_data_len(o_in, &len, &charlen);
    if (charlen == 1) {
-        if (MP_OBJ_IS_STR(o_in) && (*str & 0x80)) {
+        if (MP_OBJ_IS_STR(o_in) && UTF8_IS_NONASCII(*str)) {
 	    machine_int_t ord = *str++ & 0x7F;
            for (machine_int_t mask = 0x40; ord & mask; mask >>= 1) {
 		ord &= ~mask;
 	    }
-	    while ((*str & 0xC0) == 0x80) {
+	    while (UTF8_IS_CONT(*str)) {
 		ord = (ord << 6) | (*str++ & 0x3F);
 	    }
 	    return mp_obj_new_int(ord);
--- a/py/misc.h
+++ b/py/misc.h
@ -100,6 +100,8 @@ bool unichar_isupper(unichar c);
 bool unichar_islower(unichar c);
 unichar unichar_tolower(unichar c);
 unichar unichar_toupper(unichar c);
+#define UTF8_IS_NONASCII(ch) ((ch) & 0x80) // non-zero when the top bit (0x80) of ch is set
+#define UTF8_IS_CONT(ch) (((ch) & 0xC0) == 0x80) // true when the top two bits of ch are 10, i.e. a UTF-8 continuation byte

 /** variable string *********************************************/

--- a/py/objstr.c
+++ b/py/objstr.c
@ -109,7 +109,7 @@ void mp_str_print_quoted(void (*print)(void *env, const char *fmt, ...), void *e
            for (machine_int_t mask = 0x40; ord & mask; mask >>= 1) {
 		ord &= ~mask;
 	    }
-	    while ((*s & 0xC0) == 0x80) {
+	    while (UTF8_IS_CONT(*s)) {
 		ord = (ord << 6) | (*s++ & 0x3F);
 	    }
 	    --s; // s will be incremented by the main loop
@ -398,12 +398,22 @@ STATIC mp_obj_t str_subscr(mp_obj_t self_in, mp_obj_t index, mp_obj_t value) {
            // Assumes that the string is correctly formed - will run past the
            // end of the buffer if there aren't that many characters in it
            const char *s;
-            for (s=(const char *)self_data; index_val; ++s)
-                if ((*s & 0xC0) != 0x80) --index_val;
-            while ((*s & 0xC0) == 0x80) ++s; // Skip continuation bytes after the last lead byte
+            for (s=(const char *)self_data; index_val; ++s) {
+                if (!UTF8_IS_CONT(*s)) {
+		    --index_val;
+		}
+	    }
+	    // Skip continuation bytes after the last lead byte
+            while (UTF8_IS_CONT(*s)) {
+		++s;
+	    }
            int len = 1;
-            if (*s & 0x80)
-                for (char mask = 0x40; *s & mask; mask >>= 1) ++len; // Count the number of 1 bits (after the first)
+            if (UTF8_IS_NONASCII(*s)) {
+		// Count the number of 1 bits (after the first)
+                for (char mask = 0x40; *s & mask; mask >>= 1) {
+		    ++len;
+		}
+	    }
            return mp_obj_new_str(s, len, true); // This will create a one-character string
        }
    } else {
@ -1769,8 +1779,11 @@ mp_obj_t mp_obj_new_str_of_type(const mp_obj_type_t *type, const byte* data, uin
            // Count non-continuation bytes so we know how long the string is in characters.
            const byte *endptr, *top = data + len;
            uint charlen = 0;
-            for (endptr = data; endptr < top; ++endptr)
-                if ((*endptr & 0xC0) != 0x80) ++charlen;
+            for (endptr = data; endptr < top; ++endptr) {
+                if (!UTF8_IS_CONT(*endptr)) {
+		    ++charlen;
+		}
+	    }
            o->charlen = charlen;
 	} else {
            // For byte strings, the 'character' length (really the "exposed length" or "Python length") equals the byte length.
--- a/py/qstr.c
+++ b/py/qstr.c
@ -162,8 +162,11 @@ qstr qstr_from_strn(const char *str, uint len) {
        machine_uint_t hash = qstr_compute_hash((const byte*)str, len);
        byte *q_ptr = m_new(byte, 7 + len + 1);
        uint charlen = 0;
-        for (const char *s = str; s < str + len; ++s)
-            if ((*s & 0xC0) != 0x80) ++charlen;
+        for (const char *s = str; s < str + len; ++s) {
+            if (!UTF8_IS_CONT(*s)) {
+		++charlen;
+	    }
+	}
        q_ptr[0] = hash;
        q_ptr[1] = hash >> 8;
        q_ptr[2] = len;
@ -195,8 +198,11 @@ qstr qstr_build_end(byte *q_ptr) {
        q_ptr[0] = hash;
        q_ptr[1] = hash >> 8;
        uint charlen = 0;
-        for (const byte *s = str; s < str + len; ++s)
-            if ((*s & 0xC0) != 0x80) ++charlen;
+        for (const byte *s = str; s < str + len; ++s) {
+            if (!UTF8_IS_CONT(*s)) {
+		++charlen;
+	    }
+	}
        q_ptr[4] = charlen;
        q_ptr[5] = charlen >> 8;
        q_ptr[6] = 1;