From a019ba968b4e8daf7f3674f63c5cc400e304c509 Mon Sep 17 00:00:00 2001
From: Chris Angelico
Date: Sun, 8 Jun 2014 06:58:26 +1000
Subject: [PATCH] Add a unichar_charlen() function to calculate length-in-characters from length-in-bytes

---
 py/misc.h    |  1 +
 py/unicode.c | 13 +++++++++++++
 2 files changed, 14 insertions(+)

diff --git a/py/misc.h b/py/misc.h
index f2d375d251..325d0dd881 100644
--- a/py/misc.h
+++ b/py/misc.h
@@ -100,6 +100,7 @@ bool unichar_isupper(unichar c);
 bool unichar_islower(unichar c);
 unichar unichar_tolower(unichar c);
 unichar unichar_toupper(unichar c);
+uint unichar_charlen(const char *str, uint len);
 
 #define UTF8_IS_NONASCII(ch) ((ch) & 0x80)
 #define UTF8_IS_CONT(ch) (((ch) & 0xC0) == 0x80)
diff --git a/py/unicode.c b/py/unicode.c
index f498a0096b..8421eead73 100644
--- a/py/unicode.c
+++ b/py/unicode.c
@@ -86,6 +86,19 @@ char *utf8_next_char(const char *s) {
     return (char *)s;
 }
 
+// Return the length in characters of the len bytes starting at str.
+// Every byte that is not a UTF-8 continuation byte is counted as the start
+// of one character; the bytes are not otherwise validated.
+uint unichar_charlen(const char *str, uint len) {
+    uint charlen = 0;
+    for (const char *top = str + len; str < top; ++str) {
+        if (!UTF8_IS_CONT(*str)) {
+            ++charlen;
+        }
+    }
+    return charlen;
+}
+
 // Be aware: These unichar_is* functions are actually ASCII-only!
 bool unichar_isspace(unichar c) {
     return c < 128 && (attr[c] & FL_SPACE) != 0;