From a019ba968b4e8daf7f3674f63c5cc400e304c509 Mon Sep 17 00:00:00 2001
From: Chris Angelico <rosuav@gmail.com>
Date: Sun, 8 Jun 2014 06:58:26 +1000
Subject: [PATCH] Add a unichar_charlen() function to calculate
 length-in-characters from length-in-bytes

---
 py/misc.h    |  1 +
 py/unicode.c | 11 +++++++++++
 2 files changed, 12 insertions(+)

diff --git a/py/misc.h b/py/misc.h
index f2d375d251..325d0dd881 100644
--- a/py/misc.h
+++ b/py/misc.h
@@ -100,6 +100,7 @@ bool unichar_isupper(unichar c);
 bool unichar_islower(unichar c);
 unichar unichar_tolower(unichar c);
 unichar unichar_toupper(unichar c);
+uint unichar_charlen(const char *str, uint len); // count of non-continuation bytes in the len bytes at str
 #define UTF8_IS_NONASCII(ch) ((ch) & 0x80)
 #define UTF8_IS_CONT(ch) (((ch) & 0xC0) == 0x80)
 
diff --git a/py/unicode.c b/py/unicode.c
index f498a0096b..8421eead73 100644
--- a/py/unicode.c
+++ b/py/unicode.c
@@ -86,6 +86,17 @@ char *utf8_next_char(const char *s) {
     return (char *)s;
 }
 
+// Number of UTF-8 characters in the len bytes at str (counts the non-continuation bytes).
+uint unichar_charlen(const char *str, uint len) {
+    uint charlen = 0;
+    for (const char *top = str + len; str < top; ++str) {
+        if (!UTF8_IS_CONT(*str)) { // each lead or ASCII byte starts one character
+            ++charlen;
+        }
+    }
+    return charlen;
+}
+
 // Be aware: These unichar_is* functions are actually ASCII-only!
 bool unichar_isspace(unichar c) {
     return c < 128 && (attr[c] & FL_SPACE) != 0;