From 1abb9b9b690870c5f72d5c323f0213573b8edca4 Mon Sep 17 00:00:00 2001 From: Andrej Lajovic Date: Thu, 4 Oct 2012 12:55:12 -0500 Subject: [PATCH] UTF-8 wide characters * Fix put_echo_char() to handle UTF-8 characters wider than two bytes * Reimplement Fl_Text_Buffer_mod::get_char_at() to handle UTF-8 chars wider than two bytes --- src/dialogs/fl_digi.cxx | 11 ++++++-- src/widgets/Fl_Text_Buffer_mod_1_3.cxx | 39 +++++++++++++------------- 2 files changed, 28 insertions(+), 22 deletions(-) diff --git a/src/dialogs/fl_digi.cxx b/src/dialogs/fl_digi.cxx index 59850d9d..744e3907 100644 --- a/src/dialogs/fl_digi.cxx +++ b/src/dialogs/fl_digi.cxx @@ -6364,9 +6364,14 @@ void put_echo_char(unsigned int data, int style) if (asc != NULL) { // MAIL / ARQ / RTTY / CW sch.assign(asc[data & 0xFF]); } else if (data & 0x8000) { //UTF-8 extended character - sch.assign(" "); - sch[0] = (data >> 8) & 0xFF; - sch[1] = (data & 0xFF); + unsigned int shiftdata = data; + while (!(shiftdata & 0xff000000)) + shiftdata <<= 8; + unsigned char c; + while ((c = (shiftdata >> 24) & 0xff)) { + sch += c; + shiftdata <<= 8; + } } else { // keyboard character including MSB set chars sch.assign(" "); sch[0] = data; diff --git a/src/widgets/Fl_Text_Buffer_mod_1_3.cxx b/src/widgets/Fl_Text_Buffer_mod_1_3.cxx index a346d2c9..20c36129 100644 --- a/src/widgets/Fl_Text_Buffer_mod_1_3.cxx +++ b/src/widgets/Fl_Text_Buffer_mod_1_3.cxx @@ -258,29 +258,30 @@ unsigned int Fl_Text_Buffer_mod::char_at(int pos) const { Return a UTF-8 character at the given index. Pos must be at a character boundary. 
*/
+
 unsigned int Fl_Text_Buffer_mod::get_char_at(int pos, int &len) const {
+  // Out-of-range positions yield 0. len is an out-parameter, so it is
+  // still assigned on this path: every return leaves it defined.
   if (pos < 0 || pos >= mLength) {
     len = 1;
     return 0;
   }
 
-  IS_UTF8_ALIGNED2(this, (pos))
+  IS_UTF8_ALIGNED2(this, (pos))
 
-  const char *src = address(pos);
-  unsigned int code;
-  int codelen;
-  if (*src & 0x80) { // what should be a multibyte encoding
-    fl_utf8decode(src, src+2, &codelen);
-    if (codelen == 2)
-      code = (*src << 8) | (*(src+1) & 0xFF);
-    else
-      code = *src & 0xFF;
-  } else { // handle the 1-byte utf8 encoding:
-    code = (*src & 0xFF);
-    codelen = 1;
+  const char *src = address(pos);
+  unsigned int code = 0;
+
+  if (*src & 0x80) {
+    // High bit set, so this is not a 1-byte character. fl_utf8decode() is
+    // called only for the length it stores in len; its return value is
+    // deliberately unused.
+    fl_utf8decode(src, 0, &len);
+
+    // Pack the raw encoded bytes (at most 4) into the result, first byte
+    // in the most significant position.
+    for (int i = 0; i < len && i < 4; i++) {
+      code <<= 8;
+      code |= (unsigned char)*(src + i) & 0xFF;
     }
-  len = codelen;
-  return code;
+  }
+  else {
+    // High bit clear: the single byte is returned as the code.
+    code = *src;
+    len = 1;
+  }
+
+  return code;
 }