diff --git a/app/src/main/java/org/thoughtcrime/securesms/util/CharacterIterable.java b/app/src/main/java/org/thoughtcrime/securesms/util/CharacterIterable.java
new file mode 100644
index 000000000..b0e8eaf72
--- /dev/null
+++ b/app/src/main/java/org/thoughtcrime/securesms/util/CharacterIterable.java
@@ -0,0 +1,140 @@
+package org.thoughtcrime.securesms.util;
+
+import android.os.Build;
+
+import androidx.annotation.NonNull;
+import androidx.annotation.RequiresApi;
+
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+
+/**
+ * Iterates over a string treating a surrogate pair and a grapheme cluster as a single character.
+ * <p>
+ * Each element is the substring between two consecutive boundaries reported by a character
+ * break iterator; see the two BreakIteratorCompat implementations below for platform caveats.
+ */
+public final class CharacterIterable implements Iterable<String> {
+
+  private final String string;
+
+  public CharacterIterable(@NonNull String string) {
+    this.string = string;
+  }
+
+  @Override
+  public @NonNull Iterator<String> iterator() {
+    return new CharacterIterator();
+  }
+
+  /**
+   * Walks the character boundaries of the enclosing string. The break iterator is positioned on
+   * the first boundary lazily, by the first call to {@link #hasNext()}.
+   */
+  private class CharacterIterator implements Iterator<String> {
+    private static final int UNINITIALIZED = -2;
+
+    private final BreakIteratorCompat breakIterator;
+
+    private int lastIndex = UNINITIALIZED;
+
+    CharacterIterator() {
+      this.breakIterator = Build.VERSION.SDK_INT >= 24 ? new AndroidIcuBreakIterator(string)
+                                                       : new FallbackBreakIterator(string);
+    }
+
+    @Override
+    public boolean hasNext() {
+      if (lastIndex == UNINITIALIZED) {
+        lastIndex = breakIterator.first();
+      }
+      // The final boundary is string.length(), which is not DONE, so isDone() alone cannot signal
+      // the end. Stopping at the length also keeps next() from advancing past the final boundary,
+      // where the break iterator returns DONE and substring() would be given it as the end index.
+      return !breakIterator.isDone(lastIndex) && lastIndex < string.length();
+    }
+
+    @Override
+    public String next() {
+      if (!hasNext()) {
+        throw new NoSuchElementException();
+      }
+
+      int firstIndex = lastIndex;
+      lastIndex = breakIterator.next();
+      return string.substring(firstIndex, lastIndex);
+    }
+  }
+
+  /** Common surface of the two BreakIterator flavours used above. */
+  private interface BreakIteratorCompat {
+    int first();
+
+    int next();
+
+    boolean isDone(int index);
+  }
+
+  /**
+   * A BreakIteratorCompat implementation that delegates calls to `android.icu.text.BreakIterator`.
+   * This class handles grapheme clusters fine but requires Android API >= 24.
+   */
+  @RequiresApi(24)
+  private static class AndroidIcuBreakIterator implements BreakIteratorCompat {
+    private final android.icu.text.BreakIterator breakIterator = android.icu.text.BreakIterator.getCharacterInstance();
+
+    public AndroidIcuBreakIterator(@NonNull String string) {
+      breakIterator.setText(string);
+    }
+
+    @Override
+    public int first() {
+      return breakIterator.first();
+    }
+
+    @Override
+    public int next() {
+      return breakIterator.next();
+    }
+
+    @Override
+    public boolean isDone(int index) {
+      return index == android.icu.text.BreakIterator.DONE;
+    }
+  }
+
+  /**
+   * A BreakIteratorCompat implementation that delegates calls to `java.text.BreakIterator`.
+   * This class may or may not handle grapheme clusters well depending on the underlying implementation.
+   * In the emulator, API 23 implements ICU version of the BreakIterator so that it handles grapheme
+   * clusters fine. But API 21 implements RuleBasedIterator which does not handle grapheme clusters.
+   * <p>
+   * If it doesn't handle grapheme clusters correctly, in most cases the combined characters are
+   * broken up into pieces when the code tries to trim a string. For example, an emoji that is
+   * a combination of a person, gender and skin tone, trimming the character using this class may result
+   * in trimming the parts of the character, e.g. a dark skin frowning woman emoji may result in
+   * a neutral skin frowning woman emoji.
+   */
+  private static class FallbackBreakIterator implements BreakIteratorCompat {
+    private final java.text.BreakIterator breakIterator = java.text.BreakIterator.getCharacterInstance();
+
+    public FallbackBreakIterator(@NonNull String string) {
+      breakIterator.setText(string);
+    }
+
+    @Override
+    public int first() {
+      return breakIterator.first();
+    }
+
+    @Override
+    public int next() {
+      return breakIterator.next();
+    }
+
+    @Override
+    public boolean isDone(int index) {
+      return index == java.text.BreakIterator.DONE;
+    }
+  }
+}
diff --git a/app/src/main/java/org/thoughtcrime/securesms/util/StringUtil.java b/app/src/main/java/org/thoughtcrime/securesms/util/StringUtil.java
index 395379da2..48369c45c 100644
--- a/app/src/main/java/org/thoughtcrime/securesms/util/StringUtil.java
+++ b/app/src/main/java/org/thoughtcrime/securesms/util/StringUtil.java
@@ -1,9 +1,13 @@
 package org.thoughtcrime.securesms.util;
 
+import android.text.TextUtils;
+
 import androidx.annotation.NonNull;
 import androidx.annotation.Nullable;
 import androidx.core.text.BidiFormatter;
 
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
 import java.nio.charset.StandardCharsets;
 import java.util.Set;
 
@@ -39,21 +43,38 @@ public final class StringUtil {
 
   /**
    * Trims a name string to fit into the byte length requirement.
+   * <p>
+   * This method treats a surrogate pair and a grapheme cluster as a single character.
+   * See examples in tests defined in StringUtilTest_trimToFit.
+   *
+   * @param name          the name to trim; {@code null} is treated as empty
+   * @param maxByteLength the maximum length of the result in UTF-8 encoded bytes
+   * @return the longest prefix of {@code name}, cut on a character boundary, that fits
    */
-  public static @NonNull String trimToFit(@Nullable String name, int maxLength) {
-    if (name == null) return "";
-
-    // At least one byte per char, so shorten string to reduce loop
-    if (name.length() > maxLength) {
-      name = name.substring(0, maxLength);
+  public static @NonNull String trimToFit(@Nullable String name, int maxByteLength) {
+    if (TextUtils.isEmpty(name)) {
+      return "";
     }
 
-    // Remove one char at a time until fits in byte allowance
-    while (name.getBytes(StandardCharsets.UTF_8).length > maxLength) {
-      name = name.substring(0, name.length() - 1);
+    if (name.getBytes(StandardCharsets.UTF_8).length <= maxByteLength) {
+      return name;
     }
 
-    return name;
+    try (ByteArrayOutputStream stream = new ByteArrayOutputStream()) {
+      for (String graphemeCharacter : new CharacterIterable(name)) {
+        byte[] bytes = graphemeCharacter.getBytes(StandardCharsets.UTF_8);
+
+        if (stream.size() + bytes.length <= maxByteLength) {
+          stream.write(bytes);
+        } else {
+          break;
+        }
+      }
+      // Decode explicitly as UTF-8: toString() would use the platform default charset.
+      return new String(stream.toByteArray(), StandardCharsets.UTF_8);
+    } catch (IOException e) {
+      throw new AssertionError(e);
+    }
   }
 
   /**
diff --git a/app/src/test/java/org/thoughtcrime/securesms/profiles/ProfileNameTest.java b/app/src/test/java/org/thoughtcrime/securesms/profiles/ProfileNameTest.java
index 30d08da6e..37d2139a4 100644
--- a/app/src/test/java/org/thoughtcrime/securesms/profiles/ProfileNameTest.java
+++ b/app/src/test/java/org/thoughtcrime/securesms/profiles/ProfileNameTest.java
@@ -1,6 +1,11 @@
 package org.thoughtcrime.securesms.profiles;
 
+import android.app.Application;
+
 import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.robolectric.RobolectricTestRunner;
+import org.robolectric.annotation.Config;
 
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
@@ -8,6 +13,8 @@ import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertSame;
 import static org.junit.Assert.assertTrue;
 
+@RunWith(RobolectricTestRunner.class)
+@Config(manifest = Config.NONE, application = Application.class)
 public final class ProfileNameTest {
 
   @Test
diff --git a/app/src/test/java/org/thoughtcrime/securesms/util/StringUtilTest_trimToFit.java b/app/src/test/java/org/thoughtcrime/securesms/util/StringUtilTest_trimToFit.java
new file mode 100644
index 000000000..471e3a011
--- /dev/null
+++ b/app/src/test/java/org/thoughtcrime/securesms/util/StringUtilTest_trimToFit.java
@@ -0,0 +1,230 @@
+package org.thoughtcrime.securesms.util;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.fail;
+import static org.junit.Assume.assumeTrue;
+
+import android.app.Application;
+
+import android.os.Build;
+
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.robolectric.RobolectricTestRunner;
+import org.robolectric.annotation.Config;
+
+@RunWith(RobolectricTestRunner.class)
+@Config(manifest = Config.NONE, application = Application.class)
+public final class StringUtilTest_trimToFit {
+
+  @Test
+  public void testShortStringIsNotTrimmed() {
+    assertEquals("Test string", StringUtil.trimToFit("Test string", 32));
+    assertEquals("", StringUtil.trimToFit("", 32));
+    assertEquals("aaaBBBCCC", StringUtil.trimToFit("aaaBBBCCC", 9));
+  }
+
+  @Test
+  public void testNull() {
+    assertEquals("", StringUtil.trimToFit(null, 0));
+    assertEquals("", StringUtil.trimToFit(null, 1));
+    assertEquals("", StringUtil.trimToFit(null, 10));
+  }
+
+  @Test
+  public void testStringIsTrimmed() {
+    assertEquals("Test stri", StringUtil.trimToFit("Test string", 9));
+    assertEquals("aaaBBBCC", StringUtil.trimToFit("aaaBBBCCC", 8));
+  }
+
+  @Test
+  public void testStringWithControlCharsIsTrimmed() {
+    assertEquals("Test string\nwrap\r\nhere",
+                 StringUtil.trimToFit("Test string\nwrap\r\nhere\tindent\n\n", 22));
+  }
+
+  @Test
+  public void testAccentedCharactersAreTrimmedCorrectly() {
+    assertEquals("", StringUtil.trimToFit("âëȋõṷ", 1));
+    assertEquals("â", StringUtil.trimToFit("âëȋõṷ", 2));
+    assertEquals("â", StringUtil.trimToFit("âëȋõṷ", 3));
+    assertEquals("âë", StringUtil.trimToFit("âëȋõṷ", 4));
+    assertEquals("The last characters take more than a byte in utf8 â",
+                 StringUtil.trimToFit("The last characters take more than a byte in utf8 âëȋõṷ", 53));
+    assertEquals("un quinzième jour en jaune apr", StringUtil.trimToFit("un quinzième jour en jaune après son épopée de 2019", 32));
+    assertEquals("una vez se organizaron detrás l", StringUtil.trimToFit("una vez se organizaron detrás la ventaja nunca pasó de los 3 minutos.", 32));
+  }
+
+  @Test
+  public void testCombinedAccentsAreTrimmedAsACharacter() {
+    final String a = "a\u0302";
+    final String e = "e\u0308";
+    final String i = "i\u0311";
+    final String o = "o\u0303";
+    final String u = "u\u032d";
+    assertEquals("", StringUtil.trimToFit(a + e + i + o + u, 1));
+    assertEquals("", StringUtil.trimToFit(a + e + i + o + u, 2));
+    assertEquals(a, StringUtil.trimToFit(a + e + i + o + u, 3));
+    assertEquals(a, StringUtil.trimToFit(a + e + i + o + u, 4));
+    assertEquals(a, StringUtil.trimToFit(a + e + i + o + u, 5));
+    assertEquals(a + e, StringUtil.trimToFit(a + e + i + o + u, 6));
+    assertEquals("The last characters take more than a byte in utf8 " + a,
+                 StringUtil.trimToFit("The last characters take more than a byte in utf8 " + a + e + i + o + u, 53));
+    assertEquals("un quinzie\u0300me jour en jaune apr", StringUtil.trimToFit("un quinzie\u0300me jour en jaune apre\u0300s son e\u0301pope\u0301e de 2019", 32));
+    assertEquals("una vez se organizaron detra\u0301s ", StringUtil.trimToFit("una vez se organizaron detra\u0301s la ventaja nunca paso\u0301 de los 3 minutos.", 32));
+  }
+
+  @Test
+  public void testCJKCharactersAreTrimmedCorrectly() {
+    final String shin = "\u4fe1";
+    final String signal = shin + "\u53f7";
+    final String _private = "\u79c1\u4eba";
+    final String messenger = "\u4fe1\u4f7f";
+    assertEquals("", StringUtil.trimToFit(signal, 1));
+    assertEquals("", StringUtil.trimToFit(signal, 2));
+    assertEquals(shin, StringUtil.trimToFit(signal, 3));
+    assertEquals(shin, StringUtil.trimToFit(signal, 4));
+    assertEquals(shin, StringUtil.trimToFit(signal, 5));
+    assertEquals(signal, StringUtil.trimToFit(signal, 6));
+    assertEquals(String.format("Signal %s Pr", signal),
+                 StringUtil.trimToFit(String.format("Signal %s Private %s Messenger %s", signal, _private, messenger),
+                                      16));
+  }
+
+  @Test
+  public void testSurrogatePairsAreTrimmedCorrectly() {
+    final String sword = "\uD841\uDF4F";
+    assertEquals("", StringUtil.trimToFit(sword, 1));
+    assertEquals("", StringUtil.trimToFit(sword, 2));
+    assertEquals("", StringUtil.trimToFit(sword, 3));
+    assertEquals(sword, StringUtil.trimToFit(sword, 4));
+
+    final String so = "\ud869\uddf1";
+    final String go = "\ud869\ude1a";
+    assertEquals("", StringUtil.trimToFit(so + go, 1));
+    assertEquals("", StringUtil.trimToFit(so + go, 2));
+    assertEquals("", StringUtil.trimToFit(so + go, 3));
+    assertEquals(so, StringUtil.trimToFit(so + go, 4));
+    assertEquals(so, StringUtil.trimToFit(so + go, 5));
+    assertEquals(so, StringUtil.trimToFit(so + go, 6));
+    assertEquals(so, StringUtil.trimToFit(so + go, 7));
+    assertEquals(so + go, StringUtil.trimToFit(so + go, 8));
+
+    final String gClef = "\uD834\uDD1E";
+    final String fClef = "\uD834\uDD22";
+    assertEquals("", StringUtil.trimToFit(gClef + " " + fClef, 1));
+    assertEquals("", StringUtil.trimToFit(gClef + " " + fClef, 2));
+    assertEquals("", StringUtil.trimToFit(gClef + " " + fClef, 3));
+    assertEquals(gClef, StringUtil.trimToFit(gClef + " " + fClef, 4));
+    assertEquals(gClef + " ", StringUtil.trimToFit(gClef + " " + fClef, 5));
+    assertEquals(gClef + " ", StringUtil.trimToFit(gClef + " " + fClef, 6));
+    assertEquals(gClef + " ", StringUtil.trimToFit(gClef + " " + fClef, 7));
+    assertEquals(gClef + " ", StringUtil.trimToFit(gClef + " " + fClef, 8));
+    assertEquals(gClef + " " + fClef, StringUtil.trimToFit(gClef + " " + fClef, 9));
+  }
+
+  @Test
+  public void testSimpleEmojiTrimming() {
+    final String congrats = "\u3297";
+    assertEquals("", StringUtil.trimToFit(congrats, 1));
+    assertEquals("", StringUtil.trimToFit(congrats, 2));
+    assertEquals(congrats, StringUtil.trimToFit(congrats, 3));
+
+    final String eject = "\u23cf";
+    assertEquals("", StringUtil.trimToFit(eject, 1));
+    assertEquals("", StringUtil.trimToFit(eject, 2));
+    assertEquals(eject, StringUtil.trimToFit(eject, 3));
+  }
+
+  @Test
+  public void testEmojisSurrogatePairTrimming() {
+    final String grape = "🍇";
+    assertEquals("", StringUtil.trimToFit(grape, 1));
+    assertEquals("", StringUtil.trimToFit(grape, 2));
+    assertEquals("", StringUtil.trimToFit(grape, 3));
+    assertEquals(grape, StringUtil.trimToFit(grape, 4));
+
+    final String smile = "\uD83D\uDE42";
+    assertEquals("", StringUtil.trimToFit(smile, 1));
+    assertEquals("", StringUtil.trimToFit(smile, 2));
+    assertEquals("", StringUtil.trimToFit(smile, 3));
+    assertEquals(smile, StringUtil.trimToFit(smile, 4));
+
+    final String check = "\u2714"; // Simple emoji
+    assertEquals(check, StringUtil.trimToFit(check, 3));
+    final String secret = "\u3299"; // Simple emoji
+    assertEquals(secret, StringUtil.trimToFit(secret, 3));
+    final String phoneWithArrow = "\uD83D\uDCF2"; // Surrogate Pair emoji
+    assertEquals(phoneWithArrow, StringUtil.trimToFit(phoneWithArrow, 4));
+
+    assertEquals(phoneWithArrow + ":",
+                 StringUtil.trimToFit(phoneWithArrow + ":" + secret + ", " + check, 7));
+    assertEquals(phoneWithArrow + ":" + secret,
+                 StringUtil.trimToFit(phoneWithArrow + ":" + secret + ", " + check, 8));
+    assertEquals(phoneWithArrow + ":" + secret + ",",
+                 StringUtil.trimToFit(phoneWithArrow + ":" + secret + ", " + check, 9));
+    assertEquals(phoneWithArrow + ":" + secret + ", ",
+                 StringUtil.trimToFit(phoneWithArrow + ":" + secret + ", " + check, 10));
+    assertEquals(phoneWithArrow + ":" + secret + ", ",
+                 StringUtil.trimToFit(phoneWithArrow + ":" + secret + ", " + check, 11));
+    assertEquals(phoneWithArrow + ":" + secret + ", ",
+                 StringUtil.trimToFit(phoneWithArrow + ":" + secret + ", " + check, 12));
+  }
+
+  @Test
+  public void testGraphemeClusterTrimming1() {
+    assumeTrue(Build.VERSION.SDK_INT >= 24);
+
+    final String alphas = "AAAAABBBBBCCCCCDDDDDEEEEE";
+    final String wavingHand = "\uD83D\uDC4B";
+    final String mediumDark = "\uD83C\uDFFE";
+    assertEquals(alphas, StringUtil.trimToFit(alphas + wavingHand + mediumDark, 32));
+    assertEquals(alphas + wavingHand + mediumDark, StringUtil.trimToFit(alphas + wavingHand + mediumDark, 33));
+
+    final String pads = "abcdefghijklm";
+    final String frowningPerson = "\uD83D\uDE4D";
+    final String female = "\u200D\u2640\uFE0F";
+    assertEquals(pads + frowningPerson + female,
+                 StringUtil.trimToFit(pads + frowningPerson + female, 26));
+    assertEquals(pads + "n",
+                 StringUtil.trimToFit(pads + "n" + frowningPerson + female, 26));
+
+    final String pads1 = "abcdef";
+    final String mediumSkin = "\uD83C\uDFFD\uFE0F";
+    assertEquals(pads1 + frowningPerson + mediumSkin + female,
+                 StringUtil.trimToFit(pads1 + frowningPerson + mediumSkin + female, 26));
+    assertEquals(pads1 + "g",
+                 StringUtil.trimToFit(pads1 + "g" + frowningPerson + mediumSkin + female, 26));
+  }
+
+  @Test
+  public void testGraphemeClusterTrimming2() {
+    assumeTrue(Build.VERSION.SDK_INT >= 24);
+
+    final String woman = "\uD83D\uDC69";
+    final String mediumDarkSkin = "\uD83C\uDFFE";
+    final String joint = "\u200D";
+    final String hands = "\uD83E\uDD1D";
+    final String man = "\uD83D\uDC68";
+    final String lightSkin = "\uD83C\uDFFB";
+
+    assertEquals(woman + mediumDarkSkin + joint + hands + joint + man + lightSkin,
+                 StringUtil.trimToFit(woman + mediumDarkSkin + joint + hands + joint + man + lightSkin, 26));
+    assertEquals("a",
+                 StringUtil.trimToFit("a" + woman + mediumDarkSkin + joint + hands + joint + man + lightSkin, 26));
+
+    final String pads = "abcdefghijk";
+    final String wheelchair = "\uD83E\uDDBC";
+    assertEquals(pads + man + lightSkin + joint + wheelchair,
+                 StringUtil.trimToFit(pads + man + lightSkin + joint + wheelchair, 26));
+    assertEquals(pads + "l",
+                 StringUtil.trimToFit(pads + "l" + man + lightSkin + joint + wheelchair, 26));
+
+    final String girl = "\uD83D\uDC67";
+    final String boy = "\uD83D\uDC66";
+    assertEquals(man + mediumDarkSkin + joint + man + joint + girl + lightSkin + joint + boy,
+                 StringUtil.trimToFit(man + mediumDarkSkin + joint + man + joint + girl + lightSkin + joint + boy, 33));
+    assertEquals("a",
+                 StringUtil.trimToFit("a" + man + mediumDarkSkin + joint + man + joint + girl + lightSkin + joint + boy, 33));
+  }
+}