Account for grapheme cluster when trimming to fit a specific length.

Fixes #10076
fork-5.53.8
Fumiaki Yoshimatsu 2020-10-25 18:13:29 -04:00 zatwierdzone przez Alex Hart
rodzic da4be5c1cf
commit f06817f00d
4 zmienionych plików z 387 dodań i 10 usunięć

Wyświetl plik

@ -0,0 +1,124 @@
package org.thoughtcrime.securesms.util;
import android.os.Build;
import androidx.annotation.NonNull;
import androidx.annotation.RequiresApi;
import java.util.Iterator;
/**
 * Iterates over a string, treating a surrogate pair and a grapheme cluster as a single character.
 * <p>
 * Each element returned by the iterator is the substring between two consecutive boundaries
 * reported by a character-instance break iterator.
 */
public final class CharacterIterable implements Iterable<String> {

  private final String string;

  /**
   * @param string the text to iterate over, one character (cluster) at a time
   */
  public CharacterIterable(@NonNull String string) {
    this.string = string;
  }

  @Override
  public @NonNull Iterator<String> iterator() {
    return new CharacterIterator();
  }

  /**
   * Walks the boundaries of the enclosing string. {@code lastIndex} always holds the start of the
   * next element to return once it has been initialized.
   */
  private class CharacterIterator implements Iterator<String> {
    // Sentinel meaning first() has not been called yet; distinct from BreakIterator.DONE.
    private static final int UNINITIALIZED = -2;

    private final BreakIteratorCompat breakIterator;

    private int lastIndex = UNINITIALIZED;

    CharacterIterator() {
      this.breakIterator = Build.VERSION.SDK_INT >= 24 ? new AndroidIcuBreakIterator(string)
                                                       : new FallbackBreakIterator(string);
    }

    @Override
    public boolean hasNext() {
      if (lastIndex == UNINITIALIZED) {
        lastIndex = breakIterator.first();
      }
      // The last boundary reported by a break iterator is the end of the text. Nothing follows it,
      // so it must terminate the iteration just like DONE does; otherwise next() would ask for a
      // substring ending at DONE (-1).
      return !breakIterator.isDone(lastIndex) && lastIndex < string.length();
    }

    @Override
    public String next() {
      // Going through hasNext() also initializes lastIndex when the caller skipped hasNext().
      if (!hasNext()) {
        throw new java.util.NoSuchElementException();
      }
      int firstIndex = lastIndex;
      lastIndex = breakIterator.next();
      return string.substring(firstIndex, lastIndex);
    }
  }

  /**
   * The subset of break iterator operations needed here, so that the ICU and the
   * {@code java.text} implementations can be used interchangeably.
   */
  private interface BreakIteratorCompat {
    /** Moves to and returns the first boundary. */
    int first();

    /** Moves to and returns the boundary following the current one. */
    int next();

    /** Returns true if {@code index} is the implementation's DONE marker. */
    boolean isDone(int index);
  }

  /**
   * A BreakIteratorCompat implementation that delegates calls to `android.icu.text.BreakIterator`.
   * This class handles grapheme clusters fine but requires Android API >= 24.
   */
  @RequiresApi(24)
  private static class AndroidIcuBreakIterator implements BreakIteratorCompat {
    private final android.icu.text.BreakIterator breakIterator = android.icu.text.BreakIterator.getCharacterInstance();

    public AndroidIcuBreakIterator(@NonNull String string) {
      breakIterator.setText(string);
    }

    @Override
    public int first() {
      return breakIterator.first();
    }

    @Override
    public int next() {
      return breakIterator.next();
    }

    @Override
    public boolean isDone(int index) {
      return index == android.icu.text.BreakIterator.DONE;
    }
  }

  /**
   * A BreakIteratorCompat implementation that delegates calls to `java.text.BreakIterator`.
   * This class may or may not handle grapheme clusters well depending on the underlying implementation.
   * In the emulator, API 23 implements ICU version of the BreakIterator so that it handles grapheme
   * clusters fine. But API 21 implements RuleBasedIterator which does not handle grapheme clusters.
   * <p>
   * If it doesn't handle grapheme clusters correctly, in most cases the combined characters are
   * broken up into pieces when the code tries to trim a string. For example, for an emoji that is
   * a combination of a person, gender and skin tone, trimming the character using this class may
   * result in trimming the parts of the character, e.g. a dark skin frowning woman emoji may result
   * in a neutral skin frowning woman emoji.
   */
  private static class FallbackBreakIterator implements BreakIteratorCompat {
    private final java.text.BreakIterator breakIterator = java.text.BreakIterator.getCharacterInstance();

    public FallbackBreakIterator(@NonNull String string) {
      breakIterator.setText(string);
    }

    @Override
    public int first() {
      return breakIterator.first();
    }

    @Override
    public int next() {
      return breakIterator.next();
    }

    @Override
    public boolean isDone(int index) {
      return index == java.text.BreakIterator.DONE;
    }
  }
}

Wyświetl plik

@ -1,9 +1,13 @@
package org.thoughtcrime.securesms.util;
import android.text.TextUtils;
import androidx.annotation.NonNull;
import androidx.annotation.Nullable;
import androidx.core.text.BidiFormatter;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Set;
@ -39,21 +43,33 @@ public final class StringUtil {
/**
* Trims a name string to fit into the byte length requirement.
* <p>
* This method treats a surrogate pair and a grapheme cluster a single character
* See examples in tests defined in StringUtilText_trimToFit.
*/
public static @NonNull String trimToFit(@Nullable String name, int maxLength) {
if (name == null) return "";
// At least one byte per char, so shorten string to reduce loop
if (name.length() > maxLength) {
name = name.substring(0, maxLength);
public static @NonNull String trimToFit(@Nullable String name, int maxByteLength) {
if (TextUtils.isEmpty(name)) {
return "";
}
// Remove one char at a time until fits in byte allowance
while (name.getBytes(StandardCharsets.UTF_8).length > maxLength) {
name = name.substring(0, name.length() - 1);
if (name.getBytes(StandardCharsets.UTF_8).length <= maxByteLength) {
return name;
}
return name;
try (ByteArrayOutputStream stream = new ByteArrayOutputStream()) {
for (String graphemeCharacter : new CharacterIterable(name)) {
byte[] bytes = graphemeCharacter.getBytes(StandardCharsets.UTF_8);
if (stream.size() + bytes.length <= maxByteLength) {
stream.write(bytes);
} else {
break;
}
}
return stream.toString();
} catch (IOException e) {
throw new AssertionError(e);
}
}
/**

Wyświetl plik

@ -1,6 +1,11 @@
package org.thoughtcrime.securesms.profiles;
import android.app.Application;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.robolectric.RobolectricTestRunner;
import org.robolectric.annotation.Config;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
@ -8,6 +13,8 @@ import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertSame;
import static org.junit.Assert.assertTrue;
@RunWith(RobolectricTestRunner.class)
@Config(manifest = Config.NONE, application = Application.class)
public final class ProfileNameTest {
@Test

Wyświetl plik

@ -0,0 +1,230 @@
package org.thoughtcrime.securesms.util;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.fail;
import static org.junit.Assume.assumeTrue;
import android.app.Application;
import android.os.Build;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.robolectric.RobolectricTestRunner;
import org.robolectric.annotation.Config;
/**
 * Tests for {@code StringUtil.trimToFit}: the second argument is a limit on the UTF-8 byte length
 * of the result, and multi-byte characters, surrogate pairs and grapheme clusters must be kept or
 * dropped as a whole.
 */
@RunWith(RobolectricTestRunner.class)
@Config(manifest = Config.NONE, application = Application.class)
public final class StringUtilTest_trimToFit {

  @Test
  public void testShortStringIsNotTrimmed() {
    assertEquals("Test string", StringUtil.trimToFit("Test string", 32));
    assertEquals("", StringUtil.trimToFit("", 32));
    assertEquals("aaaBBBCCC", StringUtil.trimToFit("aaaBBBCCC", 9));
  }

  @Test
  public void testNull() {
    assertEquals("", StringUtil.trimToFit(null, 0));
    assertEquals("", StringUtil.trimToFit(null, 1));
    assertEquals("", StringUtil.trimToFit(null, 10));
  }

  @Test
  public void testStringIsTrimmed() {
    assertEquals("Test stri", StringUtil.trimToFit("Test string", 9));
    assertEquals("aaaBBBCC", StringUtil.trimToFit("aaaBBBCCC", 8));
  }

  @Test
  public void testStringWithControlCharsIsTrimmed() {
    assertEquals("Test string\nwrap\r\nhere",
                 StringUtil.trimToFit("Test string\nwrap\r\nhere\tindent\n\n", 22));
  }

  /** Precomposed accented letters: each is a single code point taking more than one byte. */
  @Test
  public void testAccentedCharactersAreTrimmedCorrectly() {
    assertEquals("", StringUtil.trimToFit("âëȋõṷ", 1));
    assertEquals("â", StringUtil.trimToFit("âëȋõṷ", 2));
    assertEquals("â", StringUtil.trimToFit("âëȋõṷ", 3));
    assertEquals("âë", StringUtil.trimToFit("âëȋõṷ", 4));
    assertEquals("The last characters take more than a byte in utf8 â",
                 StringUtil.trimToFit("The last characters take more than a byte in utf8 âëȋõṷ", 53));
    assertEquals("un quinzième jour en jaune apr", StringUtil.trimToFit("un quinzième jour en jaune après son épopée de 2019", 32));
    assertEquals("una vez se organizaron detrás l", StringUtil.trimToFit("una vez se organizaron detrás la ventaja nunca pasó de los 3 minutos.", 32));
  }

  /** Base letter + combining mark (1 + 2 bytes): the pair must never be split. */
  @Test
  public void testCombinedAccentsAreTrimmedAsACharacter() {
    final String a = "a\u0302";
    final String e = "e\u0308";
    final String i = "i\u0311";
    final String o = "o\u0303";
    final String u = "u\u032d";
    assertEquals("", StringUtil.trimToFit(a + e + i + o + u, 1));
    assertEquals("", StringUtil.trimToFit(a + e + i + o + u, 2));
    assertEquals(a, StringUtil.trimToFit(a + e + i + o + u, 3));
    assertEquals(a, StringUtil.trimToFit(a + e + i + o + u, 4));
    assertEquals(a, StringUtil.trimToFit(a + e + i + o + u, 5));
    assertEquals(a + e, StringUtil.trimToFit(a + e + i + o + u, 6));
    assertEquals("The last characters take more than a byte in utf8 " + a,
                 StringUtil.trimToFit("The last characters take more than a byte in utf8 " + a + e + i + o + u, 53));
    assertEquals("un quinzie\u0300me jour en jaune apr", StringUtil.trimToFit("un quinzie\u0300me jour en jaune apre\u0300s son e\u0301pope\u0301e de 2019", 32));
    assertEquals("una vez se organizaron detra\u0301s ", StringUtil.trimToFit("una vez se organizaron detra\u0301s la ventaja nunca paso\u0301 de los 3 minutos.", 32));
  }

  /** CJK characters in the BMP: three UTF-8 bytes each. */
  @Test
  public void testCJKCharactersAreTrimmedCorrectly() {
    final String shin      = "\u4fe1";
    final String signal    = shin + "\u53f7";
    final String _private  = "\u79c1\u4eba";
    final String messenger = "\u4fe1\u4f7f";
    assertEquals("", StringUtil.trimToFit(signal, 1));
    assertEquals("", StringUtil.trimToFit(signal, 2));
    assertEquals(shin, StringUtil.trimToFit(signal, 3));
    assertEquals(shin, StringUtil.trimToFit(signal, 4));
    assertEquals(shin, StringUtil.trimToFit(signal, 5));
    assertEquals(signal, StringUtil.trimToFit(signal, 6));
    assertEquals(String.format("Signal %s Pr", signal),
                 StringUtil.trimToFit(String.format("Signal %s Private %s Messenger %s", signal, _private, messenger),
                                      16));
  }

  /** Supplementary-plane characters: two Java chars, four UTF-8 bytes each. */
  @Test
  public void testSurrogatePairsAreTrimmedCorrectly() {
    final String sword = "\uD841\uDF4F";
    assertEquals("", StringUtil.trimToFit(sword, 1));
    assertEquals("", StringUtil.trimToFit(sword, 2));
    assertEquals("", StringUtil.trimToFit(sword, 3));
    assertEquals(sword, StringUtil.trimToFit(sword, 4));

    final String so = "\ud869\uddf1";
    final String go = "\ud869\ude1a";
    assertEquals("", StringUtil.trimToFit(so + go, 1));
    assertEquals("", StringUtil.trimToFit(so + go, 2));
    assertEquals("", StringUtil.trimToFit(so + go, 3));
    assertEquals(so, StringUtil.trimToFit(so + go, 4));
    assertEquals(so, StringUtil.trimToFit(so + go, 5));
    assertEquals(so, StringUtil.trimToFit(so + go, 6));
    assertEquals(so, StringUtil.trimToFit(so + go, 7));
    assertEquals(so + go, StringUtil.trimToFit(so + go, 8));

    final String gClef = "\uD834\uDD1E";
    final String fClef = "\uD834\uDD22";
    assertEquals("", StringUtil.trimToFit(gClef + " " + fClef, 1));
    assertEquals("", StringUtil.trimToFit(gClef + " " + fClef, 2));
    assertEquals("", StringUtil.trimToFit(gClef + " " + fClef, 3));
    assertEquals(gClef, StringUtil.trimToFit(gClef + " " + fClef, 4));
    assertEquals(gClef + " ", StringUtil.trimToFit(gClef + " " + fClef, 5));
    assertEquals(gClef + " ", StringUtil.trimToFit(gClef + " " + fClef, 6));
    assertEquals(gClef + " ", StringUtil.trimToFit(gClef + " " + fClef, 7));
    assertEquals(gClef + " ", StringUtil.trimToFit(gClef + " " + fClef, 8));
    assertEquals(gClef + " " + fClef, StringUtil.trimToFit(gClef + " " + fClef, 9));
  }

  /** Emoji encoded as a single BMP code point (three UTF-8 bytes). */
  @Test
  public void testSimpleEmojiTrimming() {
    final String congrats = "\u3297";
    assertEquals("", StringUtil.trimToFit(congrats, 1));
    assertEquals("", StringUtil.trimToFit(congrats, 2));
    assertEquals(congrats, StringUtil.trimToFit(congrats, 3));

    final String eject = "\u23cf";
    assertEquals("", StringUtil.trimToFit(eject, 1));
    assertEquals("", StringUtil.trimToFit(eject, 2));
    assertEquals(eject, StringUtil.trimToFit(eject, 3));
  }

  /** Emoji encoded as a surrogate pair (four UTF-8 bytes), alone and mixed with other text. */
  @Test
  public void testEmojisSurrogatePairTrimming() {
    final String grape = "🍇";
    assertEquals("", StringUtil.trimToFit(grape, 1));
    assertEquals("", StringUtil.trimToFit(grape, 2));
    assertEquals("", StringUtil.trimToFit(grape, 3));
    assertEquals(grape, StringUtil.trimToFit(grape, 4));

    final String smile = "\uD83D\uDE42";
    assertEquals("", StringUtil.trimToFit(smile, 1));
    assertEquals("", StringUtil.trimToFit(smile, 2));
    assertEquals("", StringUtil.trimToFit(smile, 3));
    assertEquals(smile, StringUtil.trimToFit(smile, 4));

    final String check = "\u2714"; // Simple emoji
    assertEquals(check, StringUtil.trimToFit(check, 3));
    final String secret = "\u3299"; // Simple emoji
    assertEquals(secret, StringUtil.trimToFit(secret, 3));
    final String phoneWithArrow = "\uD83D\uDCF2"; // Surrogate Pair emoji
    assertEquals(phoneWithArrow, StringUtil.trimToFit(phoneWithArrow, 4));

    assertEquals(phoneWithArrow + ":",
                 StringUtil.trimToFit(phoneWithArrow + ":" + secret + ", " + check, 7));
    assertEquals(phoneWithArrow + ":" + secret,
                 StringUtil.trimToFit(phoneWithArrow + ":" + secret + ", " + check, 8));
    assertEquals(phoneWithArrow + ":" + secret + ",",
                 StringUtil.trimToFit(phoneWithArrow + ":" + secret + ", " + check, 9));
    assertEquals(phoneWithArrow + ":" + secret + ", ",
                 StringUtil.trimToFit(phoneWithArrow + ":" + secret + ", " + check, 10));
    assertEquals(phoneWithArrow + ":" + secret + ", ",
                 StringUtil.trimToFit(phoneWithArrow + ":" + secret + ", " + check, 11));
    assertEquals(phoneWithArrow + ":" + secret + ", ",
                 StringUtil.trimToFit(phoneWithArrow + ":" + secret + ", " + check, 12));
  }

  /**
   * Emoji with modifiers / zero-width-joiner sequences. Every pair of assertions checks the exact
   * boundary: the cluster is kept when the text is exactly at the limit and dropped as a whole
   * when one extra byte precedes it. Skipped when SDK_INT is below 24.
   */
  @Test
  public void testGraphemeClusterTrimming1() {
    assumeTrue(Build.VERSION.SDK_INT >= 24);

    // 25 bytes of ASCII + hand (4) + skin tone (4) = 33 bytes
    final String alphas     = "AAAAABBBBBCCCCCDDDDDEEEEE";
    final String wavingHand = "\uD83D\uDC4B";
    final String mediumDark = "\uD83C\uDFFE";
    assertEquals(alphas, StringUtil.trimToFit(alphas + wavingHand + mediumDark, 32));
    assertEquals(alphas + wavingHand + mediumDark, StringUtil.trimToFit(alphas + wavingHand + mediumDark, 33));

    // 13 bytes of ASCII + person (4) + ZWJ/female sign/variation selector (3 + 3 + 3) = 26 bytes
    final String pads           = "abcdefghijklm";
    final String frowningPerson = "\uD83D\uDE4D";
    final String female         = "\u200D\u2640\uFE0F";
    assertEquals(pads + frowningPerson + female,
                 StringUtil.trimToFit(pads + frowningPerson + female, 26));
    assertEquals(pads + "n",
                 StringUtil.trimToFit(pads + "n" + frowningPerson + female, 26));

    // 9 bytes of ASCII + person (4) + skin tone (4) + female suffix (9) = 26 bytes, so that one
    // more leading byte pushes the cluster over the limit
    final String pads1      = "abcdefghi";
    final String mediumSkin = "\uD83C\uDFFD";
    assertEquals(pads1 + frowningPerson + mediumSkin + female,
                 StringUtil.trimToFit(pads1 + frowningPerson + mediumSkin + female, 26));
    assertEquals(pads1 + "j",
                 StringUtil.trimToFit(pads1 + "j" + frowningPerson + mediumSkin + female, 26));
  }

  /**
   * Longer zero-width-joiner sequences combining several people and skin tones. Skipped when
   * SDK_INT is below 24.
   */
  @Test
  public void testGraphemeClusterTrimming2() {
    assumeTrue(Build.VERSION.SDK_INT >= 24);

    final String woman          = "\uD83D\uDC69";
    final String mediumDarkSkin = "\uD83C\uDFFE";
    final String joint          = "\u200D";
    final String hands          = "\uD83E\uDD1D";
    final String man            = "\uD83D\uDC68";
    final String lightSkin      = "\uD83C\uDFFB";

    // 4 + 4 + 3 + 4 + 3 + 4 + 4 = 26 bytes
    assertEquals(woman + mediumDarkSkin + joint + hands + joint + man + lightSkin,
                 StringUtil.trimToFit(woman + mediumDarkSkin + joint + hands + joint + man + lightSkin, 26));
    assertEquals("a",
                 StringUtil.trimToFit("a" + woman + mediumDarkSkin + joint + hands + joint + man + lightSkin, 26));

    // 11 bytes of ASCII + 4 + 4 + 3 + 4 = 26 bytes
    final String pads       = "abcdefghijk";
    final String wheelchair = "\uD83E\uDDBC";
    assertEquals(pads + man + lightSkin + joint + wheelchair,
                 StringUtil.trimToFit(pads + man + lightSkin + joint + wheelchair, 26));
    assertEquals(pads + "l",
                 StringUtil.trimToFit(pads + "l" + man + lightSkin + joint + wheelchair, 26));

    // 4 + 4 + 3 + 4 + 3 + 4 + 4 + 3 + 4 = 33 bytes
    final String girl = "\uD83D\uDC67";
    final String boy  = "\uD83D\uDC66";
    assertEquals(man + mediumDarkSkin + joint + man + joint + girl + lightSkin + joint + boy,
                 StringUtil.trimToFit(man + mediumDarkSkin + joint + man + joint + girl + lightSkin + joint + boy, 33));
    assertEquals("a",
                 StringUtil.trimToFit("a" + man + mediumDarkSkin + joint + man + joint + girl + lightSkin + joint + boy, 33));
  }
}