2020-05-07 13:39:40 +00:00
|
|
|
package org.thoughtcrime.securesms.util;
|
|
|
|
|
2020-10-25 22:13:29 +00:00
|
|
|
import android.text.TextUtils;
|
|
|
|
|
2020-05-07 13:39:40 +00:00
|
|
|
import androidx.annotation.NonNull;
|
|
|
|
import androidx.annotation.Nullable;
|
2020-07-29 04:55:20 +00:00
|
|
|
import androidx.core.text.BidiFormatter;
|
2020-05-07 13:39:40 +00:00
|
|
|
|
2021-04-09 20:44:47 +00:00
|
|
|
import org.signal.core.util.BreakIteratorCompat;
|
|
|
|
|
2020-10-25 22:13:29 +00:00
|
|
|
import java.io.ByteArrayOutputStream;
|
|
|
|
import java.io.IOException;
|
2020-05-07 13:39:40 +00:00
|
|
|
import java.nio.charset.StandardCharsets;
|
2020-07-01 19:30:39 +00:00
|
|
|
import java.util.Set;
|
2020-05-07 13:39:40 +00:00
|
|
|
|
|
|
|
public final class StringUtil {
|
|
|
|
|
2020-11-17 13:58:14 +00:00
|
|
|
private static final Set<Character> WHITESPACE = SetUtil.newHashSet('\u200E', // left-to-right mark
|
|
|
|
'\u200F', // right-to-left mark
|
|
|
|
'\u2007'); // figure space
|
2020-07-01 19:30:39 +00:00
|
|
|
|
2020-07-29 04:55:20 +00:00
|
|
|
private static final class Bidi {
|
|
|
|
/** Override text direction */
|
2020-11-17 13:58:14 +00:00
|
|
|
private static final Set<Integer> OVERRIDES = SetUtil.newHashSet("\u202a".codePointAt(0), /* LRE */
|
|
|
|
"\u202b".codePointAt(0), /* RLE */
|
|
|
|
"\u202d".codePointAt(0), /* LRO */
|
|
|
|
"\u202e".codePointAt(0) /* RLO */);
|
2020-07-29 04:55:20 +00:00
|
|
|
|
|
|
|
/** Set direction and isolate surrounding text */
|
2020-11-17 13:58:14 +00:00
|
|
|
private static final Set<Integer> ISOLATES = SetUtil.newHashSet("\u2066".codePointAt(0), /* LRI */
|
|
|
|
"\u2067".codePointAt(0), /* RLI */
|
|
|
|
"\u2068".codePointAt(0) /* FSI */);
|
2020-07-29 04:55:20 +00:00
|
|
|
/** Closes things in {@link #OVERRIDES} */
|
|
|
|
private static final int PDF = "\u202c".codePointAt(0);
|
|
|
|
|
|
|
|
/** Closes things in {@link #ISOLATES} */
|
|
|
|
private static final int PDI = "\u2069".codePointAt(0);
|
|
|
|
|
|
|
|
/** Auto-detecting isolate */
|
|
|
|
private static final int FSI = "\u2068".codePointAt(0);
|
|
|
|
}
|
|
|
|
|
2020-05-07 13:39:40 +00:00
|
|
|
/** Private so the class cannot be instantiated from outside; its helpers are all static. */
private StringUtil() {}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Trims a name string to fit into the byte length requirement.
|
2020-10-25 22:13:29 +00:00
|
|
|
* <p>
|
|
|
|
* This method treats a surrogate pair and a grapheme cluster a single character
|
|
|
|
* See examples in tests defined in StringUtilText_trimToFit.
|
2020-05-07 13:39:40 +00:00
|
|
|
*/
|
2020-10-25 22:13:29 +00:00
|
|
|
public static @NonNull String trimToFit(@Nullable String name, int maxByteLength) {
|
|
|
|
if (TextUtils.isEmpty(name)) {
|
|
|
|
return "";
|
2020-05-07 13:39:40 +00:00
|
|
|
}
|
|
|
|
|
2020-10-25 22:13:29 +00:00
|
|
|
if (name.getBytes(StandardCharsets.UTF_8).length <= maxByteLength) {
|
|
|
|
return name;
|
2020-05-07 13:39:40 +00:00
|
|
|
}
|
|
|
|
|
2020-10-25 22:13:29 +00:00
|
|
|
try (ByteArrayOutputStream stream = new ByteArrayOutputStream()) {
|
|
|
|
for (String graphemeCharacter : new CharacterIterable(name)) {
|
|
|
|
byte[] bytes = graphemeCharacter.getBytes(StandardCharsets.UTF_8);
|
|
|
|
|
|
|
|
if (stream.size() + bytes.length <= maxByteLength) {
|
|
|
|
stream.write(bytes);
|
|
|
|
} else {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return stream.toString();
|
|
|
|
} catch (IOException e) {
|
|
|
|
throw new AssertionError(e);
|
|
|
|
}
|
2020-05-07 13:39:40 +00:00
|
|
|
}
|
2020-07-01 19:30:39 +00:00
|
|
|
|
2020-11-12 17:18:20 +00:00
|
|
|
/**
|
|
|
|
* @return A charsequence with no leading or trailing whitespace. Only creates a new charsequence
|
|
|
|
* if it has to.
|
|
|
|
*/
|
|
|
|
public static @NonNull CharSequence trim(@NonNull CharSequence charSequence) {
|
|
|
|
if (charSequence.length() == 0) {
|
|
|
|
return charSequence;
|
|
|
|
}
|
|
|
|
|
|
|
|
int start = 0;
|
|
|
|
int end = charSequence.length() - 1;
|
|
|
|
|
|
|
|
while (start < charSequence.length() && Character.isWhitespace(charSequence.charAt(start))) {
|
|
|
|
start++;
|
|
|
|
}
|
|
|
|
|
|
|
|
while (end >= 0 && end > start && Character.isWhitespace(charSequence.charAt(end))) {
|
|
|
|
end--;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (start > 0 || end < charSequence.length() - 1) {
|
|
|
|
return charSequence.subSequence(start, end + 1);
|
|
|
|
} else {
|
|
|
|
return charSequence;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-07-01 19:30:39 +00:00
|
|
|
/**
|
|
|
|
* @return True if the string is empty, or if it contains nothing but whitespace characters.
|
|
|
|
* Accounts for various unicode whitespace characters.
|
|
|
|
*/
|
|
|
|
public static boolean isVisuallyEmpty(@Nullable String value) {
|
|
|
|
if (value == null || value.length() == 0) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2020-07-23 15:25:37 +00:00
|
|
|
return indexOfFirstNonEmptyChar(value) == -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @return String without any leading or trailing whitespace.
|
|
|
|
* Accounts for various unicode whitespace characters.
|
|
|
|
*/
|
|
|
|
public static String trimToVisualBounds(@NonNull String value) {
|
|
|
|
int start = indexOfFirstNonEmptyChar(value);
|
|
|
|
|
|
|
|
if (start == -1) {
|
|
|
|
return "";
|
|
|
|
}
|
|
|
|
|
|
|
|
int end = indexOfLastNonEmptyChar(value);
|
|
|
|
|
|
|
|
return value.substring(start, end + 1);
|
|
|
|
}
|
|
|
|
|
|
|
|
private static int indexOfFirstNonEmptyChar(@NonNull String value) {
|
|
|
|
int length = value.length();
|
|
|
|
|
|
|
|
for (int i = 0; i < length; i++) {
|
2020-07-01 19:30:39 +00:00
|
|
|
if (!isVisuallyEmpty(value.charAt(i))) {
|
2020-07-23 15:25:37 +00:00
|
|
|
return i;
|
2020-07-01 19:30:39 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-07-23 15:25:37 +00:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
private static int indexOfLastNonEmptyChar(@NonNull String value) {
|
|
|
|
for (int i = value.length() - 1; i >= 0; i--) {
|
|
|
|
if (!isVisuallyEmpty(value.charAt(i))) {
|
|
|
|
return i;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return -1;
|
2020-07-01 19:30:39 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @return True if the character is invisible or whitespace. Accounts for various unicode
|
|
|
|
* whitespace characters.
|
|
|
|
*/
|
|
|
|
public static boolean isVisuallyEmpty(char c) {
|
|
|
|
return Character.isWhitespace(c) || WHITESPACE.contains(c);
|
|
|
|
}
|
2020-07-23 02:36:10 +00:00
|
|
|
|
|
|
|
/**
|
|
|
|
* @return A string representation of the provided unicode code point.
|
|
|
|
*/
|
|
|
|
public static @NonNull String codePointToString(int codePoint) {
|
|
|
|
return new String(Character.toChars(codePoint));
|
|
|
|
}
|
2020-07-29 04:55:20 +00:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Isolates bi-directional text from influencing surrounding text. You should use this whenever
|
|
|
|
* you're injecting user-generated text into a larger string.
|
|
|
|
*
|
|
|
|
* You'd think we'd be able to trust {@link BidiFormatter}, but unfortunately it just misses some
|
|
|
|
* corner cases, so here we are.
|
|
|
|
*
|
|
|
|
* The general idea is just to balance out the opening and closing codepoints, and then wrap the
|
|
|
|
* whole thing in FSI/PDI to isolate it.
|
|
|
|
*
|
|
|
|
* For more details, see:
|
|
|
|
* https://www.w3.org/International/questions/qa-bidi-unicode-controls
|
|
|
|
*/
|
2020-12-17 16:53:58 +00:00
|
|
|
public static @NonNull String isolateBidi(@Nullable String text) {
|
|
|
|
if (text == null) {
|
|
|
|
return "";
|
|
|
|
}
|
|
|
|
|
|
|
|
if (Util.isEmpty(text)) {
|
2020-07-31 03:05:09 +00:00
|
|
|
return text;
|
|
|
|
}
|
|
|
|
|
2020-07-29 04:55:20 +00:00
|
|
|
int overrideCount = 0;
|
|
|
|
int overrideCloseCount = 0;
|
|
|
|
int isolateCount = 0;
|
|
|
|
int isolateCloseCount = 0;
|
|
|
|
|
|
|
|
for (int i = 0, len = text.codePointCount(0, text.length()); i < len; i++) {
|
|
|
|
int codePoint = text.codePointAt(i);
|
|
|
|
|
|
|
|
if (Bidi.OVERRIDES.contains(codePoint)) {
|
|
|
|
overrideCount++;
|
|
|
|
} else if (codePoint == Bidi.PDF) {
|
|
|
|
overrideCloseCount++;
|
|
|
|
} else if (Bidi.ISOLATES.contains(codePoint)) {
|
|
|
|
isolateCount++;
|
|
|
|
} else if (codePoint == Bidi.PDI) {
|
|
|
|
isolateCloseCount++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
StringBuilder suffix = new StringBuilder();
|
|
|
|
|
|
|
|
while (overrideCount > overrideCloseCount) {
|
|
|
|
suffix.appendCodePoint(Bidi.PDF);
|
|
|
|
overrideCloseCount++;
|
|
|
|
}
|
|
|
|
|
|
|
|
while (isolateCount > isolateCloseCount) {
|
|
|
|
suffix.appendCodePoint(Bidi.FSI);
|
|
|
|
isolateCloseCount++;
|
|
|
|
}
|
|
|
|
|
|
|
|
StringBuilder out = new StringBuilder();
|
|
|
|
|
|
|
|
return out.appendCodePoint(Bidi.FSI)
|
|
|
|
.append(text)
|
|
|
|
.append(suffix)
|
|
|
|
.appendCodePoint(Bidi.PDI)
|
|
|
|
.toString();
|
|
|
|
}
|
2020-08-05 20:45:52 +00:00
|
|
|
|
2020-09-25 17:46:38 +00:00
|
|
|
public static @Nullable String stripBidiProtection(@Nullable String text) {
|
|
|
|
if (text == null) return null;
|
|
|
|
|
|
|
|
return text.replaceAll("[\\u2068\\u2069\\u202c]", "");
|
|
|
|
}
|
|
|
|
|
2020-08-05 20:45:52 +00:00
|
|
|
/**
|
|
|
|
* Trims a {@link CharSequence} of starting and trailing whitespace. Behavior matches
|
|
|
|
* {@link String#trim()} to preserve expectations around results.
|
|
|
|
*/
|
|
|
|
public static CharSequence trimSequence(CharSequence text) {
|
|
|
|
int length = text.length();
|
|
|
|
int startIndex = 0;
|
|
|
|
|
|
|
|
while ((startIndex < length) && (text.charAt(startIndex) <= ' ')) {
|
|
|
|
startIndex++;
|
|
|
|
}
|
|
|
|
while ((startIndex < length) && (text.charAt(length - 1) <= ' ')) {
|
|
|
|
length--;
|
|
|
|
}
|
|
|
|
return (startIndex > 0 || length < text.length()) ? text.subSequence(startIndex, length) : text;
|
|
|
|
}
|
2021-04-06 16:03:33 +00:00
|
|
|
|
|
|
|
/**
|
|
|
|
* If the {@param text} exceeds the {@param maxChars} it is trimmed in the middle so that the result is exactly {@param maxChars} long including an added
|
|
|
|
* ellipsis character.
|
|
|
|
* <p>
|
|
|
|
* Otherwise the string is returned untouched.
|
|
|
|
* <p>
|
|
|
|
* When {@param maxChars} is even, one more character is kept from the end of the string than the start.
|
|
|
|
*/
|
|
|
|
public static @Nullable CharSequence abbreviateInMiddle(@Nullable CharSequence text, int maxChars) {
|
|
|
|
if (text == null || text.length() <= maxChars) {
|
|
|
|
return text;
|
|
|
|
}
|
|
|
|
|
|
|
|
int start = (maxChars - 1) / 2;
|
|
|
|
int end = (maxChars - 1) - start;
|
|
|
|
return text.subSequence(0, start) + "…" + text.subSequence(text.length() - end, text.length());
|
|
|
|
}
|
2021-04-09 20:44:47 +00:00
|
|
|
|
|
|
|
/**
|
|
|
|
* @return The number of graphemes in the provided string.
|
|
|
|
*/
|
|
|
|
public static int getGraphemeCount(@NonNull CharSequence text) {
|
|
|
|
BreakIteratorCompat iterator = BreakIteratorCompat.getInstance();
|
|
|
|
iterator.setText(text);
|
|
|
|
return iterator.countBreaks();
|
|
|
|
}
|
2020-05-07 13:39:40 +00:00
|
|
|
}
|