From ed1be766061ce03dea4325897571bbd7c5b52db8 Mon Sep 17 00:00:00 2001 From: Cody Henthorne Date: Mon, 19 Apr 2021 20:28:54 -0400 Subject: [PATCH] Scrub domains from debug logs. --- .../securesms/logsubmit/util/Scrubber.java | 34 +++++++++++++++++++ .../logsubmit/util/ScrubberTest.java | 20 +++++++++-- 2 files changed, 52 insertions(+), 2 deletions(-) diff --git a/app/src/main/java/org/thoughtcrime/securesms/logsubmit/util/Scrubber.java b/app/src/main/java/org/thoughtcrime/securesms/logsubmit/util/Scrubber.java index f230c6f5f..8010914e4 100644 --- a/app/src/main/java/org/thoughtcrime/securesms/logsubmit/util/Scrubber.java +++ b/app/src/main/java/org/thoughtcrime/securesms/logsubmit/util/Scrubber.java @@ -19,6 +19,10 @@ package org.thoughtcrime.securesms.logsubmit.util; import androidx.annotation.NonNull; +import java.util.Arrays; +import java.util.HashSet; +import java.util.Locale; +import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -62,6 +66,18 @@ public final class Scrubber { private static final Pattern UUID_PATTERN = Pattern.compile("(JOB::)?([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{10})([0-9a-f]{2})", Pattern.CASE_INSENSITIVE); private static final String UUID_CENSOR = "********-****-****-****-**********"; + /** + * The domain name except for TLD will be censored. 
+   */
+  private static final Pattern DOMAIN_PATTERN = Pattern.compile("([a-z0-9\\-]+\\.)+([a-z0-9\\-]*[a-z\\-][a-z0-9\\-]*)", Pattern.CASE_INSENSITIVE); // labels may contain '-', so hyphenated hosts are censored whole
+  private static final String DOMAIN_CENSOR = "***."; // replaces everything in front of the TLD
+  private static final Set<String> TOP_100_TLDS = new HashSet<>(Arrays.asList("com", "net", "org", "jp", "de", "uk", "fr", "br", "it", "ru", "es", "me", "gov", "pl", "ca", "au", "cn", "co", "in",
+                                                                              "nl", "edu", "info", "eu", "ch", "id", "at", "kr", "cz", "mx", "be", "tv", "se", "tr", "tw", "al", "ua", "ir", "vn",
+                                                                              "cl", "sk", "ly", "cc", "to", "no", "fi", "us", "pt", "dk", "ar", "hu", "tk", "gr", "il", "news", "ro", "my", "biz",
+                                                                              "ie", "za", "nz", "sg", "ee", "th", "io", "xyz", "pe", "bg", "hk", "lt", "link", "ph", "club", "si", "site",
+                                                                              "mobi", "by", "cat", "wiki", "la", "ga", "xxx", "cf", "hr", "ng", "jobs", "online", "kz", "ug", "gq", "ae", "is",
+                                                                              "lv", "pro", "fm", "tips", "ms", "sa", "app"));
+
   public static CharSequence scrub(@NonNull CharSequence in) {
 
     in = scrubE164(in);
@@ -69,6 +85,7 @@ public final class Scrubber {
     in = scrubGroupsV1(in);
     in = scrubGroupsV2(in);
     in = scrubUuids(in);
+    in = scrubDomains(in);
 
     return in;
   }
@@ -119,6 +136,23 @@ public final class Scrubber {
     });
   }
 
+  /** Rewrites every DOMAIN_PATTERN match whose last label is in TOP_100_TLDS to "***." + TLD; signal.org and whispersystems.org hosts are kept. */
+  private static CharSequence scrubDomains(@NonNull CharSequence in) {
+    return scrub(in, DOMAIN_PATTERN, (matcher, output) -> {
+      String match = matcher.group(0);
+      String host  = match.toLowerCase(Locale.US); // the pattern is case-insensitive, so compare in lower case
+      // Exempt only the exact host or a true subdomain: a bare endsWith("signal.org") would also exempt
+      // look-alikes such as "notsignal.org" and leave them readable in the submitted log.
+      boolean signalHost = host.equals("signal.org") || host.endsWith(".signal.org") ||
+                           host.equals("whispersystems.org") || host.endsWith(".whispersystems.org");
+      if (!signalHost && TOP_100_TLDS.contains(matcher.group(2).toLowerCase(Locale.US))) {
+        output.append(DOMAIN_CENSOR).append(matcher.group(2));
+      } else {
+        output.append(match);
+      }
+    });
+  }
+
   private static CharSequence scrub(@NonNull CharSequence in, @NonNull Pattern pattern, @NonNull ProcessMatch processMatch) {
     final StringBuilder output = new StringBuilder(in.length());
     final Matcher matcher = pattern.matcher(in);
diff --git a/app/src/test/java/org/thoughtcrime/securesms/logsubmit/util/ScrubberTest.java
b/app/src/test/java/org/thoughtcrime/securesms/logsubmit/util/ScrubberTest.java index e2ec737cc..c20dd8e43 100644 --- a/app/src/test/java/org/thoughtcrime/securesms/logsubmit/util/ScrubberTest.java +++ b/app/src/test/java/org/thoughtcrime/securesms/logsubmit/util/ScrubberTest.java @@ -79,8 +79,24 @@ public final class ScrubberTest { { "JOB::a37cb654-c9e0-4c1e-93df-3d11ca3c97f4", "JOB::a37cb654-c9e0-4c1e-93df-3d11ca3c97f4" }, - { "All patterns in a row __textsecure_group__!abcdefg1234567890 +1234567890123456 abc@def.com a37cb654-c9e0-4c1e-93df-3d11ca3c97f4 with text after", - "All patterns in a row __...group...90 +*************456 a...@... ********-****-****-****-**********f4 with text after" + { "All patterns in a row __textsecure_group__!abcdefg1234567890 +1234567890123456 abc@def.com a37cb654-c9e0-4c1e-93df-3d11ca3c97f4 nl.motorsport.com with text after", + "All patterns in a row __...group...90 +*************456 a...@... ********-****-****-****-**********f4 ***.com with text after" + }, + + { "java.net.UnknownServiceException: CLEARTEXT communication to nl.motorsport.com not permitted by network security policy", + "java.net.UnknownServiceException: CLEARTEXT communication to ***.com not permitted by network security policy" + }, + + { "nl.motorsport.com:443", + "***.com:443" + }, + + { "Failed to resolve textsecure-service.whispersystems.org using . Continuing.", + "Failed to resolve textsecure-service.whispersystems.org using . Continuing." + }, + + { " Caused by: java.io.IOException: unexpected end of stream on Connection{storage.signal.org:443, proxy=DIRECT hostAddress=storage.signal.org/142.251.32.211:443 cipherSuite=TLS_AES_128_GCM_SHA256 protocol=http/1.1}", + " Caused by: java.io.IOException: unexpected end of stream on Connection{storage.signal.org:443, proxy=DIRECT hostAddress=storage.signal.org/142.251.32.211:443 cipherSuite=TLS_AES_128_GCM_SHA256 protocol=http/1.1}" } });