From c2c8cbe6ef6d86ad39fbee1c43c6f0b56e0e4a71 Mon Sep 17 00:00:00 2001 From: Michael Barry Date: Sat, 30 Nov 2024 11:36:31 -0500 Subject: [PATCH] Make valid language tag regex case sensitive (#1117) --- .../planetiler/util/LanguageUtils.java | 2 +- .../planetiler/util/LanguageUtilsTest.java | 60 ++++++++++++------- 2 files changed, 39 insertions(+), 23 deletions(-) diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/util/LanguageUtils.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/util/LanguageUtils.java index ffcf26a7..5cd9e03d 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/util/LanguageUtils.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/util/LanguageUtils.java @@ -10,7 +10,7 @@ public class LanguageUtils { // See https://wiki.openstreetmap.org/wiki/Multilingual_names public static final Predicate<String> VALID_NAME_TAGS = Pattern - .compile("^name:[a-z]{2,3}(-[a-z]{4})?([-_](x-)?[a-z]{2,})?(-([a-z]{2}|\\d{3}))?$", Pattern.CASE_INSENSITIVE) + .compile("^name:[a-z]{2,3}(-[A-Z][a-z]{3})?([-_](x-)?[a-z]{2,})?(-([A-Z]{2}|\\d{3}))?$") .asMatchPredicate(); // See https://github.com/onthegomap/planetiler/issues/86 // Match strings that only contain latin characters. 
diff --git a/planetiler-core/src/test/java/com/onthegomap/planetiler/util/LanguageUtilsTest.java b/planetiler-core/src/test/java/com/onthegomap/planetiler/util/LanguageUtilsTest.java index 049dc3f7..12bf8ac7 100644 --- a/planetiler-core/src/test/java/com/onthegomap/planetiler/util/LanguageUtilsTest.java +++ b/planetiler-core/src/test/java/com/onthegomap/planetiler/util/LanguageUtilsTest.java @@ -1,9 +1,12 @@ package com.onthegomap.planetiler.util; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.CsvSource; +import org.junit.jupiter.params.provider.ValueSource; class LanguageUtilsTest { @@ -26,28 +29,41 @@ class LanguageUtilsTest { } @ParameterizedTest - @CsvSource(value = { - "name:es, true", - "name:en-US, true", - "name:fr-x-gallo, true", - "name:ko-Latn, true", - "name:be-tarask, true", - "name:ja_rm, true", - "name:ja_kana, true", - "name:vls, true", - "name:zh-hant-CN, true", - "name:zh_pinyin, true", - "name:zh_zhuyin, true", - "name:zh-Latn-tongyong, true", - "name:zh-Latn-pinyin, true", - "name:zh-Latn-wadegiles, true", - "name:yue-Latn-jyutping, true", - "nombre, false", - "name:, false", - "name:xxxxx, false", - }, nullValues = "null") - void testIsValidOsmNameTag(String in, boolean out) { - assertEquals(out, LanguageUtils.isValidOsmNameTag(in)); + @ValueSource(strings = { + "name:es", + "name:en-US", + "name:en-001", + "name:fr-x-gallo", + "name:ko-Latn", + "name:be-tarask", + "name:ja_rm", + "name:ja_kana", + "name:vls", + "name:zh-hant-CN", + "name:zh_pinyin", + "name:zh_zhuyin", + "name:zh-Latn-tongyong", + "name:zh-Latn-pinyin", + "name:zh-Latn-wadegiles", + "name:yue-Latn-jyutping", + "name:tec", + "name:be-tarask", + "name:nan-Latn-pehoeji", + "name:zh-Latn-pinyin", + }) + void testIsValidOsmNameTag(String in) { + 
assertTrue(LanguageUtils.isValidOsmNameTag(in)); + } + + @ParameterizedTest + @ValueSource(strings = { + "nombre", + "name:", + "name:xxxxx", + "name:TEC", + }) + void testIsNotValidOsmNameTag(String in) { + assertFalse(LanguageUtils.isValidOsmNameTag(in)); } }