translation cleanup

pull/1/head
Mike Barry 2021-06-18 08:31:08 -04:00
rodzic 32d109f52a
commit cd9122c5ad
8 zmienionych plików z 105 dodań i 29 usunięć

Wyświetl plik

@ -1,5 +1,6 @@
package com.onthegomap.flatmap;
import com.graphhopper.reader.ReaderElement;
import com.graphhopper.reader.ReaderRelation;
import com.onthegomap.flatmap.geo.GeometryException;
import com.onthegomap.flatmap.read.OpenStreetMapReader;
@ -39,6 +40,10 @@ public interface Profile {
return false;
}
/**
 * Reports whether wikidata name translations are wanted for {@code elem}.
 *
 * <p>This default implementation answers {@code true} for every element; implementations
 * may override it to narrow the set of elements.
 *
 * @param elem the OSM element being considered
 * @return {@code true} if translations for this element should be looked up
 */
default boolean caresAboutWikidataTranslation(ReaderElement elem) {
return true;
}
class NullProfile implements Profile {
@Override

Wyświetl plik

@ -11,7 +11,6 @@ import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.graphhopper.coll.GHLongObjectHashMap;
import com.graphhopper.reader.ReaderElement;
import com.graphhopper.reader.ReaderElementUtils;
import com.graphhopper.util.StopWatch;
import com.onthegomap.flatmap.monitoring.ProgressLoggers;
import com.onthegomap.flatmap.monitoring.Stats;
@ -63,11 +62,13 @@ public class Wikidata {
private final Writer writer;
private final Client client;
private final int batchSize;
private final Profile profile;
public Wikidata(Writer writer, Client client, int batchSize) {
public Wikidata(Writer writer, Client client, int batchSize, Profile profile) {
this.writer = writer;
this.client = client;
this.batchSize = batchSize;
this.profile = profile;
qidsToFetch = new ArrayList<>(batchSize);
}
@ -129,7 +130,7 @@ public class Wikidata {
WikidataTranslations oldMappings = load(outfile);
try (Writer writer = Files.newBufferedWriter(outfile)) {
HttpClient client = HttpClient.newBuilder().connectTimeout(Duration.ofSeconds(30)).build();
Wikidata fetcher = new Wikidata(writer, Client.wrap(client), 5_000);
Wikidata fetcher = new Wikidata(writer, Client.wrap(client), 5_000, profile);
fetcher.loadExisting(oldMappings);
var topology = Topology.start("wikidata", stats)
@ -213,7 +214,6 @@ public class Wikidata {
}
private void filter(Supplier<ReaderElement> prev, Consumer<Long> next) {
TrackUsageMapping qidTracker = new TrackUsageMapping();
ReaderElement elem;
while ((elem = prev.get()) != null) {
switch (elem.getType()) {
@ -221,12 +221,13 @@ public class Wikidata {
case ReaderElement.WAY -> ways.incrementAndGet();
case ReaderElement.RELATION -> rels.incrementAndGet();
}
if (elem.hasTag("wikidata")) {
qidTracker.qid = 0;
// TODO send reader element through profile
qidTracker.getNameTranslations(ReaderElementUtils.getProperties(elem));
if (qidTracker.qid > 0) {
next.accept(qidTracker.qid);
Object wikidata = elem.getTag("wikidata");
if (wikidata instanceof String wikidataString) {
if (profile.caresAboutWikidataTranslation(elem)) {
long qid = parseQid(wikidataString);
if (qid > 0) {
next.accept(qid);
}
}
}
}
@ -335,15 +336,4 @@ public class Wikidata {
return null;
}
}
/**
 * {@code WikidataTranslations} subclass that does not return any translations: each call to
 * {@link #get(long)} only records the requested QID in {@link #qid}.
 */
private static class TrackUsageMapping extends WikidataTranslations {
// QID passed to the most recent get() call; stays 0 until get() is called
public long qid = 0;
@Override
public Map<String, String> get(long qid) {
// remember which QID was asked for, and report "no translations"
this.qid = qid;
return null;
}
}
}

Wyświetl plik

@ -0,0 +1,30 @@
package com.onthegomap.flatmap;
import static org.junit.jupiter.api.Assertions.assertEquals;
import java.util.List;
import java.util.Map;
import org.junit.jupiter.api.Test;
/** Tests for the name-translation lookups produced by {@code Translations}. */
public class TranslationsTest {

  /** A null provider is expected to return no translations, even for a matching tag. */
  @Test
  public void testNull() {
    var provider = Translations.nullProvider(List.of("en"));
    var actual = provider.getTranslations(Map.of("name:en", "name"));
    assertEquals(Map.of(), actual);
  }

  /** With only "en" requested, the "name:de" tag is expected to be left out. */
  @Test
  public void testDefaultProvider() {
    var provider = Translations.defaultProvider(List.of("en"));
    var actual = provider.getTranslations(Map.of("name:en", "name", "name:de", "de"));
    assertEquals(Map.of("name:en", "name"), actual);
  }

  /**
   * With an extra provider added, its values are expected to win for "en" and "de", while
   * "es" still comes from the element's own tags.
   */
  @Test
  public void testTwoProviders() {
    var provider = Translations.defaultProvider(List.of("en", "es", "de"))
      .addTranslationProvider(tags -> Map.of("name:de", "de2", "name:en", "en2"));
    var actual = provider.getTranslations(Map.of("name:en", "en1", "name:es", "es1"));
    assertEquals(Map.of("name:en", "en2", "name:es", "es1", "name:de", "de2"), actual);
  }
}

Wyświetl plik

@ -28,6 +28,8 @@ import org.mockito.Mockito;
public class WikidataTest {
Profile profile = new Profile.NullProfile();
@Test
public void testWikidataTranslations() {
var expected = Map.of("en", "en value", "es", "es value");
@ -48,7 +50,7 @@ public class WikidataTest {
public List<DynamicTest> testFetchWikidata() throws IOException, InterruptedException {
StringWriter writer = new StringWriter();
Wikidata.Client client = Mockito.mock(Wikidata.Client.class, Mockito.RETURNS_SMART_NULLS);
Wikidata fixture = new Wikidata(writer, client, 2);
Wikidata fixture = new Wikidata(writer, client, 2, profile);
fixture.fetch(1L);
Mockito.verifyNoInteractions(client);
Mockito.when(client.send(Mockito.any())).thenReturn(new ByteArrayInputStream("""
@ -117,7 +119,7 @@ public class WikidataTest {
dynamicTest("do not re-request on subsequent loads", () -> {
StringWriter writer2 = new StringWriter();
Wikidata.Client client2 = Mockito.mock(Wikidata.Client.class, Mockito.RETURNS_SMART_NULLS);
Wikidata fixture2 = new Wikidata(writer2, client2, 2);
Wikidata fixture2 = new Wikidata(writer2, client2, 2, profile);
fixture2.loadExisting(Wikidata.load(new BufferedReader(new StringReader(writer.toString()))));
fixture2.fetch(1L);
fixture2.fetch(2L);

Wyświetl plik

@ -147,6 +147,10 @@ public record MultiExpression<T>(Map<T, Expression> expressions) {
}
}
/**
 * Tells whether at least one expression matches the given tags.
 *
 * @param input the tags to evaluate
 * @return {@code true} when {@link #getMatchesWithTriggers(Map)} returns a non-empty list
 */
public boolean matches(Map<String, Object> input) {
  List<MatchWithTriggers<T>> found = getMatchesWithTriggers(input);
  return !found.isEmpty();
}
public static record MatchWithTriggers<T>(T match, List<String> keys) {}
public List<MatchWithTriggers<T>> getMatchesWithTriggers(Map<String, Object> input) {

Wyświetl plik

@ -4,6 +4,8 @@ import static com.onthegomap.flatmap.openmaptiles.Expression.FALSE;
import static com.onthegomap.flatmap.openmaptiles.Expression.TRUE;
import static com.onthegomap.flatmap.openmaptiles.Expression.matchType;
import com.graphhopper.reader.ReaderElement;
import com.graphhopper.reader.ReaderElementUtils;
import com.graphhopper.reader.ReaderRelation;
import com.onthegomap.flatmap.Arguments;
import com.onthegomap.flatmap.FeatureCollector;
@ -178,6 +180,17 @@ public class OpenMapTilesProfile implements Profile {
throws GeometryException;
}
/**
 * Reports whether wikidata translations are wanted for {@code elem}, based on whether its
 * tags match one of this profile's mappings for the element's type: point mappings for
 * nodes, polygon or line mappings for ways, polygon mappings for relations. Any other
 * element type is answered with {@code false}.
 */
@Override
public boolean caresAboutWikidataTranslation(ReaderElement elem) {
  var properties = ReaderElementUtils.getProperties(elem);
  var type = elem.getType();
  if (type == ReaderElement.NODE) {
    return osmPointMappings.matches(properties);
  }
  if (type == ReaderElement.WAY) {
    return osmPolygonMappings.matches(properties) || osmLineMappings.matches(properties);
  }
  if (type == ReaderElement.RELATION) {
    return osmPolygonMappings.matches(properties);
  }
  return false;
}
@Override
public String name() {
return Layers.NAME;

Wyświetl plik

@ -1,17 +1,25 @@
package com.onthegomap.flatmap.openmaptiles;
import com.onthegomap.flatmap.Arguments;
import com.onthegomap.flatmap.CommonParams;
import com.onthegomap.flatmap.FlatMapRunner;
import com.onthegomap.flatmap.Translations;
import com.onthegomap.flatmap.Wikidata;
import com.onthegomap.flatmap.monitoring.Stats;
import com.onthegomap.flatmap.openmaptiles.generated.Layers;
import com.onthegomap.flatmap.read.OsmInputFile;
import java.nio.file.Path;
import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class OpenMaptilesMain {
private static final Logger LOGGER = LoggerFactory.getLogger(OpenMaptilesMain.class);
private static final String fallbackOsmFile = "north-america_us_massachusetts.pbf";
private static final Path sourcesDir = Path.of("data", "sources");
public static void main(String[] args) throws Exception {
Path sourcesDir = Path.of("data", "sources");
FlatMapRunner runner = FlatMapRunner.create();
@ -23,24 +31,35 @@ public class OpenMaptilesMain {
// sourcesDir.resolve("water-polygons-split-3857.zip"))
// .addNaturalEarthSource(OpenMapTilesProfile.NATURAL_EARTH_SOURCE,
// sourcesDir.resolve("natural_earth_vector.sqlite.zip"))
.addOsmSource(OpenMapTilesProfile.OSM_SOURCE, sourcesDir.resolve("north-america_us_massachusetts.pbf"))
.addOsmSource(OpenMapTilesProfile.OSM_SOURCE, sourcesDir.resolve(fallbackOsmFile))
.setOutput("mbtiles", Path.of("data", "massachusetts.mbtiles"))
.run();
}
private static OpenMapTilesProfile createProfileWithWikidataTranslations(FlatMapRunner runner) {
private static OpenMapTilesProfile createProfileWithWikidataTranslations(FlatMapRunner runner) throws Exception {
Arguments arguments = runner.arguments();
boolean fetchWikidata = arguments.get("fetch_wikidata", "fetch wikidata translations", false);
boolean fetchWikidata = arguments.get("fetch_wikidata", "fetch wikidata translations then continue", false);
boolean onlyFetchWikidata = arguments.get("only_fetch_wikidata", "fetch wikidata translations then quit", false);
boolean useWikidata = arguments.get("use_wikidata", "use wikidata translations", true);
boolean transliterate = arguments.get("transliterate", "attempt to transliterate latin names", true);
Path wikidataNamesFile = arguments.file("wikidata_cache", "wikidata cache file",
Path.of("data", "sources", "wikidata_names.json"));
// most common languages: "en,ru,ar,zh,ja,ko,fr,de,fi,pl,es,be,br,he"
List<String> languages = arguments.get("name_languages", "languages to use",
Layers.LANGUAGES.toArray(String[]::new));
List<String> languages = arguments
.get("name_languages", "languages to use", Layers.LANGUAGES.toArray(String[]::new));
var translations = Translations.defaultProvider(languages).setShouldTransliterate(transliterate);
var profile = new OpenMapTilesProfile(translations, arguments, runner.stats());
if (onlyFetchWikidata) {
LOGGER.info("Will fetch wikidata translations then quit...");
var osmInput = new OsmInputFile(
arguments.inputFile(OpenMapTilesProfile.OSM_SOURCE, "input file", sourcesDir.resolve(fallbackOsmFile)));
Wikidata
.fetch(osmInput, wikidataNamesFile, CommonParams.from(arguments, osmInput), profile, new Stats.InMemory());
translations.addTranslationProvider(Wikidata.load(wikidataNamesFile));
System.exit(0);
}
if (useWikidata) {
if (fetchWikidata) {
runner.addStage("wikidata", "fetch translations from wikidata query service", () -> {

Wyświetl plik

@ -6,8 +6,11 @@ import static com.onthegomap.flatmap.TestUtils.newPoint;
import static com.onthegomap.flatmap.TestUtils.newPolygon;
import static com.onthegomap.flatmap.openmaptiles.OpenMapTilesProfile.OSM_SOURCE;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.DynamicTest.dynamicTest;
import com.graphhopper.reader.ReaderNode;
import com.onthegomap.flatmap.Arguments;
import com.onthegomap.flatmap.CommonParams;
import com.onthegomap.flatmap.FeatureCollector;
@ -380,6 +383,16 @@ public class OpenMaptilesProfileTest {
))));
}
/**
 * Expects the profile to want wikidata translations for an {@code aeroway=gate} node, and to
 * stop wanting them once the same node is retagged {@code aeroway=other}.
 */
@Test
public void testCaresAboutWikidata() {
  ReaderNode element = new ReaderNode(1, 1, 1);

  element.setTag("aeroway", "gate");
  boolean caresAboutGate = profile.caresAboutWikidataTranslation(element);
  assertTrue(caresAboutGate);

  // overwrite the tag on the same node
  element.setTag("aeroway", "other");
  boolean caresAboutOther = profile.caresAboutWikidataTranslation(element);
  assertFalse(caresAboutOther);
}
private VectorTileEncoder.Feature pointFeature(String layer, Map<String, Object> map, int group) {
return new VectorTileEncoder.Feature(
layer,