Make update_city_point use incremental update #814 (#951)

Replace the update of the whole table with an update of only the changed rows.

The goal is to update more quickly by updating only the content that changed.
The update now focuses on the osm_id of the changed rows, so it can use the index. It also adds a WHERE clause, `tags != update_tags(tags, geometry)`, to ensure rows are only updated when their tags actually change.

It requires one more trigger and a table to store the changed osm_ids.

The UPDATE is kept in a function so that it can be reused for both the initial setup and the trigger-driven update.

It is based on the already merged https://github.com/openmaptiles/openmaptiles/pull/944

It is a separate PR because it is less obvious than the previous one. It replaces the reset of the `rank` field to NULL with the missing value that results from the `LEFT JOIN`. This avoids triggering an extra update on the table by resetting the value and then setting it again to its initial or new value.

It addresses #814.

Thanks @frodrigo
pull/952/head^2
Frédéric Rodrigo 2020-08-30 14:29:50 +02:00 zatwierdzone przez GitHub
rodzic bb2a4328f3
commit 82616eaac0
Nie znaleziono w bazie danych klucza dla tego podpisu
ID klucza GPG: 4AEE18F83AFDEB23
1 zmienionych plików z 42 dodań i 18 usunięć

Wyświetl plik

@ -1,24 +1,27 @@
DROP TRIGGER IF EXISTS trigger_flag ON osm_city_point;
DROP TRIGGER IF EXISTS trigger_store ON osm_city_point;
DROP TRIGGER IF EXISTS trigger_refresh ON place_city.updates;
CREATE EXTENSION IF NOT EXISTS unaccent;
CREATE OR REPLACE FUNCTION update_osm_city_point() RETURNS void AS
CREATE SCHEMA IF NOT EXISTS place_city;
CREATE TABLE IF NOT EXISTS place_city.osm_ids
(
osm_id bigint
);
CREATE OR REPLACE FUNCTION update_osm_city_point(full_update boolean) RETURNS void AS
$$
BEGIN
-- Clear OSM key:rank ( https://github.com/openmaptiles/openmaptiles/issues/108 )
-- etldoc: osm_city_point -> osm_city_point
UPDATE osm_city_point AS osm SET "rank" = NULL WHERE "rank" IS NOT NULL;
-- etldoc: ne_10m_populated_places -> osm_city_point
-- etldoc: osm_city_point -> osm_city_point
WITH important_city_point AS (
SELECT osm.geometry, osm.osm_id, osm.name, osm.name_en, ne.scalerank, ne.labelrank
FROM ne_10m_populated_places AS ne,
osm_city_point AS osm
WHERE (
SELECT osm.osm_id, ne.scalerank
FROM osm_city_point AS osm
-- Clear OSM key:rank ( https://github.com/openmaptiles/openmaptiles/issues/108 )
LEFT JOIN ne_10m_populated_places AS ne ON
(
(osm.tags ? 'wikidata' AND osm.tags->'wikidata' = ne.wikidataid) OR
lower(osm.name) IN (lower(ne.name), lower(ne.namealt), lower(ne.meganame), lower(ne.gn_ascii), lower(ne.nameascii)) OR
lower(osm.name_en) IN (lower(ne.name), lower(ne.namealt), lower(ne.meganame), lower(ne.gn_ascii), lower(ne.nameascii)) OR
@ -32,22 +35,35 @@ BEGIN
-- are in the scalerank 5 bucket
SET "rank" = CASE WHEN scalerank <= 5 THEN scalerank + 1 ELSE scalerank END
FROM important_city_point AS ne
WHERE osm.osm_id = ne.osm_id;
WHERE (full_update OR osm.osm_id IN (SELECT osm_id FROM place_city.osm_ids))
AND rank IS DISTINCT FROM CASE WHEN scalerank <= 5 THEN scalerank + 1 ELSE scalerank END
AND osm.osm_id = ne.osm_id;
UPDATE osm_city_point
SET tags = update_tags(tags, geometry)
WHERE COALESCE(tags->'name:latin', tags->'name:nonlatin', tags->'name_int') IS NULL;
WHERE (full_update OR osm_id IN (SELECT osm_id FROM place_city.osm_ids))
AND COALESCE(tags->'name:latin', tags->'name:nonlatin', tags->'name_int') IS NULL
AND tags != update_tags(tags, geometry);
END;
$$ LANGUAGE plpgsql;
$$ LANGUAGE SQL;
SELECT update_osm_city_point();
SELECT update_osm_city_point(true);
CREATE INDEX IF NOT EXISTS osm_city_point_rank_idx ON osm_city_point ("rank");
-- Handle updates
CREATE SCHEMA IF NOT EXISTS place_city;
-- Trigger function that records which osm_city_point rows changed.
--
-- For every row-level INSERT, UPDATE or DELETE it appends the affected row's
-- osm_id to place_city.osm_ids. place_city.refresh() later passes
-- full_update = false to update_osm_city_point(), which restricts its work to
-- the ids stored here, and then empties the table.
--
-- Returns NULL: the trigger that uses this function in this file
-- (trigger_store) is an AFTER ... FOR EACH ROW trigger, and PostgreSQL ignores
-- the return value of AFTER row triggers.
CREATE OR REPLACE FUNCTION place_city.store() RETURNS trigger AS
$$
BEGIN
    -- On DELETE there is no NEW row image, so the id must come from OLD.
    -- The target column is named explicitly so the INSERT keeps working (and
    -- keeps writing to the right column) if place_city.osm_ids ever gains
    -- additional columns.
    IF (tg_op = 'DELETE') THEN
        INSERT INTO place_city.osm_ids (osm_id) VALUES (OLD.osm_id);
    ELSE
        INSERT INTO place_city.osm_ids (osm_id) VALUES (NEW.osm_id);
    END IF;
    RETURN NULL;
END;
$$ LANGUAGE plpgsql;
CREATE TABLE IF NOT EXISTS place_city.updates
(
@ -67,13 +83,21 @@ CREATE OR REPLACE FUNCTION place_city.refresh() RETURNS trigger AS
$$
BEGIN
RAISE LOG 'Refresh place_city rank';
PERFORM update_osm_city_point();
PERFORM update_osm_city_point(false);
-- noinspection SqlWithoutWhere
DELETE FROM place_city.osm_ids;
-- noinspection SqlWithoutWhere
DELETE FROM place_city.updates;
RETURN NULL;
END;
$$ LANGUAGE plpgsql;
-- Row-level trigger: after each INSERT, UPDATE or DELETE on osm_city_point,
-- call place_city.store(), which records the affected row's osm_id in
-- place_city.osm_ids.
CREATE TRIGGER trigger_store
AFTER INSERT OR UPDATE OR DELETE
ON osm_city_point
FOR EACH ROW
EXECUTE PROCEDURE place_city.store();
CREATE TRIGGER trigger_flag
AFTER INSERT OR UPDATE OR DELETE
ON osm_city_point