Fix osm enrich, add env file

pull/110/head
admire 2021-02-09 17:47:37 +02:00
rodzic 20c11e3ff5
commit 81cca54a96
4 zmienionych plików z 236 dodań i 143 usunięć

53
.env 100644
Wyświetl plik

@ -0,0 +1,53 @@
COMPOSE_PROJECT_NAME=kartozadockerosm
POSTGRES_USER=docker
POSTGRES_PASS=docker
POSTGRES_DBNAME=gis
DB_PORT=35432
PGDB_PORT=6500
POSTGRES_VERSION=13-3.1
MARTIN_PORT=3000
WATCH_MODE=true
DATABASE_URL=postgres://docker:docker@db/gis
# Uncomment to expose the postgis database on the network
ALLOW_IP_RANGE= 0.0.0.0/0
POSTGRES_PORT=5432
POSTGRES_HOST=db
# seconds between 2 executions of the script
# if 0, then no update will be done, only the first initial import from the PBF
TIME=120
# folder for settings (with *.json and *.sql)
SETTINGS=settings
# folder for caching
CACHE=cache
# folder for diff which has been imported
IMPORT_DONE=import_done
# folder for diff which hasn't been imported yet
IMPORT_QUEUE=import_queue
# it can be 3857
SRID=4326
# see http://imposm.org/docs/imposm3/latest/tutorial.html#optimize
OPTIMIZE=false
# see http://imposm.org/docs/imposm3/latest/tutorial.html#deploy-production-tables
DBSCHEMA_PRODUCTION=public
# http://imposm.org/docs/imposm3/latest/tutorial.html#deploy-production-tables
DBSCHEMA_IMPORT=import
# http://imposm.org/docs/imposm3/latest/tutorial.html#deploy-production-tables
DBSCHEMA_BACKUP=backup
# Install some styles if you are using the default mapping. It can be 'yes' or 'no'
QGIS_STYLE=yes
# Use clip in the database - To use this you should have run make import_clip to add your clip to the DB
CLIP=no
# These are all currently the defaults but listed here for your
# convenience if you want to change them
# the maximum time range to assemble a cumulated changefile.
MAX_DAYS=100
# osmupdate uses a combination of minutely, hourly and daily changefiles. This value can be minute, hour, day or sporadic.
DIFF=sporadic
# argument to determine the maximum number of changefiles processed in parallel.
MAX_MERGE=7
# define level for gzip compression. values between 1 (low compression but fast) and 9 (high compression but slow)
COMPRESSION_LEVEL=1
# change the URL to use a custom URL to fetch regional file updates.
BASE_URL=http://planet.openstreetmap.org/replication/
PGADMIN_DEFAULT_EMAIL=docker@gmail.com
PGADMIN_DEFAULT_PASSWORD=docker

Wyświetl plik

@ -1,42 +1,112 @@
# Usage:
version: '3.9'
# docker-compose -f docker-compose.yml -f docker-compose-web.yml
version: '2.1'
volumes:
osm-postgis-data:
import_done:
import_queue:
cache:
pgadmin_data:
services:
qgisserver:
image: camptocamp/qgis-server:3.6
hostname: dockerosm_qgisserver
container_name: dockerosm_qgisserver
db:
image: kartoza/postgis:${POSTGRES_VERSION}
hostname: db
container_name: dockerosm_db
environment:
- QGIS_PROJECT_FILE=/project/project.qgs
- GIS_SERVER_LOG_LEVEL=DEBUG
- MAX_REQUESTS_PER_PROCESS=100
- POSTGRES_USER=${POSTGRES_USER}
- POSTGRES_PASS=${POSTGRES_PASS}
- POSTGRES_DBNAME=${POSTGRES_DBNAME}
- ALLOW_IP_RANGE=${ALLOW_IP_RANGE}
volumes:
- ./logs:/var/log/apache2
- ./web:/project
- ./settings:/web/settings
depends_on:
db:
condition: service_healthy
links:
- db:db
- osm-postgis-data:/var/lib/postgresql
ports:
- 8198:80
restart: on-failure
- ${DB_PORT}:5432
healthcheck:
test: "exit 0"
imposm:
image: kartoza/docker-osm:imposm-latest
build: docker-imposm3
container_name: dockerosm_imposm
volumes:
- ./settings:/home/settings
- import_done:/home/import_done
- import_queue:/home/import_queue
- cache:/home/cache
depends_on:
- db
environment:
- POSTGRES_USER=${POSTGRES_USER}
- POSTGRES_PASS=${POSTGRES_PASS}
- POSTGRES_DBNAME=${POSTGRES_DBNAME}
- POSTGRES_PORT=${POSTGRES_PORT}
- POSTGRES_HOST=${POSTGRES_HOST}
- TIME=${TIME}
- SETTINGS=${SETTINGS}
- CACHE=${CACHE}
- IMPORT_DONE=${IMPORT_DONE}
- IMPORT_QUEUE=${IMPORT_QUEUE}
- SRID=${SRID}
- OPTIMIZE=${OPTIMIZE}
- DBSCHEMA_PRODUCTION=${DBSCHEMA_PRODUCTION}
- DBSCHEMA_IMPORT=${DBSCHEMA_IMPORT}
- DBSCHEMA_BACKUP=${DBSCHEMA_BACKUP}
- QGIS_STYLE=${QGIS_STYLE}
- CLIP=${CLIP}
command: bash -c "while [ ! -f /home/settings/country.pbf ] ; do sleep 1; done && python3 -u /home/importer.py"
osmupdate:
build: docker-osmupdate
image: kartoza/docker-osm:osmupdate-latest
container_name: dockerosm_osmupdate
volumes_from:
- imposm
depends_on:
- db
environment:
- MAX_DAYS=${MAX_DAYS}
- DIFF=${DIFF}
- MAX_MERGE=${MAX_MERGE}
- COMPRESSION_LEVEL=${COMPRESSION_LEVEL}
- BASE_URL=${BASE_URL}
- IMPORT_QUEUE=${IMPORT_QUEUE}
- IMPORT_DONE=${IMPORT_DONE}
- TIME=${TIME}
command: bash -c "while [ ! -f /home/settings/country.pbf ] ; do sleep 1; done && python3 -u /home/download.py"
pgadmin4:
image: dpage/pgadmin4:4.16
hostname: pgadmin4
volumes:
- pgadmin_data:/var/lib/pgadmin
environment:
- PGADMIN_DEFAULT_EMAIL=${PGADMIN_DEFAULT_EMAIL}
- PGADMIN_DEFAULT_PASSWORD=${PGADMIN_DEFAULT_PASSWORD}
ports:
- ${PGDB_PORT}:80
restart: on-failure
depends_on:
- db
osmenrich:
build: docker-osmenrich
volumes_from:
- imposm
depends_on:
- db
environment:
- IMPORT_QUEUE=${IMPORT_QUEUE}
- IMPORT_DONE=${IMPORT_DONE}
- TIME=${TIME}
# Server vector tiles from PostgreSQL DB
martin:
image: urbica/martin
hostname: dockerosm_martin
container_name: dockerosm_martin
restart: on-failure
ports:
- 3000:3000
- ${MARTIN_PORT}:3000
environment:
- WATCH_MODE=true
- DATABASE_URL=postgres://docker:docker@db/gis
- WATCH_MODE=${WATCH_MODE}
- DATABASE_URL=${DATABASE_URL}
depends_on:
db:
condition: service_healthy

Wyświetl plik

@ -1,4 +1,4 @@
version: '2.1'
version: '3.9'
volumes:
osm-postgis-data:
@ -9,21 +9,18 @@ volumes:
services:
db:
# About the postgresql version, it should match in the dockerfile of docker-imposm3
image: kartoza/postgis:12.0
image: kartoza/postgis:${POSTGRES_VERSION}
hostname: db
container_name: dockerosm_db
environment:
- POSTGRES_USER=docker
- POSTGRES_PASS=docker
- POSTGRES_DBNAME=gis
# Uncomment to expose the postgis database on the network
# - ALLOW_IP_RANGE= 0.0.0.0/0
- POSTGRES_USER=${POSTGRES_USER}
- POSTGRES_PASS=${POSTGRES_PASS}
- POSTGRES_DBNAME=${POSTGRES_DBNAME}
- ALLOW_IP_RANGE=${ALLOW_IP_RANGE}
volumes:
- osm-postgis-data:/var/lib/postgresql
# Uncomment to use the postgis database from outside the docker network
# ports:
# - "35432:5432"
ports:
- ${DB_PORT}:5432
healthcheck:
test: "exit 0"
@ -33,80 +30,49 @@ services:
build: docker-imposm3
container_name: dockerosm_imposm
volumes:
# These are sharable to other containers
- ./settings:/home/settings
- import_done:/home/import_done
- import_queue:/home/import_queue
- cache:/home/cache
depends_on:
db:
condition: service_healthy
- db
environment:
- POSTGRES_USER=docker
- POSTGRES_PASS=docker
- POSTGRES_DBNAME=gis
- POSTGRES_PORT=5432
- POSTGRES_HOST=db
# seconds between 2 executions of the script
# if 0, then no update will be done, only the first initial import from the PBF
- TIME=120
# folder for settings (with *.json and *.sql)
- SETTINGS=settings
# folder for caching
- CACHE=cache
# folder for diff which has been imported
- IMPORT_DONE=import_done
# folder for diff which hasn't been imported yet
- IMPORT_QUEUE=import_queue
# it can be 3857
- SRID=4326
# see http://imposm.org/docs/imposm3/latest/tutorial.html#optimize
- OPTIMIZE=false
# see http://imposm.org/docs/imposm3/latest/tutorial.html#deploy-production-tables
- DBSCHEMA_PRODUCTION=public
# http://imposm.org/docs/imposm3/latest/tutorial.html#deploy-production-tables
- DBSCHEMA_IMPORT=import
# http://imposm.org/docs/imposm3/latest/tutorial.html#deploy-production-tables
- DBSCHEMA_BACKUP=backup
# Install some styles if you are using the default mapping. It can be 'yes' or 'no'
- QGIS_STYLE=yes
# Use clip in the database - To use this you should have run make import_clip to add your clip to the DB
- CLIP=no
- POSTGRES_USER=${POSTGRES_USER}
- POSTGRES_PASS=${POSTGRES_PASS}
- POSTGRES_DBNAME=${POSTGRES_DBNAME}
- POSTGRES_PORT=${POSTGRES_PORT}
- POSTGRES_HOST=${POSTGRES_HOST}
- TIME=${TIME}
- SETTINGS=${SETTINGS}
- CACHE=${CACHE}
- IMPORT_DONE=${IMPORT_DONE}
- IMPORT_QUEUE=${IMPORT_QUEUE}
- SRID=${SRID}
- OPTIMIZE=${OPTIMIZE}
- DBSCHEMA_PRODUCTION=${DBSCHEMA_PRODUCTION}
- DBSCHEMA_IMPORT=${DBSCHEMA_IMPORT}
- DBSCHEMA_BACKUP=${DBSCHEMA_BACKUP}
- QGIS_STYLE=${QGIS_STYLE}
- CLIP=${CLIP}
command: bash -c "while [ ! -f /home/settings/country.pbf ] ; do sleep 1; done && python3 -u /home/importer.py"
osmupdate:
build: docker-osmupdate
image: kartoza/docker-osm:osmupdate-latest
container_name: dockerosm_osmupdate
volumes:
# These are sharable to other containers
- ./settings:/home/settings
- import_done:/home/import_done
- import_queue:/home/import_queue
- cache:/home/cache
volumes_from:
- imposm
depends_on:
db:
condition: service_healthy
- db
environment:
# These are all currently the defaults but listed here for your
# convenience if you want to change them
# the maximum time range to assemble a cumulated changefile.
- MAX_DAYS=100
# osmupdate uses a combination of minutely, hourly and daily changefiles. This value can be minute, hour, day or sporadic.
- DIFF=sporadic
# argument to determine the maximum number of changefiles processed in parallel.
- MAX_MERGE=7
# define level for gzip compression. values between 1 (low compression but fast) and 9 (high compression but slow)
- COMPRESSION_LEVEL=1
# change the URL to use a custom URL to fetch regional file updates.
- BASE_URL=http://planet.openstreetmap.org/replication/
# folder for diff which hasn't been imported yet
- IMPORT_QUEUE=import_queue
# folder for diff which has been imported
- IMPORT_DONE=import_done
# seconds between 2 executions of the script
# if 0, then no update will be done, only the first initial import from the PBF
- TIME=120
- MAX_DAYS=${MAX_DAYS}
- DIFF=${DIFF}
- MAX_MERGE=${MAX_MERGE}
- COMPRESSION_LEVEL=${COMPRESSION_LEVEL}
- BASE_URL=${BASE_URL}
- IMPORT_QUEUE=${IMPORT_QUEUE}
- IMPORT_DONE=${IMPORT_DONE}
- TIME=${TIME}
command: bash -c "while [ ! -f /home/settings/country.pbf ] ; do sleep 1; done && python3 -u /home/download.py"
pgadmin4:
@ -115,35 +81,22 @@ services:
volumes:
- pgadmin_data:/var/lib/pgadmin
environment:
- PGADMIN_DEFAULT_EMAIL=docker@gmail.com
- PGADMIN_DEFAULT_PASSWORD=docker
- PGADMIN_DEFAULT_EMAIL=${PGADMIN_DEFAULT_EMAIL}
- PGADMIN_DEFAULT_PASSWORD=${PGADMIN_DEFAULT_PASSWORD}
ports:
- 6500:80
- ${PGDB_PORT}:80
restart: on-failure
depends_on:
db:
condition: service_healthy
- db
osmenrich:
build: docker-osmenrich
container_name: dockerosm_osmenrich
volumes:
# These are sharable to other containers
- ./settings:/home/settings
- import_done:/home/import_done
- import_queue:/home/import_queue
- cache:/home/cache
volumes_from:
- imposm
depends_on:
db:
condition: service_healthy
- db
environment:
# These are all currently the defaults but listed here for your
# convenience if you want to change them
# folder for diff which hasn't been imported yet
- IMPORT_QUEUE=import_queue
# folder for diff which has been imported
- IMPORT_DONE=import_done
# seconds between 2 executions of the script
# if 0, then no update will be done, only the first initial import from the PBF
- TIME=120
command: bash -c "while [ ! -f /home/settings/importer.lock ] ; do sleep 1; done && python3 -u /home/enrich.py"
- IMPORT_QUEUE=${IMPORT_QUEUE}
- IMPORT_DONE=${IMPORT_DONE}
- TIME=${TIME}

Wyświetl plik

@ -20,16 +20,17 @@
"""
import gzip
import xmltodict
import yaml
from xmltodict import OrderedDict
from dateutil import parser
from os import environ, listdir, mkdir
from os.path import join, exists, getsize
from sys import exit, stderr
from urllib import request
from psycopg2 import connect, OperationalError, ProgrammingError
from time import sleep
from urllib import request
import xmltodict
import yaml
from dateutil import parser
from psycopg2 import connect, OperationalError, ProgrammingError
from xmltodict import OrderedDict
class Enrich(object):
@ -220,18 +221,22 @@ class Enrich(object):
for table, table_data in self.mapping_database_schema.items():
new_columns_postgis = []
for enrich_key, enrich_type in self.enriched_column.items():
try:
cursor.execute('select %s from %s' % (enrich_key, table))
except ProgrammingError as e:
connection.rollback()
check_column = ''' SELECT EXISTS (SELECT 1 FROM information_schema.columns
WHERE table_name='%s' and column_name='%s'); ''' % (
table, enrich_key)
cursor.execute(check_column)
column_existence = cursor.fetchone()[0]
if column_existence != 1:
if enrich_type == 'int':
new_columns_postgis.append('ADD COLUMN %s INTEGER' % enrich_key)
new_columns_postgis.append('ADD COLUMN IF NOT EXISTS %s NUMERIC' % enrich_key)
elif enrich_type == 'string':
new_columns_postgis.append('ADD COLUMN %s VARCHAR' % enrich_key)
new_columns_postgis.append('ADD COLUMN IF NOT EXISTS %s CHARACTER VARYING (255)' % enrich_key)
elif enrich_type == 'datetime':
new_columns_postgis.append('ADD COLUMN %s timestamp' % enrich_key)
new_columns_postgis.append('ADD COLUMN IF NOT EXISTS %s TIMESTAMPTZ' % enrich_key)
if len(new_columns_postgis) > 0:
query = 'ALTER TABLE %s %s' % (table, ','.join(new_columns_postgis))
query = 'ALTER TABLE public."%s" %s;' % (table, ','.join(new_columns_postgis))
cursor.execute(query)
connection.commit()
connection.close()
@ -415,8 +420,8 @@ class Enrich(object):
row_batch = {}
osm_ids = []
try:
cursor.execute(
'select * from %s WHERE changeset_timestamp IS NULL AND osm_id IS NOT NULL ORDER BY osm_id' % table_name)
check_sql = ''' select * from "%s" WHERE "changeset_timestamp" IS NULL AND "osm_id" IS NOT NULL ORDER BY "osm_id" ''' % table_name
cursor.execute(check_sql)
row = True
while row:
# do something with row
@ -479,8 +484,9 @@ class Enrich(object):
connection = self.create_connection()
cursor = connection.cursor()
try:
cursor.execute('select * from %s WHERE %s=%s' % (
table, table_data['osm_id_columnn'], osm_id))
validate_sql = ''' select * from "%s" WHERE "%s"=%s ''' % (
table, table_data['osm_id_columnn'], osm_id)
cursor.execute(validate_sql)
row = cursor.fetchone()
if row:
row = dict(zip(table_data['columns'], row))
@ -550,15 +556,26 @@ class Enrich(object):
except IOError:
self.info('cache file can\'t be created')
def locate_table(self, name):
"""Check for tables in the DB table exists in the DB"""
connection = self.create_connection()
cursor = connection.cursor()
sql = """ SELECT EXISTS (SELECT 1 AS result from information_schema.tables where table_name like 'TEMP_TABLE'); """
cursor.execute(sql.replace('TEMP_TABLE', '%s' % name))
# noinspection PyUnboundLocalVariable
return cursor.fetchone()[0]
def run(self):
"""First checker."""
while True:
self.info('Run enrich process')
if self.check_database():
self.enrich_empty_changeset()
self.enrich_database_from_diff_file()
osm_tables = self.locate_table('osm_%')
if osm_tables != 1:
self.info('Imposm is still running, wait a while and try again')
else:
self.info('Database is not ready')
if self.check_database():
self.enrich_empty_changeset()
self.enrich_database_from_diff_file()
# sleep looping
self.info('sleeping for %s' % self.default['TIME'])