diff --git a/.env b/.env new file mode 100644 index 0000000..51ea0e7 --- /dev/null +++ b/.env @@ -0,0 +1,53 @@ +COMPOSE_PROJECT_NAME=kartozadockerosm +POSTGRES_USER=docker +POSTGRES_PASS=docker +POSTGRES_DBNAME=gis +DB_PORT=35432 +PGDB_PORT=6500 +POSTGRES_VERSION=13-3.1 +MARTIN_PORT=3000 +WATCH_MODE=true +DATABASE_URL=postgres://docker:docker@db/gis +# Uncomment to expose the postgis database on the network +ALLOW_IP_RANGE= 0.0.0.0/0 +POSTGRES_PORT=5432 +POSTGRES_HOST=db +# seconds between 2 executions of the script +# if 0, then no update will be done, only the first initial import from the PBF +TIME=120 +# folder for settings (with *.json and *.sql) +SETTINGS=settings +# folder for caching +CACHE=cache +# folder for diff which has been imported +IMPORT_DONE=import_done +# folder for diff which hasn't been imported yet +IMPORT_QUEUE=import_queue +# it can be 3857 +SRID=4326 +# see http://imposm.org/docs/imposm3/latest/tutorial.html#optimize +OPTIMIZE=false +# see http://imposm.org/docs/imposm3/latest/tutorial.html#deploy-production-tables +DBSCHEMA_PRODUCTION=public +# http://imposm.org/docs/imposm3/latest/tutorial.html#deploy-production-tables +DBSCHEMA_IMPORT=import +# http://imposm.org/docs/imposm3/latest/tutorial.html#deploy-production-tables +DBSCHEMA_BACKUP=backup +# Install some styles if you are using the default mapping. It can be 'yes' or 'no' +QGIS_STYLE=yes +# Use clip in the database - To use this you should have run make import_clip to add your clip to the DB +CLIP=no +# These are all currently the defaults but listed here for your +# convenience if you want to change them +# the maximum time range to assemble a cumulated changefile. +MAX_DAYS=100 +# osmupdate uses a combination of minutely, hourly and daily changefiles. This value can be minute, hour, day or sporadic. +DIFF=sporadic +# argument to determine the maximum number of parallely processed changefiles. +MAX_MERGE=7 +# define level for gzip compression. 
values between 1 (low compression but fast) and 9 (high compression but slow) +COMPRESSION_LEVEL=1 +# change the URL to use a custom URL to fetch regional file updates. +BASE_URL=http://planet.openstreetmap.org/replication/ +PGADMIN_DEFAULT_EMAIL=docker@gmail.com +PGADMIN_DEFAULT_PASSWORD=docker diff --git a/docker-compose-web.yml b/docker-compose-web.yml index 97eaa30..aa14736 100644 --- a/docker-compose-web.yml +++ b/docker-compose-web.yml @@ -1,42 +1,112 @@ -# Usage: +version: '3.9' -# docker-compose -f docker-compose.yml -f docker-compose-web.yml - -version: '2.1' +volumes: + osm-postgis-data: + import_done: + import_queue: + cache: + pgadmin_data: services: - qgisserver: - image: camptocamp/qgis-server:3.6 - hostname: dockerosm_qgisserver - container_name: dockerosm_qgisserver + db: + image: kartoza/postgis:${POSTGRES_VERSION} + hostname: db + container_name: dockerosm_db environment: - - QGIS_PROJECT_FILE=/project/project.qgs - - GIS_SERVER_LOG_LEVEL=DEBUG - - MAX_REQUESTS_PER_PROCESS=100 + - POSTGRES_USER=${POSTGRES_USER} + - POSTGRES_PASS=${POSTGRES_PASS} + - POSTGRES_DBNAME=${POSTGRES_DBNAME} + - ALLOW_IP_RANGE=${ALLOW_IP_RANGE} volumes: - - ./logs:/var/log/apache2 - - ./web:/project - - ./settings:/web/settings - depends_on: - db: - condition: service_healthy - links: - - db:db + - osm-postgis-data:/var/lib/postgresql ports: - - 8198:80 - restart: on-failure + - ${DB_PORT}:5432 + healthcheck: + test: "exit 0" + + imposm: + image: kartoza/docker-osm:imposm-latest + build: docker-imposm3 + container_name: dockerosm_imposm + volumes: + - ./settings:/home/settings + - import_done:/home/import_done + - import_queue:/home/import_queue + - cache:/home/cache + depends_on: + - db + environment: + - POSTGRES_USER=${POSTGRES_USER} + - POSTGRES_PASS=${POSTGRES_PASS} + - POSTGRES_DBNAME=${POSTGRES_DBNAME} + - POSTGRES_PORT=${POSTGRES_PORT} + - POSTGRES_HOST=${POSTGRES_HOST} + - TIME=${TIME} + - SETTINGS=${SETTINGS} + - CACHE=${CACHE} + - IMPORT_DONE=${IMPORT_DONE} + - 
IMPORT_QUEUE=${IMPORT_QUEUE} + - SRID=${SRID} + - OPTIMIZE=${OPTIMIZE} + - DBSCHEMA_PRODUCTION=${DBSCHEMA_PRODUCTION} + - DBSCHEMA_IMPORT=${DBSCHEMA_IMPORT} + - DBSCHEMA_BACKUP=${DBSCHEMA_BACKUP} + - QGIS_STYLE=${QGIS_STYLE} + - CLIP=${CLIP} + command: bash -c "while [ ! -f /home/settings/country.pbf ] ; do sleep 1; done && python3 -u /home/importer.py" + + osmupdate: + build: docker-osmupdate + image: kartoza/docker-osm:osmupdate-latest + container_name: dockerosm_osmupdate + volumes_from: + - imposm + depends_on: + - db + environment: + - MAX_DAYS=${MAX_DAYS} + - DIFF=${DIFF} + - MAX_MERGE=${MAX_MERGE} + - COMPRESSION_LEVEL=${COMPRESSION_LEVEL} + - BASE_URL=${BASE_URL} + - IMPORT_QUEUE=${IMPORT_QUEUE} + - IMPORT_DONE=${IMPORT_DONE} + - TIME=${TIME} + command: bash -c "while [ ! -f /home/settings/country.pbf ] ; do sleep 1; done && python3 -u /home/download.py" + + pgadmin4: + image: dpage/pgadmin4:4.16 + hostname: pgadmin4 + volumes: + - pgadmin_data:/var/lib/pgadmin + environment: + - PGADMIN_DEFAULT_EMAIL=${PGADMIN_DEFAULT_EMAIL} + - PGADMIN_DEFAULT_PASSWORD=${PGADMIN_DEFAULT_PASSWORD} + ports: + - ${PGDB_PORT}:80 + restart: on-failure + depends_on: + - db + + osmenrich: + build: docker-osmenrich + volumes_from: + - imposm + depends_on: + - db + environment: + - IMPORT_QUEUE=${IMPORT_QUEUE} + - IMPORT_DONE=${IMPORT_DONE} + - TIME=${TIME} - # Server vector tiles from PostgreSQL DB martin: image: urbica/martin - hostname: dockerosm_martin - container_name: dockerosm_martin restart: on-failure ports: - - 3000:3000 + - ${MARTIN_PORT}:3000 environment: - - WATCH_MODE=true - - DATABASE_URL=postgres://docker:docker@db/gis + - WATCH_MODE=${WATCH_MODE} + - DATABASE_URL=${DATABASE_URL} depends_on: db: condition: service_healthy diff --git a/docker-compose.yml b/docker-compose.yml index 69214c2..43543f4 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,4 +1,4 @@ -version: '2.1' +version: '3.9' volumes: osm-postgis-data: @@ -9,21 +9,18 @@ volumes: services: 
db: - # About the postgresql version, it should match in the dockerfile of docker-imposm3 - image: kartoza/postgis:12.0 + image: kartoza/postgis:${POSTGRES_VERSION} hostname: db container_name: dockerosm_db environment: - - POSTGRES_USER=docker - - POSTGRES_PASS=docker - - POSTGRES_DBNAME=gis - # Uncomment to expose the postgis database on the network - # - ALLOW_IP_RANGE= 0.0.0.0/0 + - POSTGRES_USER=${POSTGRES_USER} + - POSTGRES_PASS=${POSTGRES_PASS} + - POSTGRES_DBNAME=${POSTGRES_DBNAME} + - ALLOW_IP_RANGE=${ALLOW_IP_RANGE} volumes: - osm-postgis-data:/var/lib/postgresql - # Uncomment to use the postgis database from outside the docker network - # ports: - # - "35432:5432" + ports: + - ${DB_PORT}:5432 healthcheck: test: "exit 0" @@ -33,80 +30,49 @@ services: build: docker-imposm3 container_name: dockerosm_imposm volumes: - # These are sharable to other containers - ./settings:/home/settings - import_done:/home/import_done - import_queue:/home/import_queue - cache:/home/cache depends_on: - db: - condition: service_healthy + - db environment: - - POSTGRES_USER=docker - - POSTGRES_PASS=docker - - POSTGRES_DBNAME=gis - - POSTGRES_PORT=5432 - - POSTGRES_HOST=db - # seconds between 2 executions of the script - # if 0, then no update will be done, only the first initial import from the PBF - - TIME=120 - # folder for settings (with *.json and *.sql) - - SETTINGS=settings - # folder for caching - - CACHE=cache - # folder for diff which has been imported - - IMPORT_DONE=import_done - # folder for diff which hasn't been imported yet - - IMPORT_QUEUE=import_queue - # it can be 3857 - - SRID=4326 - # see http://imposm.org/docs/imposm3/latest/tutorial.html#optimize - - OPTIMIZE=false - # see http://imposm.org/docs/imposm3/latest/tutorial.html#deploy-production-tables - - DBSCHEMA_PRODUCTION=public - # http://imposm.org/docs/imposm3/latest/tutorial.html#deploy-production-tables - - DBSCHEMA_IMPORT=import - # 
http://imposm.org/docs/imposm3/latest/tutorial.html#deploy-production-tables - - DBSCHEMA_BACKUP=backup - # Install some styles if you are using the default mapping. It can be 'yes' or 'no' - - QGIS_STYLE=yes - # Use clip in the database - To use this you should have run make import_clip to add your clip to the DB - - CLIP=no + - POSTGRES_USER=${POSTGRES_USER} + - POSTGRES_PASS=${POSTGRES_PASS} + - POSTGRES_DBNAME=${POSTGRES_DBNAME} + - POSTGRES_PORT=${POSTGRES_PORT} + - POSTGRES_HOST=${POSTGRES_HOST} + - TIME=${TIME} + - SETTINGS=${SETTINGS} + - CACHE=${CACHE} + - IMPORT_DONE=${IMPORT_DONE} + - IMPORT_QUEUE=${IMPORT_QUEUE} + - SRID=${SRID} + - OPTIMIZE=${OPTIMIZE} + - DBSCHEMA_PRODUCTION=${DBSCHEMA_PRODUCTION} + - DBSCHEMA_IMPORT=${DBSCHEMA_IMPORT} + - DBSCHEMA_BACKUP=${DBSCHEMA_BACKUP} + - QGIS_STYLE=${QGIS_STYLE} + - CLIP=${CLIP} command: bash -c "while [ ! -f /home/settings/country.pbf ] ; do sleep 1; done && python3 -u /home/importer.py" osmupdate: build: docker-osmupdate image: kartoza/docker-osm:osmupdate-latest container_name: dockerosm_osmupdate - volumes: - # These are sharable to other containers - - ./settings:/home/settings - - import_done:/home/import_done - - import_queue:/home/import_queue - - cache:/home/cache + volumes_from: + - imposm depends_on: - db: - condition: service_healthy + - db environment: - # These are all currently the defaults but listed here for your - # convenience if you want to change them - # the maximum time range to assemble a cumulated changefile. - - MAX_DAYS=100 - # osmupdate uses a combination of minutely, hourly and daily changefiles. This value can be minute, hour, day or sporadic. - - DIFF=sporadic - # argument to determine the maximum number of parallely processed changefiles. - - MAX_MERGE=7 - # define level for gzip compression. values between 1 (low compression but fast) and 9 (high compression but slow) - - COMPRESSION_LEVEL=1 - # change the URL to use a custom URL to fetch regional file updates. 
- - BASE_URL=http://planet.openstreetmap.org/replication/ - # folder for diff which hasn't been imported yet - - IMPORT_QUEUE=import_queue - # folder for diff which has been imported - - IMPORT_DONE=import_done - # seconds between 2 executions of the script - # if 0, then no update will be done, only the first initial import from the PBF - - TIME=120 + - MAX_DAYS=${MAX_DAYS} + - DIFF=${DIFF} + - MAX_MERGE=${MAX_MERGE} + - COMPRESSION_LEVEL=${COMPRESSION_LEVEL} + - BASE_URL=${BASE_URL} + - IMPORT_QUEUE=${IMPORT_QUEUE} + - IMPORT_DONE=${IMPORT_DONE} + - TIME=${TIME} command: bash -c "while [ ! -f /home/settings/country.pbf ] ; do sleep 1; done && python3 -u /home/download.py" pgadmin4: @@ -115,35 +81,22 @@ services: volumes: - pgadmin_data:/var/lib/pgadmin environment: - - PGADMIN_DEFAULT_EMAIL=docker@gmail.com - - PGADMIN_DEFAULT_PASSWORD=docker + - PGADMIN_DEFAULT_EMAIL=${PGADMIN_DEFAULT_EMAIL} + - PGADMIN_DEFAULT_PASSWORD=${PGADMIN_DEFAULT_PASSWORD} ports: - - 6500:80 + - ${PGDB_PORT}:80 restart: on-failure depends_on: - db: - condition: service_healthy + - db osmenrich: build: docker-osmenrich container_name: dockerosm_osmenrich - volumes: - # These are sharable to other containers - - ./settings:/home/settings - - import_done:/home/import_done - - import_queue:/home/import_queue - - cache:/home/cache + volumes_from: + - imposm depends_on: - db: - condition: service_healthy + - db environment: - # These are all currently the defaults but listed here for your - # convenience if you want to change them - # folder for diff which hasn't been imported yet - - IMPORT_QUEUE=import_queue - # folder for diff which has been imported - - IMPORT_DONE=import_done - # seconds between 2 executions of the script - # if 0, then no update will be done, only the first initial import from the PBF - - TIME=120 - command: bash -c "while [ ! 
-f /home/settings/importer.lock ] ; do sleep 1; done && python3 -u /home/enrich.py" + - IMPORT_QUEUE=${IMPORT_QUEUE} + - IMPORT_DONE=${IMPORT_DONE} + - TIME=${TIME} diff --git a/docker-osmenrich/enrich.py b/docker-osmenrich/enrich.py index 5b30754..43ec246 100644 --- a/docker-osmenrich/enrich.py +++ b/docker-osmenrich/enrich.py @@ -20,16 +20,17 @@ """ import gzip -import xmltodict -import yaml -from xmltodict import OrderedDict -from dateutil import parser from os import environ, listdir, mkdir from os.path import join, exists, getsize from sys import exit, stderr -from urllib import request -from psycopg2 import connect, OperationalError, ProgrammingError from time import sleep +from urllib import request + +import xmltodict +import yaml +from dateutil import parser +from psycopg2 import connect, OperationalError, ProgrammingError +from xmltodict import OrderedDict class Enrich(object): @@ -220,18 +221,22 @@ class Enrich(object): for table, table_data in self.mapping_database_schema.items(): new_columns_postgis = [] for enrich_key, enrich_type in self.enriched_column.items(): - try: - cursor.execute('select %s from %s' % (enrich_key, table)) - except ProgrammingError as e: - connection.rollback() + check_column = ''' SELECT EXISTS (SELECT 1 FROM information_schema.columns + WHERE table_name='%s' and column_name='%s'); ''' % ( + table, enrich_key) + cursor.execute(check_column) + column_existence = cursor.fetchone()[0] + + if column_existence != 1: if enrich_type == 'int': - new_columns_postgis.append('ADD COLUMN %s INTEGER' % enrich_key) + new_columns_postgis.append('ADD COLUMN IF NOT EXISTS %s NUMERIC' % enrich_key) elif enrich_type == 'string': - new_columns_postgis.append('ADD COLUMN %s VARCHAR' % enrich_key) + new_columns_postgis.append('ADD COLUMN IF NOT EXISTS %s CHARACTER VARYING (255)' % enrich_key) elif enrich_type == 'datetime': - new_columns_postgis.append('ADD COLUMN %s timestamp' % enrich_key) + new_columns_postgis.append('ADD COLUMN IF NOT EXISTS %s 
TIMESTAMPTZ' % enrich_key) + if len(new_columns_postgis) > 0: - query = 'ALTER TABLE %s %s' % (table, ','.join(new_columns_postgis)) + query = 'ALTER TABLE public."%s" %s;' % (table, ','.join(new_columns_postgis)) cursor.execute(query) connection.commit() connection.close() @@ -415,8 +420,8 @@ class Enrich(object): row_batch = {} osm_ids = [] try: - cursor.execute( - 'select * from %s WHERE changeset_timestamp IS NULL AND osm_id IS NOT NULL ORDER BY osm_id' % table_name) + check_sql = ''' select * from "%s" WHERE "changeset_timestamp" IS NULL AND "osm_id" IS NOT NULL ORDER BY "osm_id" ''' % table_name + cursor.execute(check_sql) row = True while row: # do something with row @@ -479,8 +484,9 @@ class Enrich(object): connection = self.create_connection() cursor = connection.cursor() try: - cursor.execute('select * from %s WHERE %s=%s' % ( - table, table_data['osm_id_columnn'], osm_id)) + validate_sql = ''' select * from "%s" WHERE "%s"=%s ''' % ( + table, table_data['osm_id_columnn'], osm_id) + cursor.execute(validate_sql) row = cursor.fetchone() if row: row = dict(zip(table_data['columns'], row)) @@ -550,15 +556,26 @@ class Enrich(object): except IOError: self.info('cache file can\'t be created') + def locate_table(self, name): + """Check whether a table whose name matches the given LIKE pattern exists in the DB.""" + connection = self.create_connection() + cursor = connection.cursor() + sql = """ SELECT EXISTS (SELECT 1 AS result from information_schema.tables where table_name like 'TEMP_TABLE'); """ + cursor.execute(sql.replace('TEMP_TABLE', '%s' % name)) + # noinspection PyUnboundLocalVariable + return cursor.fetchone()[0] + def run(self): """First checker.""" while True: self.info('Run enrich process') - if self.check_database(): - self.enrich_empty_changeset() - self.enrich_database_from_diff_file() + osm_tables = self.locate_table('osm_%') + if osm_tables != 1: + self.info('Imposm is still running, wait a while and try again') else: - self.info('Database is not ready') + if 
self.check_database(): + self.enrich_empty_changeset() + self.enrich_database_from_diff_file() # sleep looping self.info('sleeping for %s' % self.default['TIME'])