diff --git a/.travis.yml b/.travis.yml
index ddb686a..aad4656 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -13,6 +13,7 @@ env:
   - SCENARIO=replications
   - SCENARIO=collations
   - SCENARIO=extensions
+  - SCENARIO=logical_replication
 
 before_script:
   - ./build-test.sh
diff --git a/Dockerfile b/Dockerfile
index 87d2a22..a5cee13 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,8 +1,8 @@
 #--------- Generic stuff all our Dockerfiles should start with so we get caching ------------
 ARG DISTRO=debian
 ARG IMAGE_VERSION=buster
-ARG IMAGE_VARIANT=-slim
-FROM $DISTRO:$IMAGE_VERSION$IMAGE_VARIANT
+ARG IMAGE_VARIANT=slim
+FROM kartoza/postgis:$DISTRO-$IMAGE_VERSION-$IMAGE_VARIANT
 MAINTAINER Tim Sutton
 
 # Reset ARG for version
@@ -11,8 +11,6 @@ ARG IMAGE_VERSION
 RUN set -eux \
     && export DEBIAN_FRONTEND=noninteractive \
     && apt-get update \
-    && apt-get -y --no-install-recommends install \
-        locales gnupg2 wget ca-certificates rpl pwgen software-properties-common gdal-bin iputils-ping \
     && sh -c "echo \"deb http://apt.postgresql.org/pub/repos/apt/ ${IMAGE_VERSION}-pgdg main\" > /etc/apt/sources.list.d/pgdg.list" \
     && wget --quiet -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc -O- | apt-key add - \
     && apt-get -y --purge autoremove \
@@ -20,16 +18,6 @@ RUN set -eux \
     && rm -rf /var/lib/apt/lists/* \
     && dpkg-divert --local --rename --add /sbin/initctl
 
-# Generating locales takes a long time. Utilize caching by runnig it by itself
-# early in the build process.
-COPY scripts/locale.gen /etc/locale.gen
-RUN set -eux \
-    && /usr/sbin/locale-gen
-
-ENV LANG=en_US.UTF-8 \
-    LANGUAGE=en_US:en \
-    LC_ALL=en_US.UTF-8
-RUN update-locale ${LANG}
 
 
 #-------------Application Specific Stuff ----------------------------------------------------
@@ -42,10 +30,9 @@ RUN set -eux \
     && apt-get -y --no-install-recommends install postgresql-client-12 \
         postgresql-common postgresql-12 postgresql-12-postgis-3 \
         netcat postgresql-12-ogr-fdw postgresql-12-postgis-3-scripts \
-        postgresql-12-cron postgresql-plpython3-12 postgresql-12-pgrouting
+        postgresql-12-cron postgresql-plpython3-12 postgresql-12-pgrouting postgresql-server-dev-12
 
 # Compile pointcloud extension
-RUN apt-get -y update; apt-get -y install build-essential autoconf postgresql-server-dev-12 libxml2-dev zlib1g-dev
 
 RUN wget -O- https://github.com/pgpointcloud/pointcloud/archive/master.tar.gz | tar xz && \
     cd pointcloud-master && \
@@ -72,4 +59,4 @@ RUN set -eux \
 
 VOLUME /var/lib/postgresql
 
-ENTRYPOINT /scripts/docker-entrypoint.sh
+ENTRYPOINT /scripts/docker-entrypoint.sh
\ No newline at end of file
diff --git a/README.md b/README.md
index e98dd7a..ed1eb1e 100644
--- a/README.md
+++ b/README.md
@@ -86,6 +86,10 @@ and `IMAGE_VARIANT` (=slim) which can be used to control the base image used
 (but it still needs to be Debian based and have PostgreSQL official apt repo).
 For example making Ubuntu 20.04 based build (for better arm64 support)
 
+First build the base image from the `postgres-base` branch, following the instructions in [Kartoza base image builds](https://github.com/kartoza/docker-postgis/tree/postgres-base#alternative-base-distributions-builds).
+
+Then build the PostGIS image using:
+
 ```
 docker build --build-arg DISTRO=ubuntu --build-arg IMAGE_VERSION=focal --build-arg IMAGE_VARIANT="" -t kartoza/postgis .
 ```
@@ -378,6 +382,17 @@ See [the postgres documentation about encoding](https://www.postgresql.org/docs/
 
 ## Postgres Replication Setup
 
+The image supports replication out of the box. The two main replication methods supported are:
+* Streaming replication
+* Logical replication
+
+You can also set the environment variable `-e REPLICATION=false` to disable replication entirely.
+This can be useful when you need a standalone database, for example to run unit tests.
+
+`docker run --name "repl" -e REPLICATION=false -it kartoza/postgis:12.0`
+
+### Streaming replication
+By default a running container will support streaming replication.
 Replication allows you to maintain two or more synchronised copies of a database,
 with a single **master** copy and one or more **replicant** copies. The animation
 below illustrates this - the layer with the red boundary is accessed from the master
 database and the layer
@@ -395,7 +410,7 @@ mirror database content from a designated master.
 This replication scheme allows us to sync databases. However a `replicant` is only
 for read-only transaction, thus we can't write new data to it. The whole database
 cluster will be replicated.
 
-### Database permissions
+#### Database permissions
 
 Since we are using a role ${REPLICATION_USER}, we need to ensure that it has access to all
 the tables in a particular schema. So if a user adds another schema called `data`
 to the database `gis` he also has to update the permission for the user
@@ -466,7 +481,7 @@ make slave-log
 
 You can try experiment with several scenarios to see how replication works
 
-### Sync changes from master to replicant
+#### Sync changes from master to replicant
 
 You can use any postgres database tools to create new tables in master,
 by connecting using POSTGRES_USER and POSTGRES_PASS credentials using exposed port.
@@ -490,7 +505,7 @@ make slave-shell
 
 Then view your changes using psql.
 
-### Promoting replicant to master
+#### Promoting replicant to master
 
 You will notice that you cannot make changes in replicant, because it is read-only.
 If somehow you want to promote it to master, you can specify `PROMOTE_MASTER: 'True'`
@@ -501,7 +516,7 @@ be in sync anymore. This is useful if the replicant needs to take over a failove
 However it is recommended to take additional action, such as creating a backup from the slave
 so a dedicated master can be created again.
 
-### Preventing replicant database destroy on restart
+#### Preventing replicant database destroy on restart
 
 You can optionally set `DESTROY_DATABASE_ON_RESTART: 'False'` after successful sync to
 prevent the database from being destroyed on restart. With this setting you can
@@ -512,6 +527,16 @@ However, you should note that this option doesn't mean anything if you didn't
 persist your database volume. Because if it is not persisted, then it will be lost
 on restart because docker will recreate the container.
 
+### Logical replication
+To activate logical replication, set the environment variable `WAL_LEVEL=logical`
+when starting the container, for example:
+
+```
+docker run --name "logical-replication" -e WAL_LEVEL=logical -d kartoza/postgis:12.0
+```
+
+For a detailed example see the docker-compose file in the folder `sample/logical_replication`.
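+
+The subscription itself is wired up with plain SQL, as in the sample scripts. A minimal
+sketch (the names follow the sample; adjust host, credentials and tables to your setup):
+
+```
+# on the publisher: add a table to the publication created by the image
+psql -U docker -d gis -c "ALTER PUBLICATION logical_replication ADD TABLE sweets;"
+# on the subscriber: subscribe to that publication
+psql -U docker -d gis -c "CREATE SUBSCRIPTION logical_subscription CONNECTION 'host=pg-publisher port=5432 user=docker password=docker dbname=gis' PUBLICATION logical_replication;"
+```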
+
 
 ### Support
 
@@ -524,4 +549,4 @@ Tim Sutton (tim@kartoza.com)
 Gavin Fleming (gavin@kartoza.com)
 Rizky Maulana (rizky@kartoza.com)
 Admire Nyakudya (admire@kartoza.com)
-December 2018
+December 2018
\ No newline at end of file
diff --git a/sample/logical_replication/docker-compose.yml b/sample/logical_replication/docker-compose.yml
new file mode 100644
index 0000000..5357861
--- /dev/null
+++ b/sample/logical_replication/docker-compose.yml
@@ -0,0 +1,41 @@
+
+version: '2.1'
+
+volumes:
+  pg-publisher-data-dir:
+  pg-subscriber-data-dir:
+
+
+services:
+  pg-publisher:
+    image: kartoza/postgis:12.0
+    restart: 'always'
+    volumes:
+      - pg-publisher-data-dir:/var/lib/postgresql
+      - ./scripts/setup-publisher.sql:/docker-entrypoint-initdb.d/setup-publisher.sql
+    environment:
+      ALLOW_IP_RANGE: '0.0.0.0/0'
+      REPLICATION_USER: 'replicator'
+      REPLICATION_PASS: 'replicator'
+      WAL_LEVEL: 'logical'
+    ports:
+      - "7777:5432"
+    healthcheck:
+      test: "exit 0"
+
+  pg-subscriber:
+    image: kartoza/postgis:12.0
+    restart: 'always'
+    volumes:
+      - pg-subscriber-data-dir:/var/lib/postgresql
+      - ./scripts/setup-subscriber.sql:/docker-entrypoint-initdb.d/setup-subscriber.sql
+    environment:
+      ALLOW_IP_RANGE: '0.0.0.0/0'
+      REPLICATION_USER: 'replicator'
+      REPLICATION_PASS: 'replicator'
+      WAL_LEVEL: 'logical'
+    depends_on:
+      pg-publisher:
+        condition: service_healthy
+    ports:
+      - "7776:5432"
diff --git a/sample/logical_replication/scripts/setup-publisher.sql b/sample/logical_replication/scripts/setup-publisher.sql
new file mode 100644
index 0000000..3e507ab
--- /dev/null
+++ b/sample/logical_replication/scripts/setup-publisher.sql
@@ -0,0 +1,22 @@
+-- Create the tables that will be published
+CREATE TABLE IF NOT EXISTS sweets
+  (
+    id SERIAL,
+    name TEXT,
+    price DECIMAL,
+    CONSTRAINT sweets_pkey PRIMARY KEY (id)
+  );
+
+CREATE TABLE IF NOT EXISTS public.block (
+    id serial NOT NULL,
+    geom public.geometry(Polygon,4326),
+    fid bigint,
+    tile_name character varying,
+    location character varying
+);
+
+-- Add the tables to the publication called logical_replication, which is created by the setup scripts
+ALTER PUBLICATION logical_replication ADD TABLE sweets;
+ALTER PUBLICATION logical_replication ADD TABLE block;
+-- Insert records into the table
+INSERT INTO sweets (name, price) VALUES ('strawberry', 4.50), ('Coffee', 6.20), ('lollipop', 3.80);
diff --git a/sample/logical_replication/scripts/setup-subscriber.sql b/sample/logical_replication/scripts/setup-subscriber.sql
new file mode 100644
index 0000000..d7b9a34
--- /dev/null
+++ b/sample/logical_replication/scripts/setup-subscriber.sql
@@ -0,0 +1,20 @@
+-- Create the same tables on the subscriber side
+CREATE TABLE IF NOT EXISTS sweets
+  (
+    id SERIAL,
+    name TEXT,
+    price DECIMAL,
+    CONSTRAINT sweets_pkey PRIMARY KEY (id)
+  );
+
+CREATE TABLE IF NOT EXISTS public.block (
+    id serial NOT NULL,
+    geom public.geometry(Polygon,4326),
+    fid bigint,
+    tile_name character varying,
+    location character varying
+);
+-- Create a subscription to the publisher
+CREATE SUBSCRIPTION logical_subscription
+  CONNECTION 'host=pg-publisher port=5432 password=docker user=docker dbname=gis'
+  PUBLICATION logical_replication;
diff --git a/scenario_tests/logical_replication/docker-compose.yml b/scenario_tests/logical_replication/docker-compose.yml
new file mode 100644
index 0000000..46b61bb
--- /dev/null
+++ b/scenario_tests/logical_replication/docker-compose.yml
@@ -0,0 +1,58 @@
+
+version: '2.1'
+
+volumes:
+  pg-publisher-data-dir:
+  pg-subscriber-data-dir:
+
+services:
+  pg-publisher:
+    image: 'kartoza/postgis:${TAG:-manual-build}'
+    restart: 'always'
+    # You can optionally mount a volume, to play with the persistence and
+    # observe how the node will behave after restarts.
+    volumes:
+      - pg-publisher-data-dir:/var/lib/postgresql
+      - ./tests:/tests
+      - ../utils:/lib/utils
+    environment:
+      ALLOW_IP_RANGE: '0.0.0.0/0'
+      REPLICATION_USER: 'replicator'
+      REPLICATION_PASS: 'replicator'
+      WAL_LEVEL: 'logical'
+    ports:
+      - "7777:5432"
+    healthcheck:
+      interval: 60s
+      timeout: 30s
+      retries: 3
+      test: "pg_isready"
+
+  pg-subscriber:
+    image: 'kartoza/postgis:${TAG:-manual-build}'
+    restart: 'always'
+    # You can optionally mount a volume, but the service cannot be scaled
+    # in that case.
+    # The node will always destroy its database and copy from the master at
+    # runtime.
+    volumes:
+      - pg-subscriber-data-dir:/var/lib/postgresql
+      - ./tests:/tests
+      - ../utils:/lib/utils
+    environment:
+      ALLOW_IP_RANGE: '0.0.0.0/0'
+      WAL_LEVEL: 'logical'
+      REPLICATION_USER: 'replicator'
+      REPLICATION_PASS: 'replicator'
+    depends_on:
+      pg-publisher:
+        condition: service_healthy
+    # The port is exposed so you can observe the subscriber
+    # from your local machine while the tests run.
+    ports:
+      - "7776:5432"
+    healthcheck:
+      interval: 60s
+      timeout: 30s
+      retries: 3
+      test: "pg_isready"
diff --git a/scenario_tests/logical_replication/test.sh b/scenario_tests/logical_replication/test.sh
new file mode 100755
index 0000000..b0945c8
--- /dev/null
+++ b/scenario_tests/logical_replication/test.sh
@@ -0,0 +1,29 @@
+#!/usr/bin/env bash
+
+# exit immediately if a test fails
+set -e
+
+source ../test-env.sh
+
+# Run services
+docker-compose up -d
+
+sleep 5
+
+# Wait for the publisher cluster to accept connections
+until docker-compose exec pg-publisher pg_isready; do
+  sleep 1
+done;
+
+# Execute publisher tests
+docker-compose exec pg-publisher /bin/bash /tests/test_publisher.sh
+
+# Wait for the subscriber cluster to accept connections
+until docker-compose exec pg-subscriber pg_isready; do
+  sleep 1
+done;
+
+# Execute subscriber tests
+docker-compose exec pg-subscriber /bin/bash /tests/test_subscriber.sh
+
+docker-compose down -v
diff --git a/scenario_tests/logical_replication/tests/__init__.py b/scenario_tests/logical_replication/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/scenario_tests/logical_replication/tests/test_logical_replication.py b/scenario_tests/logical_replication/tests/test_logical_replication.py
new file mode 100644
index 0000000..7286752
--- /dev/null
+++ b/scenario_tests/logical_replication/tests/test_logical_replication.py
@@ -0,0 +1,178 @@
+from time import sleep
+
+import psycopg2
+import unittest
+from utils.utils import DBConnection
+
+
+class TestReplicationPublisher(unittest.TestCase):
+
+    def setUp(self):
+        self.db = DBConnection()
+
+    def test_create_new_data(self):
+        # create a new table
+        self.db.conn.autocommit = True
+        with self.db.cursor() as c:
+            c.execute(
+                """
+                CREATE TABLE IF NOT EXISTS block (
+                    id serial NOT NULL primary key,
+                    geom geometry(Point,4326),
+                    tile_name character varying,
+                    location character varying
+                );
+                """
+            )
+
+            # Add the table to the publication if it is not included already,
+            # using PL/pgSQL
+            c.execute(
+                """
+                do
+                $$
+                begin
+                    if not exists(
+                        select * from pg_publication_tables
+                        where tablename = 'block'
+                        and pubname = 'logical_replication') then
+                        alter publication logical_replication add table block;
+                    end if;
+                end;
+                $$
+                """
+            )
+
+            c.execute(
+                """
+                INSERT INTO block (id, geom, tile_name, location)
+                VALUES
+                (
+                    1,
+                    st_setsrid(st_makepoint(107.6097, 6.9120),4326),
+                    '2956BC',
+                    'Oceanic'
+                ) ON CONFLICT (id) DO NOTHING;
+                """
+            )
+
+
+class TestReplicationSubscriber(unittest.TestCase):
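+    # Subscriber-side test: creates the target table, creates a subscription
+    # against pg-publisher, then polls (via assert_in_loop) until the
+    # INSERT/UPDATE/DELETE made on the publisher become visible here.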
+
+    def setUp(self):
+        self.db = DBConnection()
+
+    @classmethod
+    def assert_in_loop(
+            cls, func_action, func_assert,
+            back_off_limit=5, base_seconds=2, const_seconds=5):
+        retry = 0
+        last_error = None
+        while retry < back_off_limit:
+            try:
+                output = func_action()
+                func_assert(output)
+                print('Assertion success')
+                return
+            except Exception as e:
+                last_error = e
+                print(e)
+                retry += 1
+                print('Retry [{}]. Attempting to try again later.'.format(
+                    retry))
+                sleep(const_seconds + base_seconds ** retry)
+        raise last_error
+
+    def test_read_data(self):
+        # create a new table
+        self.db.conn.autocommit = True
+        with self.db.cursor() as c:
+            c.execute(
+                """
+                CREATE TABLE IF NOT EXISTS public.block (
+                    id serial NOT NULL primary key,
+                    geom public.geometry(Point,4326),
+                    fid bigint,
+                    tile_name character varying,
+                    location character varying
+                );
+                """
+            )
+
+            # Hardcoded because the replication is set up using a manual query
+            publisher_conn_string = 'host=pg-publisher port=5432 ' \
+                                    'password=docker user=docker dbname=gis'
+
+            # Subscribe to the published table if we are not
+            # subscribed already
+            try:
+                # CREATE SUBSCRIPTION cannot run inside
+                # a transaction block,
+                # so we run it without a transaction block.
+                c.execute(
+                    f"""
+                    create subscription logical_subscription
+                    connection '{publisher_conn_string}'
+                    publication logical_replication
+                    """
+                )
+                # Make sure that new changes are replicated immediately.
+                c.execute(
+                    """
+                    alter subscription logical_subscription
+                    refresh publication
+                    """
+                )
+            except Exception as e:
+                print(e)
+
+            # We don't know when the changes are synced, so we loop the test
+            # Testing insertion sync
+            print('Insertion sync test')
+            self.assert_in_loop(
+                lambda: c.execute(
+                    """
+                    SELECT * FROM block
+                    """
+                ) or c.fetchall(),
+                lambda _rows: self.assertEqual(len(_rows), 1)
+            )
+
+            # Testing update sync
+            print('Update sync test')
+            publisher_conn = psycopg2.connect(publisher_conn_string)
+            publisher_conn.autocommit = True
+            with publisher_conn.cursor() as publisher_c:
+                publisher_c.execute(
+                    """
+                    UPDATE block set location = 'Oceanic territory'
+                    WHERE id = 1
+                    """
+                )
+            self.assert_in_loop(
+                lambda: c.execute(
+                    """
+                    SELECT location FROM block WHERE id = 1;
+                    """
+                ) or c.fetchone(),
+                lambda _rows: self.assertEqual(
+                    _rows[0], 'Oceanic territory')
+            )
+
+            # Testing delete sync
+            print('Delete sync test')
+            with publisher_conn.cursor() as publisher_c:
+                publisher_c.execute(
+                    """
+                    DELETE FROM block WHERE id = 1
+                    """
+                )
+
+            self.assert_in_loop(
+                lambda: c.execute(
+                    """
+                    SELECT * FROM block
+                    """
+                ) or c.fetchall(),
+                lambda _rows: self.assertEqual(len(_rows), 0)
+            )
diff --git a/scenario_tests/logical_replication/tests/test_publisher.sh b/scenario_tests/logical_replication/tests/test_publisher.sh
new file mode 100644
index 0000000..42d7afc
--- /dev/null
+++ b/scenario_tests/logical_replication/tests/test_publisher.sh
@@ -0,0 +1,13 @@
+#!/usr/bin/env bash
+
+set -e
+
+source /scripts/env-data.sh
+
+# execute tests
+pushd /tests
+
+PGHOST=localhost \
+PGDATABASE=gis \
+PYTHONPATH=/lib \
+  python3 -m unittest -v test_logical_replication.TestReplicationPublisher
diff --git a/scenario_tests/logical_replication/tests/test_subscriber.sh b/scenario_tests/logical_replication/tests/test_subscriber.sh
new file mode 100644
index 0000000..7a461e0
--- /dev/null
+++ b/scenario_tests/logical_replication/tests/test_subscriber.sh
@@ -0,0 +1,13 @@
+#!/usr/bin/env bash
+
+set -e
+
+source /scripts/env-data.sh
+
+# execute tests
+pushd /tests
+
+PGHOST=localhost \
+PGDATABASE=gis \
+PYTHONPATH=/lib \
+  python3 -m unittest -v test_logical_replication.TestReplicationSubscriber
diff --git a/scripts/env-data.sh b/scripts/env-data.sh
index 6ae26d4..e9c5293 100644
--- a/scripts/env-data.sh
+++ b/scripts/env-data.sh
@@ -56,6 +56,15 @@ file_env 'POSTGRES_PASS'
 file_env 'POSTGRES_USER'
 file_env 'POSTGRES_DBNAME'
 
+function create_dir() {
+DATA_PATH=$1
+
+if [[ ! -d ${DATA_PATH} ]];
+then
+  echo "Creating" ${DATA_PATH} "directory"
+  mkdir -p ${DATA_PATH}
+fi
+}
 
 # Make sure we have a user set up
 if [ -z "${POSTGRES_USER}" ]; then
   POSTGRES_USER=docker
@@ -90,6 +99,10 @@ if [ -z "${TOPOLOGY}" ]; then
   TOPOLOGY=true
 fi
 # Replication settings
+
+if [ -z "${REPLICATION}" ]; then
+  REPLICATION=true
+fi
 if [ -z "${REPLICATE_PORT}" ]; then
   REPLICATE_PORT=5432
 fi
@@ -103,6 +116,16 @@ if [ -z "${PG_WAL_KEEP_SEGMENTS}" ]; then
   PG_WAL_KEEP_SEGMENTS=20
 fi
 
+
+# Logical replication settings
+if [ -z "${MAX_LOGICAL_REPLICATION_WORKERS}" ]; then
+  MAX_LOGICAL_REPLICATION_WORKERS=4
+fi
+
+if [ -z "${MAX_SYNC_WORKERS_PER_SUBSCRIPTION}" ]; then
+  MAX_SYNC_WORKERS_PER_SUBSCRIPTION=2
+fi
+
 if [ -z "${IP_LIST}" ]; then
   IP_LIST='*'
 fi
diff --git a/scripts/setup-conf.sh b/scripts/setup-conf.sh
index c6efcfd..bcda381 100644
--- a/scripts/setup-conf.sh
+++ b/scripts/setup-conf.sh
@@ -22,17 +22,7 @@ echo "data_directory = '${DATADIR}'" >> $CONF
 # This script will setup necessary configuration to optimise for PostGIS and to enable replications
 cat >> $CONF <> "$CONF" <> "$CONF" <> $CONF
 
 # Optimise PostgreSQL shared memory for PostGIS
diff --git a/scripts/setup-database.sh b/scripts/setup-database.sh
index feb79ed..567763c 100644
--- a/scripts/setup-database.sh
+++ b/scripts/setup-database.sh
@@ -9,7 +9,7 @@ if [[ -z "$(ls -A ${DATADIR} 2> /dev/null)" || "${RECREATE_DATADIR}" == 'TRUE' ]
   # No Replicate From settings. Assume that this is a master database.
   # Initialise db
   echo "Initializing Postgres Database at ${DATADIR}"
-  mkdir -p ${DATADIR}
+  create_dir ${DATADIR}
   rm -rf ${DATADIR}/*
   chown -R postgres:postgres ${DATADIR}
   echo "Initializing with command:"
@@ -21,8 +21,9 @@ fi;
 
 # Set proper permissions
 # needs to be done as root:
-chown -R postgres:postgres ${DATADIR}
-chmod -R 750 ${DATADIR}
+create_dir ${WAL_ARCHIVE}
+chown -R postgres:postgres ${DATADIR} ${WAL_ARCHIVE}
+chmod -R 750 ${DATADIR} ${WAL_ARCHIVE}
 
 # test database existing
 trap "echo \"Sending SIGTERM to postgres\"; killall -s SIGTERM postgres" SIGTERM
@@ -51,10 +52,7 @@ fi
 
 # Create a default db called 'gis' or $POSTGRES_DBNAME that you can use to get up and running quickly
 # It will be owned by the docker db user
 # Since we now pass a comma separated list in database creation we need to search for all databases as a test
-touch custom.sql
-cat >> custom.sql <&1"
diff --git a/scripts/setup-replication.sh b/scripts/setup-replication.sh
index 01db10a..519dbdf 100755
--- a/scripts/setup-replication.sh
+++ b/scripts/setup-replication.sh
@@ -9,16 +9,11 @@ source /scripts/env-data.sh
 
-mkdir -p ${DATADIR}
-chown -R postgres:postgres ${DATADIR}
-chmod -R 700 ${DATADIR}
+create_dir ${WAL_ARCHIVE}
+chown -R postgres:postgres ${DATADIR} ${WAL_ARCHIVE}
+chmod -R 750 ${DATADIR} ${WAL_ARCHIVE}
+
 
-# No content yet - but this is a slave database
-until ping -c 1 -W 1 ${REPLICATE_FROM}
-do
-  echo "Waiting for master to ping..."
-  sleep 1s
-done
 
 function configure_replication_permissions {
 
@@ -44,20 +39,32 @@ until su - postgres -c "${PG_BASEBACKUP} -X stream -h ${REPLICATE_FROM} -p ${REP
 
-if [[ "$DESTROY_DATABASE_ON_RESTART" =~ [Tt][Rr][Uu][Ee] ]]; then
-  echo "Get initial database from master"
-  configure_replication_permissions
-  if [ -f "${DATADIR}/backup_label.old" ]; then
-    echo "PG Basebackup already exists so proceed to start the DB"
-  else
-    streaming_replication
   fi
+if [[ "$WAL_LEVEL" == 'logical' ]]; then
+  echo "We have setup logical replication"
+elif [[ "$WAL_LEVEL" == 'replica' ]]; then
+  # No content yet - but this is a slave database
+  if [ -z "${REPLICATE_FROM}" ]; then
+    echo "You have not set REPLICATE_FROM variable."
+    echo "Specify the master address/hostname in REPLICATE_FROM and REPLICATE_PORT variable."
   fi
+
+  until su - postgres -c "pg_isready -h ${REPLICATE_FROM} -p ${REPLICATE_PORT}"
+  do
+    echo "Waiting for master to ping..."
+    sleep 1s
+  done
+  if [[ "$DESTROY_DATABASE_ON_RESTART" =~ [Tt][Rr][Uu][Ee] ]]; then
+    echo "Get initial database from master"
+    configure_replication_permissions
+    if [ -f "${DATADIR}/backup_label.old" ]; then
+      echo "PG Basebackup already exists so proceed to start the DB"
+    else
+      streaming_replication
+    fi
+  fi
+fi
 
-
-
-
-
 # Promote to master if desired
 if [[ ! -z "${PROMOTE_MASTER}" ]]; then
   touch ${PROMOTE_FILE}
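
For a quick manual check of the sample above, something along these lines should work from the host once the images are available locally. This is only a sketch: it assumes the image defaults (`docker`/`docker` credentials and the `gis` database) and the port mappings defined in `sample/logical_replication/docker-compose.yml` (7777 for the publisher, 7776 for the subscriber):

```
# start the publisher/subscriber pair
cd sample/logical_replication
docker-compose up -d

# write a new row on the publisher...
PGPASSWORD=docker psql -h localhost -p 7777 -U docker -d gis \
  -c "INSERT INTO sweets (name, price) VALUES ('toffee', 2.10);"

# ...and, after giving the subscription a moment, read it back from the subscriber
PGPASSWORD=docker psql -h localhost -p 7776 -U docker -d gis \
  -c "SELECT name, price FROM sweets ORDER BY id;"
```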