# openmaptiles/.github/workflows/tests.yml

# Workflow to validate OMT's new pull requests and commits pushed into the OMT repo

name: OpenMapTiles CI

# Triggers: every pull request, plus pushes to the master branch.
on:
  pull_request:
  push:
    branches:
      - master

jobs:

  # Runs quickstart.sh for a small area and then `make generate-devdoc`.
  integrity_test:
    runs-on: ubuntu-latest
    name: Run integrity test
    steps:

      - name: Checkout the changes
        # v4 is the maintained release line; v2 targets a retired Node.js runtime.
        uses: actions/checkout@v4

      - name: Run quickstart for a small area
        env:
          area: monaco
          QUICKSTART_MIN_ZOOM: "0"
          QUICKSTART_MAX_ZOOM: "14"
        run: |
          # For now, change the quickstart values directly in the .env file
          # TODO: We should probably use env vars instead
          # Match whatever numeric value .env currently holds instead of the literal
          # default, so the override cannot silently become a no-op when the default
          # changes. The target value comes from this step's env to avoid repeating it.
          sed -i "s/QUICKSTART_MAX_ZOOM=[0-9]*/QUICKSTART_MAX_ZOOM=${QUICKSTART_MAX_ZOOM}/g" .env
          export QUIET=1
          ./quickstart.sh "$area"

      - name: Save quickstart.log
        # Upload the log even when an earlier step failed -- that is when it is
        # most useful. (A cancelled run still skips this step.)
        if: success() || failure()
        # v4 is the maintained release line; v1 has been retired by GitHub.
        uses: actions/upload-artifact@v4
        with:
          name: quickstart.log
          path: quickstart.log

      - name: Test etldoc images
        env:
          # Exported to `make` through the step environment.
          TEST_MODE: "yes"
        run: make generate-devdoc

  performance:
    name: Evaluate performance
    runs-on: ubuntu-latest
    # Strictly speaking this job does not need the integrity test to finish first,
    # but a long perf run is pointless until we know the code is OK.
    needs: integrity_test
    env:
      # Smaller tests (runs everything in about 30 minutes)
      # Two test areas:  equatorial-guinea  and  liechtenstein
      # (folded scalar: the lines below are joined with single spaces)
      TEST_PERF_PARAMS: >-
        --minzoom 0 --maxzoom 14
        --bbox 5.4172943,-1.6732196,12.3733400,4.3475256
        --bbox 9.0900979,46.9688169,9.6717077,47.5258072
      TEST_DATA_URL: 'https://drive.google.com/uc?export=download&id=12vw07f9W0MiAHIqMztRiIMwahJfqTi21'

      ## Large test data -- we should switch to it after everything is working ok
      # TEST_PERF_PARAMS: "--minzoom 0 --maxzoom 14 --test hungary --test isle-of-man"
      # TEST_DATA_URL: "https://drive.google.com/uc?export=download&id=1kw7XPDPd1Rc-Zi2XxGLTXdinUSq-S4pT"
    steps:
      - name: Cache test data download
        id: cache-testdata
        # v4 is the maintained release line; v1 has been retired by GitHub.
        uses: actions/cache@v4
        with:
          path: ci_cache
          # Keyed by the download URL, so pointing at a different file forces a re-download.
          key: "${{ env.TEST_DATA_URL }}"

      - name: Download test data on cache miss
        if: steps.cache-testdata.outputs.cache-hit != 'true'
        run: |
          echo "Data file does not exist, downloading $TEST_DATA_URL"
          mkdir -p ci_cache
          # --fail makes curl exit non-zero on an HTTP error, so this step fails right
          # here instead of saving the server's error page as the .pbf file.
          curl --fail --silent --show-error --location --output ci_cache/perf-test-areas-latest.osm.pbf "$TEST_DATA_URL"

      - name: Get code
        # v4 is the maintained release line; v2 targets a retired Node.js runtime.
        uses: actions/checkout@v4
        with:
          # Fetch the last two commits in case this is a PR,
          # and we need to profile the base branch first
          fetch-depth: 2
          # Check out into ./code so the cache/artifact directories stay outside the repo.
          path: code

      - name: Compute git revision hash to cache
        id: calc
        run: |
          # If this is a pull request, we should cache the parent (base) revision
          # Otherwise cache the current one
          cd code
          REV_HASH=$(git log -1 --format="%H")
          if [ "$GITHUB_EVENT_NAME" = "pull_request" ]; then
            # Take the first parent of the grafted commit (cannot use HEAD^1 with shallow clones)
            REV_HASH=$(git cat-file -p "$REV_HASH" | awk 'NR > 1 {if(/^parent/){print $2; exit}}')
          fi
          # Expose the value as step output `hash` (read as steps.calc.outputs.hash).
          # Written to $GITHUB_OUTPUT because the `::set-output` command is deprecated.
          echo "hash=$REV_HASH" >> "$GITHUB_OUTPUT"

      - name: Set up caching for the performance results
        # v4 is the maintained release line; v1 has been retired by GitHub.
        uses: actions/cache@v4
        with:
          path: perf_cache
          # If profiling result cache has incompatible format, increase this "v" number
          key: "v10-${{ steps.calc.outputs.hash }}-${{ env.TEST_DATA_URL }}"

      - name: Load test data into DB and run performance test
        id: main
        env:
          # Revision hash computed by the `calc` step; the script quotes it in the PR message.
          CACHE_SHA: '${{ steps.calc.outputs.hash }}'
        run: |
          # profile LABEL COMMAND [ARGS...]
          #
          # Runs COMMAND under /usr/bin/time and writes the resource report to
          # "${PROFILE_DIR}/profile-LABEL.tsv". PROFILE_DIR is not set here; the caller
          # must assign it before calling. Each report line has the shape
          # "<key>\t<value>\t<description>", where <key> is the letter of the
          # /usr/bin/time format specifier that produced <value>.
          profile() {
            TIME_FORMAT="# %C
          P\t%P\tPercentage of the CPU that this job got.  This is just user + system times divided by the total running time.  It also prints a percentage sign.
          x\t%x\tExit status of the command.
          -\t-----\t-- Time --------------------------------------------------------------------------------------------------
          e\t%e\tElapsed real (wall clock) time used by the process, in seconds.
          E\t%E\tElapsed real (wall clock) time used by the process, in [hours:]minutes:seconds.
          U\t%U\tTotal number of CPU-seconds that the process used directly (in user mode), in seconds.
          S\t%S\tTotal number of CPU-seconds used by the system on behalf of the process (in kernel mode), in seconds.
          -\t-----\t-- Context Switches --------------------------------------------------------------------------------------
          c\t%c\tNumber of times the process was context-switched involuntarily (because the time slice expired).
          w\t%w\tNumber of times that the program was context-switched voluntarily, for instance while waiting for an I/O operation to complete.
          -\t-----\t-- Page faults -------------------------------------------------------------------------------------------
          F\t%F\tNumber of major, or I/O-requiring, page faults that occurred while the process was running.  These are faults where the page has actually migrated out of primary memory.
          R\t%R\tNumber of minor, or recoverable, page faults.  These are pages that are not valid (so they fault) but which have not yet been claimed by other virtual pages.  Thus the data in the page is still valid but the system tables must be updated.
          W\t%W\tNumber of times the process was swapped out of main memory.
          -\t-----\t-- Memory ------------------------------------------------------------------------------------------------
          K\t%K\tAverage total (data+stack+text) memory use of the process, in Kilobytes.
          t\t%t\tAverage resident set size of the process, in Kilobytes.
          p\t%p\tAverage unshared stack size of the process, in Kilobytes.
          M\t%M\tMaximum resident set size of the process during its lifetime, in Kilobytes.
          D\t%D\tAverage size of the process's unshared data area, in Kilobytes.
          X\t%X\tAverage amount of shared text in the process, in Kilobytes.
          Z\t%Z\tSystem's page size, in bytes.  This is a per-system constant, but varies between systems.
          -\t-----\t-- Other -------------------------------------------------------------------------------------------------
          I\t%I\tNumber of file system inputs by the process.
          O\t%O\tNumber of file system outputs by the process.
          k\t%k\tNumber of signals delivered to the process.
          r\t%r\tNumber of socket messages received by the process.
          s\t%s\tNumber of socket messages sent by the process.
          "
            # Must use full path to get the full-featured version of time
            # profile-*.tsv filenames are parsed using ${file:8:-4} below
            # $1 is the label; "${@:2}" is the command line to run and measure.
            /usr/bin/time --format "$TIME_FORMAT" --output "${PROFILE_DIR}/profile-${1}.tsv" "${@:2}"
          }

          # create_db
          #
          # Runs the make targets clean, init-dirs, db-destroy, all and db-start, copies
          # the cached test extract into data/, then runs each import-* target under
          # profile(). Finally writes the database size (in MB) to
          # "${PROFILE_DIR}/db_size.tsv". PROFILE_DIR must be set by the caller.
          create_db() {
            make clean
            make init-dirs
            cp ../ci_cache/perf-test-areas-latest.osm.pbf data/perf-test-areas-latest.osm.pbf
            make db-destroy
            make all
            make db-start

            # Each entry is "<profile label>:<make target>", executed in this order.
            local stage
            for stage in \
              1_data:import-data \
              2_osm:import-osm \
              3_borders:import-borders \
              4_wikidata:import-wikidata \
              5_sql:import-sql
            do
              profile "${stage%%:*}" make "${stage#*:}"
            done

            # Get database total size, in MB
            # Once Makefile has a few more improvements, we can use this approach instead:
            # echo $'\\set QUIET on \\a \\x off \\t \\\\ select pg_database_size(current_database())/1024/1024;' | make -s psql
            local size_query='select pg_database_size(current_database())/1024/1024;'
            local -a psql_cmd
            if grep -qE '^  import-osm:$' docker-compose.yml; then
              # old version using dedicated import-osm docker image
              psql_cmd=(import-osm ./psql.sh)
            else
              psql_cmd=(openmaptiles-tools psql.sh)
            fi
            DB_SIZE_MB=$(docker-compose run --rm -u $(id -u):$(id -g) "${psql_cmd[@]}" -qtAc "$size_query")
            echo "$DB_SIZE_MB" > "${PROFILE_DIR}/db_size.tsv"
          }

          # Working directories live next to the checkout (./code), not inside it.
          mkdir -p perf_cache artifacts pr_message
          cd code

          CURRENT_SHA=$(git rev-parse HEAD)

          # Baseline results: reuse them when perf_cache already holds results.json,
          # otherwise profile the base revision and store the results there.
          if [[ -f ../perf_cache/results.json ]]; then
            echo "Found cached performance results"
          else
            echo "We do not have cached performance results, create them..."
            if [[ "$GITHUB_EVENT_NAME" == "pull_request" ]]; then
              # Step back to the first parent so the baseline is measured there.
              git reset --hard ${CURRENT_SHA}^1
            fi

            PROFILE_DIR=../perf_cache
            create_db

            # Use latest tools version because these specific tests do not yet exist in the 4.1 tools version
            # Custom TOOLS_VERSION can be removed once OMT master is migrated to the next tools version
            # $TEST_PERF_PARAMS is deliberately unquoted: it expands to several arguments.
            TOOLS_VERSION=latest profile test-perf docker-compose run --rm -T openmaptiles-tools \
              test-perf openmaptiles.yaml $TEST_PERF_PARAMS \
              --record /tileset/results.json
            mv results.json ../perf_cache

            if [[ "$GITHUB_EVENT_NAME" == "pull_request" ]]; then
              # For pull requests, restore to the PR version before continuing
              git reset --hard ${CURRENT_SHA}
            fi
          fi

          # Copy the baseline results from perf_cache into artifacts. For pull
          # requests every copied file gets a "base-" prefix; otherwise the names
          # are kept as they are.
          artifact_prefix=""
          if [ "$GITHUB_EVENT_NAME" = "pull_request" ]; then
            artifact_prefix="base-"
          fi

          pushd ../perf_cache
          cp results.json "../artifacts/${artifact_prefix}results.json"
          # Copy all tsv files, not just the ones with "profile-" prefix.
          for file in *.tsv; do
            cp "$file" "../artifacts/${artifact_prefix}${file}"
          done
          popd

          # Pull requests only: profile the PR revision, compare it with the baseline,
          # and write the Markdown report to ../pr_message/message.md.
          if [ "$GITHUB_EVENT_NAME" = "pull_request" ]; then
            echo "Comparing pull request results with the base..."

            # The PR's profile-*.tsv and db_size.tsv are written straight into artifacts.
            PROFILE_DIR=../artifacts
            create_db

            # Use latest tools version because these specific tests do not yet exist in the 4.1 tools version
            # Custom TOOLS_VERSION can be removed once OMT master is migrated to the next tools version
            cp ../perf_cache/results.json .
            OUTPUT="$(TOOLS_VERSION=latest profile test-perf docker-compose run --rm -T openmaptiles-tools \
                        test-perf openmaptiles.yaml $TEST_PERF_PARAMS \
                        --compare /tileset/results.json --record /tileset/pr-results.json)"
            rm results.json
            mv pr-results.json ../artifacts/

            # Split into two parts -- before and after the ===== SUMMARY =====
            OUT_SUMMARY="${OUTPUT##*========}"
            OUT_DETAILS="${OUTPUT%%========*}"

            pushd ../artifacts
            PROFILING_MSG="step | total time | change | user time | change
            --- | --- | --- | --- | ---
            "
            # One table row per profiled step. "profile*.tsv" matches only the PR's
            # files; the baseline for each one is the same name with a "base-" prefix.
            for file in profile*.tsv; do
              new_total_time_fmt=$(grep -E '^E' "$file"   | cut -d$'\t' -f 2)
              old_total_time=$(grep -E '^e' "base-$file"  | cut -d$'\t' -f 2)
              new_total_time=$(grep -E '^e' "$file"       | cut -d$'\t' -f 2)
              old_user_time=$(grep -E '^U' "base-$file"   | cut -d$'\t' -f 2)
              new_user_time=$(grep -E '^U' "$file"        | cut -d$'\t' -f 2)

              # ${file:8:-4} strips the "profile-" prefix and the ".tsv" suffix.
              # A baseline of 0 yields "-" instead of a division by zero. Both awk
              # branches for user time must use printf: with print, the "%s" would be
              # emitted literally rather than replaced by the value.
              PROFILING_MSG="${PROFILING_MSG}${file:8:-4} \
              | ${new_total_time_fmt} | $( echo "$old_total_time $new_total_time" | awk '{ if ($1 == 0) print "-"; else printf  "%.1f%%", ($2-$1)*100/$1 }' ) \
              | $( echo "$old_user_time $new_user_time" | awk '{ if ($1 == 0) printf "%s | -", $2 ; else printf  "%s | %.1f%%", $2, ($2-$1)*100/$1 }' ) \
              "$'\n'
            done

            DB_SIZE_CHANGE=$( echo "$(cat base-db_size.tsv) $(cat db_size.tsv)" | \
              awk '{ printf "PostgreSQL DB size in MB: %s ⇒ %s", $1, $2; if ($1 != 0) printf " (%.1f%% change)", ($2-$1)*100/$1 }' )

            popd

            # The heredoc body and its EOF terminator sit at the base indentation of
            # this YAML block scalar, so the shell sees them starting at column 0.
            cat > ../pr_message/message.md <<EOF
          Results evaluating commit $(git log HEAD^2 -1 --format="%H") (merged with base $CACHE_SHA as $GITHUB_SHA). See [run details](https://github.com/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID).

          $DB_SIZE_CHANGE

          $PROFILING_MSG

          \`\`\`
          $OUT_SUMMARY
          \`\`\`

          <details>
          <summary>expand for details...</summary>

          \`\`\`
          $OUT_DETAILS
          \`\`\`

          </details>
          EOF

          fi

      - name: Save performance artifacts
        # v4 is the maintained release line; v1 has been retired by GitHub.
        uses: actions/upload-artifact@v4
        with:
          name: performance_results
          # Directory filled by the main step (results JSON and profiling *.tsv files).
          path: artifacts

      - name: Save PR message artifact
        # message.md is only written for pull requests, so only upload it then.
        if: github.event_name == 'pull_request'
        # v4 is the maintained release line; v1 has been retired by GitHub.
        uses: actions/upload-artifact@v4
        with:
          name: pr_message
          path: pr_message