ensure_build_metadata() function for metadata

This will be run at compile time - the goal is to generate a
build-metadata.json file with a bunch of useful facts about the databases
that could be expensive to generate at run-time.

Example metadata:

    {
        "flights": {
            "file": "flights.db",
            "tables": {
                "airlines": 6048,
                "airports": 8107,
                "routes": 67663
            },
            "hash": "07d1283e07786b1235bb7041ea445ae103d1571565580a29eab0203c555725fd"
        }
    }

So far we have a sha256 hash of the database file itself, plus a row count for
each table.

Fixes #11
pull/383/head
Simon Willison 2017-10-23 09:02:40 -07:00
rodzic f571b19d8a
commit 6a0c5de615
3 zmienionych plików z 51 dodań i 0 usunięć

2
.gitignore vendored
Wyświetl plik

@ -1,3 +1,5 @@
build-metadata.json
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]

48
app.py
Wyświetl plik

@ -2,8 +2,55 @@ from sanic import Sanic
from sanic import response
from sanic_jinja2 import SanicJinja2
import sqlite3
from pathlib import Path
from functools import wraps
import json
import hashlib
# Directory containing this module; database files are discovered here and
# the metadata cache file is written here.
app_root = Path(__file__).parent
# File name of the JSON metadata cache, relative to app_root.
BUILD_METADATA = 'build-metadata.json'
# Glob patterns used to find SQLite database files directly inside app_root.
DB_GLOBS = ('*.db', '*.sqlite', '*.sqlite3')
# Number of bytes read per chunk while hashing (1 MiB), so a large database
# is never loaded into memory in one piece.
HASH_BLOCK_SIZE = 1024 * 1024


def _hash_file(path):
    """Return the hex SHA-256 digest of the file at *path*, read in chunks."""
    digest = hashlib.sha256()
    with path.open('rb') as fp:
        while True:
            data = fp.read(HASH_BLOCK_SIZE)
            if not data:
                break
            digest.update(data)
    return digest.hexdigest()


def _table_row_counts(path):
    """Return a ``{table_name: row_count}`` dict for the SQLite file at *path*."""
    # Build the URI from the resolved absolute path so the lookup does not
    # depend on the process's current working directory, and so characters
    # that are special in URIs are percent-encoded.
    uri = '{}?immutable=1'.format(path.resolve().as_uri())
    conn = sqlite3.connect(uri, uri=True)
    # sqlite3's own context manager only wraps a transaction; it does not
    # close the connection, so close it explicitly.
    try:
        table_names = [
            row[0]
            for row in conn.execute(
                "select name from sqlite_master where type = 'table'"
            )
        ]
        tables = {}
        for table in table_names:
            # Identifiers cannot be bound as parameters; double any embedded
            # double quote so the quoted identifier stays well-formed.
            quoted = '"{}"'.format(table.replace('"', '""'))
            tables[table] = conn.execute(
                'select count(*) from {}'.format(quoted)
            ).fetchone()[0]
        return tables
    finally:
        conn.close()


def ensure_build_metadata(regenerate=False):
    """Return build metadata for every database file found in ``app_root``.

    The result maps each database file's stem to a dict with three keys:
    ``hash`` (hex SHA-256 of the file), ``file`` (the file name) and
    ``tables`` (table name -> row count). It is also written as JSON to
    ``app_root / BUILD_METADATA``.

    Args:
        regenerate: When false (the default) and the JSON file already
            exists, its parsed contents are returned without touching the
            databases. When true, the metadata is always recomputed and the
            JSON file is overwritten.

    Returns:
        The metadata dict described above.

    Raises:
        Exception: If two database files share the same stem
            (for example ``a.db`` and ``a.sqlite``).
    """
    build_metadata = app_root / BUILD_METADATA
    if build_metadata.exists() and not regenerate:
        # Cache hit: hand back the stored result instead of rehashing.
        return json.loads(build_metadata.read_text())
    metadata = {}
    for glob in DB_GLOBS:
        for path in app_root.glob(glob):
            name = path.stem
            if name in metadata:
                raise Exception('Multiple files with same stem %s' % name)
            metadata[name] = {
                'hash': _hash_file(path),
                'file': path.name,
                'tables': _table_row_counts(path),
            }
    build_metadata.write_text(json.dumps(metadata, indent=4))
    return metadata
# Sanic application object for this module.
app = Sanic(__name__)
# SanicJinja2 wrapper bound to the app; handlers call jinja.render(...) on it.
jinja = SanicJinja2(app)
@ -37,6 +84,7 @@ async def index(request, sql=None):
return jinja.render('index.html', request,
headers=headers,
rows=list(rows),
metadata=json.dumps(ensure_build_metadata(True), indent=2)
)

Wyświetl plik

@ -6,6 +6,7 @@ td {
padding: 2px 4px;
}
</style>
<pre>{{ metadata }}</pre>
<table>
<tr>
{% for header in headers %}<th scope="col">{{ header }}</th>{% endfor %}