Mirror of https://github.com/simonw/datasette
ensure_build_metadata() function for metadata
This will be run at compile time - the goal is to generate a build-metadata.json file with a bunch of useful facts about the databases that could be expensive to generate at run-time. Example metadata: { "flights": { "file": "flights.db", "tables": { "airlines": 6048, "airports": 8107, "routes": 67663 }, "hash": "07d1283e07786b1235bb7041ea445ae103d1571565580a29eab0203c555725fd" } } So far we have a sha256 hash of the database file itself, plus a row count for each table. Fixes #11
rodzic
f571b19d8a
commit
6a0c5de615
|
@ -1,3 +1,5 @@
|
||||||
|
build-metadata.json
|
||||||
|
|
||||||
# Byte-compiled / optimized / DLL files
|
# Byte-compiled / optimized / DLL files
|
||||||
__pycache__/
|
__pycache__/
|
||||||
*.py[cod]
|
*.py[cod]
|
||||||
|
|
48
app.py
48
app.py
|
@ -2,8 +2,55 @@ from sanic import Sanic
|
||||||
from sanic import response
|
from sanic import response
|
||||||
from sanic_jinja2 import SanicJinja2
|
from sanic_jinja2 import SanicJinja2
|
||||||
import sqlite3
|
import sqlite3
|
||||||
|
from pathlib import Path
|
||||||
from functools import wraps
|
from functools import wraps
|
||||||
import json
|
import json
|
||||||
|
import hashlib
|
||||||
|
|
||||||
|
app_root = Path(__file__).parent

# Name of the cached metadata file written next to this module, and the
# filename patterns scanned when (re)building it.
BUILD_METADATA = 'build-metadata.json'
DB_GLOBS = ('*.db', '*.sqlite', '*.sqlite3')
# Hash databases in 1 MB chunks so large files are never loaded into
# memory all at once.
HASH_BLOCK_SIZE = 1024 * 1024


def ensure_build_metadata(regenerate=False):
    """Return build metadata for every database file next to this module.

    The metadata maps each database's stem to a dict with its filename, a
    SHA-256 hex digest of the file contents and a ``{table name: row count}``
    mapping.  The result is cached in ``build-metadata.json`` beside this
    module so the potentially expensive hashing and counting runs once.

    :param regenerate: when True, ignore any cached file and rebuild it.
    :return: dict of ``{stem: {'hash': ..., 'file': ..., 'tables': ...}}``
    :raises Exception: if two database files share the same stem.
    """
    build_metadata = app_root / BUILD_METADATA
    if build_metadata.exists() and not regenerate:
        # Fix: the cached metadata must be *returned* — the original parsed
        # the cache and then fell through, recomputing everything anyway.
        return json.loads(build_metadata.read_text())
    metadata = {}
    for glob in DB_GLOBS:
        for path in app_root.glob(glob):
            name = path.stem
            if name in metadata:
                raise Exception('Multiple files with same stem %s' % name)
            # Calculate the hash incrementally so arbitrarily large
            # databases are fingerprinted with constant memory.
            m = hashlib.sha256()
            with path.open('rb') as fp:
                while True:
                    data = fp.read(HASH_BLOCK_SIZE)
                    if not data:
                        break
                    m.update(data)
            # List tables and their row counts.  immutable=1 promises
            # SQLite the file will not change, enabling lock-free reads.
            # NOTE(review): path.name is resolved relative to the process
            # cwd, not app_root — confirm the app always runs from there.
            tables = {}
            conn = sqlite3.connect(
                'file:{}?immutable=1'.format(path.name), uri=True
            )
            try:
                conn.row_factory = sqlite3.Row
                table_names = [
                    r['name']
                    for r in conn.execute(
                        # 'table' in single quotes is a proper string
                        # literal; the original double quotes only worked
                        # via SQLite's lenient quoting fallback.
                        "select * from sqlite_master where type='table'"
                    )
                ]
                for table in table_names:
                    # Table names come from sqlite_master, so the quoted
                    # interpolation below is safe (not user input).
                    tables[table] = conn.execute(
                        'select count(*) from "{}"'.format(table)
                    ).fetchone()[0]
            finally:
                # The sqlite3 context manager only manages transactions,
                # not connection lifetime — close explicitly to avoid
                # leaking a file handle per database.
                conn.close()
            metadata[name] = {
                'hash': m.hexdigest(),
                'file': path.name,
                'tables': tables,
            }
    build_metadata.write_text(json.dumps(metadata, indent=4))
    return metadata
|
||||||
|
|
||||||
|
|
||||||
# Module-level application singletons: the Sanic web app and the Jinja2
# template integration bound to it (used by the view functions below).
app = Sanic(__name__)
jinja = SanicJinja2(app)
|
||||||
|
@ -37,6 +84,7 @@ async def index(request, sql=None):
|
||||||
return jinja.render('index.html', request,
|
return jinja.render('index.html', request,
|
||||||
headers=headers,
|
headers=headers,
|
||||||
rows=list(rows),
|
rows=list(rows),
|
||||||
|
metadata=json.dumps(ensure_build_metadata(True), indent=2)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -6,6 +6,7 @@ td {
|
||||||
padding: 2px 4px;
|
padding: 2px 4px;
|
||||||
}
|
}
|
||||||
</style>
|
</style>
|
||||||
|
<pre>{{ metadata }}</pre>
|
||||||
<table>
|
<table>
|
||||||
<tr>
|
<tr>
|
||||||
{% for header in headers %}<th scope="col">{{ header }}</th>{% endfor %}
|
{% for header in headers %}<th scope="col">{{ header }}</th>{% endfor %}
|
||||||
|
|
Ładowanie…
Reference in New Issue