Mirror of https://github.com/simonw/datasette
Reworked metadata building options
Building metadata is now optional. If you want to do it, do this:

    datasette build *.db --metadata=metadata.json

Then when you run the server you can tell it to read from metadata:

    datasette serve *.db --metadata=metadata.json

The Dockerfile generated by datasette publish now uses this mechanism.

Closes #60

pull/81/head
parent
ad8b5d3bd2
commit
40a563ebac
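For orientation, here is a minimal sketch of the round trip described in the commit message, based on the metadata structure built by the new Datasette.metadata() method in this diff (database stem mapped to its hash, file name and per-table row counts). The database name, table name and row count below are illustrative placeholders, not values from this commit.

# Sketch: roughly what `datasette build_metadata mydb.db --metadata=metadata.json`
# would write, mirroring Datasette.metadata() below. 'mydb', 'my_table' and the
# row count are made-up examples.
import json

metadata = {
    'mydb': {
        'hash': '<full sha256 hex digest of mydb.db>',
        'file': 'mydb.db',
        'tables': {'my_table': 1234},  # table name -> row count
    },
}
with open('metadata.json', 'w') as fp:
    fp.write(json.dumps(metadata, indent=2))

# `datasette serve mydb.db --metadata=metadata.json` then loads this JSON and
# passes it to Datasette(files, metadata=...), so the server skips hashing and
# row counting at startup.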
datasette/app.py (158 changed lines)
@@ -27,56 +27,17 @@ from .utils import (
 
 app_root = Path(__file__).parent.parent
 
-BUILD_METADATA = 'build-metadata.json'
 HASH_BLOCK_SIZE = 1024 * 1024
 SQL_TIME_LIMIT_MS = 1000
 
 connections = threading.local()
 
 
-def ensure_build_metadata(files, regenerate=True):
-    build_metadata = app_root / BUILD_METADATA
-    if build_metadata.exists() and not regenerate:
-        return json.loads(build_metadata.read_text())
-    print('Building metadata... path={}'.format(build_metadata))
-    metadata = {}
-    for filename in files:
-        path = Path(filename)
-        name = path.stem
-        if name in metadata:
-            raise Exception('Multiple files with same stem %s' % name)
-        # Calculate hash, efficiently
-        m = hashlib.sha256()
-        with path.open('rb') as fp:
-            while True:
-                data = fp.read(HASH_BLOCK_SIZE)
-                if not data:
-                    break
-                m.update(data)
-        # List tables and their row counts
-        tables = {}
-        with sqlite3.connect('file:{}?immutable=1'.format(path.name), uri=True) as conn:
-            conn.row_factory = sqlite3.Row
-            table_names = [
-                r['name']
-                for r in conn.execute('select * from sqlite_master where type="table"')
-            ]
-            for table in table_names:
-                tables[table] = conn.execute('select count(*) from "{}"'.format(table)).fetchone()[0]
-
-        metadata[name] = {
-            'hash': m.hexdigest(),
-            'file': path.name,
-            'tables': tables,
-        }
-    build_metadata.write_text(json.dumps(metadata, indent=4))
-    return metadata
-
-
 class BaseView(HTTPMethodView):
     template = None
 
     def __init__(self, datasette):
+        self.ds = datasette
         self.files = datasette.files
         self.jinja = datasette.jinja
         self.executor = datasette.executor
@@ -103,12 +64,45 @@ class BaseView(HTTPMethodView):
         rows.sort(key=lambda row: row[-1])
         return [str(r[1]) for r in rows]
 
+    def resolve_db_name(self, db_name, **kwargs):
+        databases = self.ds.metadata()
+        hash = None
+        name = None
+        if '-' in db_name:
+            # Might be name-and-hash, or might just be
+            # a name with a hyphen in it
+            name, hash = db_name.rsplit('-', 1)
+            if name not in databases:
+                # Try the whole name
+                name = db_name
+                hash = None
+        else:
+            name = db_name
+        # Verify the hash
+        try:
+            info = databases[name]
+        except KeyError:
+            raise NotFound('Database not found: {}'.format(name))
+        expected = info['hash'][:7]
+        if expected != hash:
+            should_redirect = '/{}-{}'.format(
+                name, expected,
+            )
+            if 'table' in kwargs:
+                should_redirect += '/' + kwargs['table']
+            if 'as_json' in kwargs:
+                should_redirect += kwargs['as_json']
+            if 'as_db' in kwargs:
+                should_redirect += kwargs['as_db']
+            return name, expected, should_redirect
+        return name, expected, None
+
     async def execute(self, db_name, sql, params=None):
         """Executes sql against db_name in a thread"""
         def sql_operation_in_thread():
             conn = getattr(connections, db_name, None)
             if not conn:
-                info = ensure_build_metadata(self.files)[db_name]
+                info = self.ds.metadata()[db_name]
                 conn = sqlite3.connect(
                     'file:{}?immutable=1'.format(info['file']),
                     uri=True,
@@ -133,7 +127,7 @@ class BaseView(HTTPMethodView):
         )
 
     async def get(self, request, db_name, **kwargs):
-        name, hash, should_redirect = resolve_db_name(self.files, db_name, **kwargs)
+        name, hash, should_redirect = self.resolve_db_name(db_name, **kwargs)
         if should_redirect:
             return self.redirect(request, should_redirect)
         return await self.view_get(request, name, hash, **kwargs)
@@ -196,13 +190,14 @@ class BaseView(HTTPMethodView):
 
 class IndexView(HTTPMethodView):
     def __init__(self, datasette):
+        self.ds = datasette
         self.files = datasette.files
         self.jinja = datasette.jinja
         self.executor = datasette.executor
 
     async def get(self, request, as_json):
         databases = []
-        for key, info in sorted(ensure_build_metadata(self.files).items()):
+        for key, info in sorted(self.ds.metadata().items()):
             database = {
                 'name': key,
                 'hash': info['hash'],
@@ -263,7 +258,7 @@ class DatabaseView(BaseView):
 
 class DatabaseDownload(BaseView):
     async def view_get(self, request, name, hash, **kwargs):
-        filepath = ensure_build_metadata(self.files)[name]['file']
+        filepath = self.ds.metadata()[name]['file']
         return await response.file_stream(
             filepath, headers={
                 'Content-Disposition': 'attachment; filename="{}"'.format(filepath)
@@ -339,7 +334,7 @@ class TableView(BaseView):
         if use_rowid:
             display_columns = display_columns[1:]
         rows = list(rows)
-        info = ensure_build_metadata(self.files)
+        info = self.ds.metadata()
         total_rows = info[name]['tables'].get(table)
         after = None
         after_link = None
@@ -404,42 +399,8 @@ class RowView(BaseView):
         }
 
 
-def resolve_db_name(files, db_name, **kwargs):
-    databases = ensure_build_metadata(files)
-    hash = None
-    name = None
-    if '-' in db_name:
-        # Might be name-and-hash, or might just be
-        # a name with a hyphen in it
-        name, hash = db_name.rsplit('-', 1)
-        if name not in databases:
-            # Try the whole name
-            name = db_name
-            hash = None
-    else:
-        name = db_name
-    # Verify the hash
-    try:
-        info = databases[name]
-    except KeyError:
-        raise NotFound('Database not found: {}'.format(name))
-    expected = info['hash'][:7]
-    if expected != hash:
-        should_redirect = '/{}-{}'.format(
-            name, expected,
-        )
-        if 'table' in kwargs:
-            should_redirect += '/' + kwargs['table']
-        if 'as_json' in kwargs:
-            should_redirect += kwargs['as_json']
-        if 'as_db' in kwargs:
-            should_redirect += kwargs['as_db']
-        return name, expected, should_redirect
-    return name, expected, None
-
-
 class Datasette:
-    def __init__(self, files, num_threads=3, cache_headers=True, page_size=50):
+    def __init__(self, files, num_threads=3, cache_headers=True, page_size=50, metadata=None):
         self.files = files
         self.num_threads = num_threads
         self.executor = futures.ThreadPoolExecutor(
@@ -447,6 +408,43 @@ class Datasette:
         )
         self.cache_headers = cache_headers
         self.page_size = page_size
+        self._metadata = metadata
+
+    def metadata(self):
+        if self._metadata:
+            return self._metadata
+        metadata = {}
+        for filename in self.files:
+            path = Path(filename)
+            name = path.stem
+            if name in metadata:
+                raise Exception('Multiple files with same stem %s' % name)
+            # Calculate hash, efficiently
+            m = hashlib.sha256()
+            with path.open('rb') as fp:
+                while True:
+                    data = fp.read(HASH_BLOCK_SIZE)
+                    if not data:
+                        break
+                    m.update(data)
+            # List tables and their row counts
+            tables = {}
+            with sqlite3.connect('file:{}?immutable=1'.format(path.name), uri=True) as conn:
+                conn.row_factory = sqlite3.Row
+                table_names = [
+                    r['name']
+                    for r in conn.execute('select * from sqlite_master where type="table"')
+                ]
+                for table in table_names:
+                    tables[table] = conn.execute('select count(*) from "{}"'.format(table)).fetchone()[0]
+
+            metadata[name] = {
+                'hash': m.hexdigest(),
+                'file': path.name,
+                'tables': tables,
+            }
+        self._metadata = metadata
+        return metadata
 
     def app(self):
         app = Sanic(__name__)
datasette/cli.py

@@ -1,11 +1,12 @@
 import click
 from click_default_group import DefaultGroup
+import json
 import os
 import shutil
 from subprocess import call
 import sys
 import tempfile
-from .app import Datasette, ensure_build_metadata
+from .app import Datasette
 from .utils import make_dockerfile
 
 
@@ -18,8 +19,10 @@ def cli():
 
 @cli.command()
 @click.argument('files', type=click.Path(exists=True), nargs=-1)
-def build(files):
-    ensure_build_metadata(files, True)
+@click.option('-m', '--metadata', default='metadata.json')
+def build_metadata(files, metadata):
+    app = Datasette(files)
+    open(metadata, 'w').write(json.dumps(app.metadata(), indent=2))
 
 
 @cli.command()
@@ -62,12 +65,20 @@ def publish(files):
 @click.option('-p', '--port', default=8001)
 @click.option('--debug', is_flag=True)
 @click.option('--reload', is_flag=True)
-def serve(files, host, port, debug, reload):
+@click.option('-m', '--metadata')
+def serve(files, host, port, debug, reload, metadata):
     """Serve up specified database files with a web UI"""
     if reload:
         import hupper
         hupper.start_reloader('datasette.cli.serve')
 
+    if metadata:
+        metadata = json.load(open(metadata))
+
     click.echo('Serve! files={} on port {}'.format(files, port))
-    app = Datasette(files, cache_headers=not debug and not reload).app()
+    app = Datasette(
+        files,
+        cache_headers=not debug and not reload,
+        metadata=metadata,
+    ).app()
     app.run(host=host, port=port, debug=debug)
datasette/utils.py

@@ -122,10 +122,10 @@ def make_dockerfile(files):
 FROM python:3
 COPY . /app
 WORKDIR /app
-RUN pip install https://static.simonwillison.net/static/2017/datasette-0.1-py3-none-any.whl
-RUN datasette build {}
+RUN pip install https://static.simonwillison.net/static/2017/datasette-0.2-py3-none-any.whl
+RUN datasette build_metadata {} --metadata metadata.json
 EXPOSE 8006
-CMD ["datasette", "serve", {}, "--port", "8006"]'''.format(
+CMD ["datasette", "serve", {}, "--port", "8006", "--metadata", "metadata.json"]'''.format(
         ' '.join(files),
         '"' + '", "'.join(files) + '"',
     ).strip()
setup.py (2 changed lines)
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
 
 setup(
     name='datasette',
-    version='0.1',
+    version='0.2',
     packages=find_packages(),
     package_data={'datasette': ['templates/*.html']},
     include_package_data=True,