From 3ef35ca8b4369af6a8bcdd9e5cfbb5f3a7d17cf8 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 13 Nov 2017 07:20:02 -0800 Subject: [PATCH] serve and publish commands now take a --metadata option If provided, the --metadata option is the path to a JSON file containing metadata that should be displayed alongside the dataset. datasette /tmp/fivethirtyeight.db --metadata /tmp/metadata.json Currently that metadata format looks like this: { "title": "Five Thirty Eight", "license": "CC Attribution 4.0 License", "license_url": "http://creativecommons.org/licenses/by/4.0/", "source": "fivethirtyeight/data on GitHub", "source_url": "https://github.com/fivethirtyeight/data" } If provided, this will be used by the index template and to populate the common footer. The publish command also accepts this argument, and will package any provided metadata up and include it with the resulting Docker container. datasette publish --metadata /tmp/metadata.json /tmp/fivethirtyeight.db Closes #68 --- datasette/app.py | 87 +++++++++++++++++----------------- datasette/cli.py | 36 ++++++++++---- datasette/templates/base.html | 14 ++++++ datasette/templates/index.html | 23 ++++++++- datasette/utils.py | 9 ++-- setup.py | 2 +- 6 files changed, 111 insertions(+), 60 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 7e2e6e4b..cb42f91b 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -73,7 +73,7 @@ class BaseView(HTTPMethodView): return [str(r[1]) for r in rows] def resolve_db_name(self, db_name, **kwargs): - databases = self.ds.metadata() + databases = self.ds.inspect() hash = None name = None if '-' in db_name: @@ -110,7 +110,7 @@ class BaseView(HTTPMethodView): def sql_operation_in_thread(): conn = getattr(connections, db_name, None) if not conn: - info = self.ds.metadata()[db_name] + info = self.ds.inspect()[db_name] conn = sqlite3.connect( 'file:{}?immutable=1'.format(info['file']), uri=True, @@ -192,6 +192,7 @@ class BaseView(HTTPMethodView): ), **{ 'url_json': path_with_ext(request, '.json'), 'url_jsono': path_with_ext(request, '.jsono'), + 'metadata': self.ds.metadata, }} r = self.jinja.render( template, @@ -216,7 +217,7 @@ class IndexView(HTTPMethodView): async def get(self, request, as_json): databases = [] - for key, info in sorted(self.ds.metadata().items()): + for key, info in sorted(self.ds.inspect().items()): database = { 'name': key, 'hash': info['hash'], @@ -247,6 +248,7 @@ class IndexView(HTTPMethodView): 'index.html', request, databases=databases, + metadata=self.ds.metadata, ) @@ -261,8 +263,8 @@ class DatabaseView(BaseView): if request.args.get('sql'): return await self.custom_sql(request, name, hash) tables = [] - table_metadata = self.ds.metadata()[name]['tables'] - for table_name, table_rows in table_metadata.items(): + table_inspect = self.ds.inspect()[name]['tables'] + for table_name, table_rows in table_inspect.items(): rows = await self.execute( name, 'PRAGMA table_info([{}]);'.format(table_name) @@ -304,7 +306,7 @@ class DatabaseView(BaseView): class DatabaseDownload(BaseView): async def view_get(self, request, name, hash, **kwargs): - filepath = self.ds.metadata()[name]['file'] + filepath = self.ds.inspect()[name]['file'] return await response.file_stream( filepath, headers={ 'Content-Disposition': 'attachment; filename="{}"'.format(filepath) @@ -399,7 +401,7 @@ class TableView(BaseView): if use_rowid: display_columns = display_columns[1:] rows = list(rows) - info = self.ds.metadata() + info = self.ds.inspect() table_rows = info[name]['tables'].get(table) after = None after_link = None @@ -471,7 +473,7 @@ class RowView(BaseView): class Datasette: - def __init__(self, files, num_threads=3, cache_headers=True, page_size=50, metadata=None): + def __init__(self, files, num_threads=3, cache_headers=True, page_size=50, inspect_data=None, metadata=None): self.files = files self.num_threads = num_threads self.executor = futures.ThreadPoolExecutor( @@ -479,43 +481,42 @@ class Datasette: ) self.cache_headers = cache_headers self.page_size = page_size - self._metadata = metadata + self._inspect = inspect_data + self.metadata = metadata - def metadata(self): - if self._metadata: - return self._metadata - metadata = {} - for filename in self.files: - path = Path(filename) - name = path.stem - if name in metadata: - raise Exception('Multiple files with same stem %s' % name) - # Calculate hash, efficiently - m = hashlib.sha256() - with path.open('rb') as fp: - while True: - data = fp.read(HASH_BLOCK_SIZE) - if not data: - break - m.update(data) - # List tables and their row counts - tables = {} - with sqlite3.connect('file:{}?immutable=1'.format(path), uri=True) as conn: - conn.row_factory = sqlite3.Row - table_names = [ - r['name'] - for r in conn.execute('select * from sqlite_master where type="table"') - ] - for table in table_names: - tables[table] = conn.execute('select count(*) from "{}"'.format(table)).fetchone()[0] + def inspect(self): + if not self._inspect: + self._inspect = {} + for filename in self.files: + path = Path(filename) + name = path.stem + if name in self._inspect: + raise Exception('Multiple files with same stem %s' % name) + # Calculate hash, efficiently + m = hashlib.sha256() + with path.open('rb') as fp: + while True: + data = fp.read(HASH_BLOCK_SIZE) + if not data: + break + m.update(data) + # List tables and their row counts + tables = {} + with sqlite3.connect('file:{}?immutable=1'.format(path), uri=True) as conn: + conn.row_factory = sqlite3.Row + table_names = [ + r['name'] + for r in conn.execute('select * from sqlite_master where type="table"') + ] + for table in table_names: + tables[table] = conn.execute('select count(*) from "{}"'.format(table)).fetchone()[0] - metadata[name] = { - 'hash': m.hexdigest(), - 'file': str(path), - 'tables': tables, - } - self._metadata = metadata - return metadata + self._inspect[name] = { + 'hash': m.hexdigest(), + 'file': str(path), + 'tables': tables, + } + return self._inspect def app(self): app = Sanic(__name__) diff --git a/datasette/cli.py b/datasette/cli.py index 4e6fc888..2d1e7539 100644 --- a/datasette/cli.py +++ b/datasette/cli.py @@ -19,16 +19,23 @@ def cli(): @cli.command() @click.argument('files', type=click.Path(exists=True), nargs=-1) -@click.option('-m', '--metadata', default='metadata.json') -def build_metadata(files, metadata): +@click.option('--inspect-file', default='inspect-data.json') +def build(files, inspect_file): app = Datasette(files) - open(metadata, 'w').write(json.dumps(app.metadata(), indent=2)) + open(inspect_file, 'w').write(json.dumps(app.inspect(), indent=2)) @cli.command() @click.argument('files', type=click.Path(exists=True), nargs=-1) -@click.option('-n', '--name', default='datasette') -def publish(files, name): +@click.option( + '-n', '--name', default='datasette', + help='Application name to use when deploying to Now' +) +@click.option( + '-m', '--metadata', type=click.File(mode='r'), + help='Path to JSON file containing metadata to publish' +) +def publish(files, name, metadata): if not shutil.which('now'): click.secho( ' The publish command requires "now" to be installed and configured ', @@ -50,9 +57,11 @@ def publish(files, name): ] file_names = [os.path.split(f)[-1] for f in files] try: - dockerfile = make_dockerfile(file_names) + dockerfile = make_dockerfile(file_names, metadata and 'metadata.json') os.chdir(datasette_dir) open('Dockerfile', 'w').write(dockerfile) + if metadata: + open('metadata.json', 'w').write(metadata.read()) for path, filename in zip(file_paths, file_names): os.link(path, os.path.join(datasette_dir, filename)) call('now') @@ -67,20 +76,27 @@ def publish(files, name): @click.option('-p', '--port', default=8001) @click.option('--debug', is_flag=True) @click.option('--reload', is_flag=True) -@click.option('-m', '--metadata') -def serve(files, host, port, debug, reload, metadata): +@click.option('--inspect-file') +@click.option('-m', '--metadata', type=click.File(mode='r')) +def serve(files, host, port, debug, reload, inspect_file, metadata): """Serve up specified database files with a web UI""" if reload: import hupper hupper.start_reloader('datasette.cli.serve') + inspect_data = None + if inspect_file: + inspect_data = json.load(open(inspect_file)) + + metadata_data = None if metadata: - metadata = json.load(open(metadata)) + metadata_data = json.loads(metadata.read()) click.echo('Serve! files={} on port {}'.format(files, port)) app = Datasette( files, cache_headers=not debug and not reload, - metadata=metadata, + inspect_data=inspect_data, + metadata=metadata_data, ).app() app.run(host=host, port=port, debug=debug) diff --git a/datasette/templates/base.html b/datasette/templates/base.html index 901c92d9..d779f12c 100644 --- a/datasette/templates/base.html +++ b/datasette/templates/base.html @@ -14,6 +14,20 @@
Powered by Datasette {% if query_ms %}· Query took {{ query_ms|round(3) }}ms{% endif %} + {% if metadata.license %}· Data license: + {% if metadata.license_url %} + {{ metadata.license }} + {% else %} + {{ metadata.license }} + {% endif %} + {% endif %} + {% if metadata.source_url %}· + {% if metadata.source %} + Data source: {{ metadata.source }} + {% else %} + Data source + {% endif %} + {% endif %}
diff --git a/datasette/templates/index.html b/datasette/templates/index.html index beda106b..1cc038da 100644 --- a/datasette/templates/index.html +++ b/datasette/templates/index.html @@ -1,9 +1,28 @@ {% extends "base.html" %} -{% block title %}Datasette: {% for database in databases %}{{ database.name }}{% if not loop.last %}, {% endif %}{% endfor %}{% endblock %} +{% block title %}{{ metadata.title or "Datasette" }}: {% for database in databases %}{{ database.name }}{% if not loop.last %}, {% endif %}{% endfor %}{% endblock %} {% block content %} -

Datasette

+

{{ metadata.title or "Datasette" }}

+{% if metadata.license or metadata.source_url %} +

+ {% if metadata.license %}Data license: + {% if metadata.license_url %} + {{ metadata.license }} + {% else %} + {{ metadata.license }} + {% endif %} + {% endif %} + {% if metadata.source_url %}· + {% if metadata.source %} + Data source: {{ metadata.source }} + {% else %} + Data source + {% endif %} + {% endif %} +

+{% endif %} + {% for database in databases %}

{{ database.name }}

{{ "{:,}".format(database.table_rows) }} rows in {{ database.tables_count }} table{% if database.tables_count != 1 %}s{% endif %}

diff --git a/datasette/utils.py b/datasette/utils.py index ae8ad3a7..a520843d 100644 --- a/datasette/utils.py +++ b/datasette/utils.py @@ -133,15 +133,16 @@ def escape_sqlite_table_name(s): return '[{}]'.format(s) -def make_dockerfile(files): +def make_dockerfile(files, metadata_file): return ''' FROM python:3 COPY . /app WORKDIR /app -RUN pip install https://static.simonwillison.net/static/2017/datasette-0.5-py3-none-any.whl -RUN datasette build_metadata {} --metadata metadata.json +RUN pip install https://static.simonwillison.net/static/2017/datasette-0.6-py3-none-any.whl +RUN datasette build {} --inspect-file inspect-data.json EXPOSE 8006 -CMD ["datasette", "serve", {}, "--port", "8006", "--metadata", "metadata.json"]'''.format( +CMD ["datasette", "serve", {}, "--port", "8006", "--inspect-file", "inspect-data.json"{}]'''.format( ' '.join(files), '"' + '", "'.join(files) + '"', + metadata_file and ', "--metadata", "{}"'.format(metadata_file) or '', ).strip() diff --git a/setup.py b/setup.py index e2e64999..8894b2b8 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from setuptools import setup, find_packages setup( name='datasette', - version='0.5', + version='0.6', packages=find_packages(), package_data={'datasette': ['templates/*.html']}, include_package_data=True,