serve and publish commands now take a --metadata option

If provided, the --metadata option is the path to a JSON file containing
metadata that should be displayed alongside the dataset.

    datasette serve /tmp/fivethirtyeight.db --metadata /tmp/metadata.json

Currently that metadata format looks like this:

    {
        "title": "Five Thirty Eight",
        "license": "CC Attribution 4.0 License",
        "license_url": "http://creativecommons.org/licenses/by/4.0/",
        "source": "fivethirtyeight/data on GitHub",
        "source_url": "https://github.com/fivethirtyeight/data"
    }

If provided, this metadata is used by the index template and to populate the
common footer.

The publish command also accepts this option, and will bundle any provided
metadata into the resulting Docker image.

    datasette publish --metadata /tmp/metadata.json /tmp/fivethirtyeight.db

Closes #68
pull/81/head
Simon Willison 2017-11-13 07:20:02 -08:00
rodzic ff2fb573cd
commit 3ef35ca8b4
6 zmienionych plików z 111 dodań i 60 usunięć

Wyświetl plik

@ -73,7 +73,7 @@ class BaseView(HTTPMethodView):
return [str(r[1]) for r in rows]
def resolve_db_name(self, db_name, **kwargs):
databases = self.ds.metadata()
databases = self.ds.inspect()
hash = None
name = None
if '-' in db_name:
@ -110,7 +110,7 @@ class BaseView(HTTPMethodView):
def sql_operation_in_thread():
conn = getattr(connections, db_name, None)
if not conn:
info = self.ds.metadata()[db_name]
info = self.ds.inspect()[db_name]
conn = sqlite3.connect(
'file:{}?immutable=1'.format(info['file']),
uri=True,
@ -192,6 +192,7 @@ class BaseView(HTTPMethodView):
), **{
'url_json': path_with_ext(request, '.json'),
'url_jsono': path_with_ext(request, '.jsono'),
'metadata': self.ds.metadata,
}}
r = self.jinja.render(
template,
@ -216,7 +217,7 @@ class IndexView(HTTPMethodView):
async def get(self, request, as_json):
databases = []
for key, info in sorted(self.ds.metadata().items()):
for key, info in sorted(self.ds.inspect().items()):
database = {
'name': key,
'hash': info['hash'],
@ -247,6 +248,7 @@ class IndexView(HTTPMethodView):
'index.html',
request,
databases=databases,
metadata=self.ds.metadata,
)
@ -261,8 +263,8 @@ class DatabaseView(BaseView):
if request.args.get('sql'):
return await self.custom_sql(request, name, hash)
tables = []
table_metadata = self.ds.metadata()[name]['tables']
for table_name, table_rows in table_metadata.items():
table_inspect = self.ds.inspect()[name]['tables']
for table_name, table_rows in table_inspect.items():
rows = await self.execute(
name,
'PRAGMA table_info([{}]);'.format(table_name)
@ -304,7 +306,7 @@ class DatabaseView(BaseView):
class DatabaseDownload(BaseView):
async def view_get(self, request, name, hash, **kwargs):
filepath = self.ds.metadata()[name]['file']
filepath = self.ds.inspect()[name]['file']
return await response.file_stream(
filepath, headers={
'Content-Disposition': 'attachment; filename="{}"'.format(filepath)
@ -399,7 +401,7 @@ class TableView(BaseView):
if use_rowid:
display_columns = display_columns[1:]
rows = list(rows)
info = self.ds.metadata()
info = self.ds.inspect()
table_rows = info[name]['tables'].get(table)
after = None
after_link = None
@ -471,7 +473,7 @@ class RowView(BaseView):
class Datasette:
def __init__(self, files, num_threads=3, cache_headers=True, page_size=50, metadata=None):
def __init__(self, files, num_threads=3, cache_headers=True, page_size=50, inspect_data=None, metadata=None):
self.files = files
self.num_threads = num_threads
self.executor = futures.ThreadPoolExecutor(
@ -479,43 +481,42 @@ class Datasette:
)
self.cache_headers = cache_headers
self.page_size = page_size
self._metadata = metadata
self._inspect = inspect_data
self.metadata = metadata
def metadata(self):
if self._metadata:
return self._metadata
metadata = {}
for filename in self.files:
path = Path(filename)
name = path.stem
if name in metadata:
raise Exception('Multiple files with same stem %s' % name)
# Calculate hash, efficiently
m = hashlib.sha256()
with path.open('rb') as fp:
while True:
data = fp.read(HASH_BLOCK_SIZE)
if not data:
break
m.update(data)
# List tables and their row counts
tables = {}
with sqlite3.connect('file:{}?immutable=1'.format(path), uri=True) as conn:
conn.row_factory = sqlite3.Row
table_names = [
r['name']
for r in conn.execute('select * from sqlite_master where type="table"')
]
for table in table_names:
tables[table] = conn.execute('select count(*) from "{}"'.format(table)).fetchone()[0]
def inspect(self):
if not self._inspect:
self._inspect = {}
for filename in self.files:
path = Path(filename)
name = path.stem
if name in self._inspect:
raise Exception('Multiple files with same stem %s' % name)
# Calculate hash, efficiently
m = hashlib.sha256()
with path.open('rb') as fp:
while True:
data = fp.read(HASH_BLOCK_SIZE)
if not data:
break
m.update(data)
# List tables and their row counts
tables = {}
with sqlite3.connect('file:{}?immutable=1'.format(path), uri=True) as conn:
conn.row_factory = sqlite3.Row
table_names = [
r['name']
for r in conn.execute('select * from sqlite_master where type="table"')
]
for table in table_names:
tables[table] = conn.execute('select count(*) from "{}"'.format(table)).fetchone()[0]
metadata[name] = {
'hash': m.hexdigest(),
'file': str(path),
'tables': tables,
}
self._metadata = metadata
return metadata
self._inspect[name] = {
'hash': m.hexdigest(),
'file': str(path),
'tables': tables,
}
return self._inspect
def app(self):
app = Sanic(__name__)

Wyświetl plik

@ -19,16 +19,23 @@ def cli():
@cli.command()
@click.argument('files', type=click.Path(exists=True), nargs=-1)
@click.option('-m', '--metadata', default='metadata.json')
def build_metadata(files, metadata):
@click.option('--inspect-file', default='inspect-data.json')
def build(files, inspect_file):
app = Datasette(files)
open(metadata, 'w').write(json.dumps(app.metadata(), indent=2))
open(inspect_file, 'w').write(json.dumps(app.inspect(), indent=2))
@cli.command()
@click.argument('files', type=click.Path(exists=True), nargs=-1)
@click.option('-n', '--name', default='datasette')
def publish(files, name):
@click.option(
'-n', '--name', default='datasette',
help='Application name to use when deploying to Now'
)
@click.option(
'-m', '--metadata', type=click.File(mode='r'),
help='Path to JSON file containing metadata to publish'
)
def publish(files, name, metadata):
if not shutil.which('now'):
click.secho(
' The publish command requires "now" to be installed and configured ',
@ -50,9 +57,11 @@ def publish(files, name):
]
file_names = [os.path.split(f)[-1] for f in files]
try:
dockerfile = make_dockerfile(file_names)
dockerfile = make_dockerfile(file_names, metadata and 'metadata.json')
os.chdir(datasette_dir)
open('Dockerfile', 'w').write(dockerfile)
if metadata:
open('metadata.json', 'w').write(metadata.read())
for path, filename in zip(file_paths, file_names):
os.link(path, os.path.join(datasette_dir, filename))
call('now')
@ -67,20 +76,27 @@ def publish(files, name):
@click.option('-p', '--port', default=8001)
@click.option('--debug', is_flag=True)
@click.option('--reload', is_flag=True)
@click.option('-m', '--metadata')
def serve(files, host, port, debug, reload, metadata):
@click.option('--inspect-file')
@click.option('-m', '--metadata', type=click.File(mode='r'))
def serve(files, host, port, debug, reload, inspect_file, metadata):
"""Serve up specified database files with a web UI"""
if reload:
import hupper
hupper.start_reloader('datasette.cli.serve')
inspect_data = None
if inspect_file:
inspect_data = json.load(open(inspect_file))
metadata_data = None
if metadata:
metadata = json.load(open(metadata))
metadata_data = json.loads(metadata.read())
click.echo('Serve! files={} on port {}'.format(files, port))
app = Datasette(
files,
cache_headers=not debug and not reload,
metadata=metadata,
inspect_data=inspect_data,
metadata=metadata_data,
).app()
app.run(host=host, port=port, debug=debug)

Wyświetl plik

@ -14,6 +14,20 @@
<div class="ft">
Powered by <a href="https://github.com/simonw/datasette">Datasette</a>
{% if query_ms %}&middot; Query took {{ query_ms|round(3) }}ms{% endif %}
{% if metadata.license %}&middot; Data license:
{% if metadata.license_url %}
<a href="{{ metadata.license_url }}">{{ metadata.license }}</a>
{% else %}
{{ metadata.license }}
{% endif %}
{% endif %}
{% if metadata.source_url %}&middot;
{% if metadata.source %}
Data source: <a href="{{ metadata.source_url }}">{{ metadata.source }}</a>
{% else %}
<a href="{{ metadata.source_url }}">Data source</a>
{% endif %}
{% endif %}
</div>
</body>

Wyświetl plik

@ -1,9 +1,28 @@
{% extends "base.html" %}
{% block title %}Datasette: {% for database in databases %}{{ database.name }}{% if not loop.last %}, {% endif %}{% endfor %}{% endblock %}
{% block title %}{{ metadata.title or "Datasette" }}: {% for database in databases %}{{ database.name }}{% if not loop.last %}, {% endif %}{% endfor %}{% endblock %}
{% block content %}
<h1>Datasette</h1>
<h1>{{ metadata.title or "Datasette" }}</h1>
{% if metadata.license or metadata.source_url %}
<p>
{% if metadata.license %}Data license:
{% if metadata.license_url %}
<a href="{{ metadata.license_url }}">{{ metadata.license }}</a>
{% else %}
{{ metadata.license }}
{% endif %}
{% endif %}
{% if metadata.source_url %}&middot;
{% if metadata.source %}
Data source: <a href="{{ metadata.source_url }}">{{ metadata.source }}</a>
{% else %}
<a href="{{ metadata.source_url }}">Data source</a>
{% endif %}
{% endif %}
</p>
{% endif %}
{% for database in databases %}
<h2 style="padding-left: 10px; border-left: 10px solid #{{ database.hash[:6] }}"><a href="{{ database.path }}">{{ database.name }}</a></h2>
<p>{{ "{:,}".format(database.table_rows) }} rows in {{ database.tables_count }} table{% if database.tables_count != 1 %}s{% endif %}</p>

Wyświetl plik

@ -133,15 +133,16 @@ def escape_sqlite_table_name(s):
return '[{}]'.format(s)
def make_dockerfile(files):
def make_dockerfile(files, metadata_file):
return '''
FROM python:3
COPY . /app
WORKDIR /app
RUN pip install https://static.simonwillison.net/static/2017/datasette-0.5-py3-none-any.whl
RUN datasette build_metadata {} --metadata metadata.json
RUN pip install https://static.simonwillison.net/static/2017/datasette-0.6-py3-none-any.whl
RUN datasette build {} --inspect-file inspect-data.json
EXPOSE 8006
CMD ["datasette", "serve", {}, "--port", "8006", "--metadata", "metadata.json"]'''.format(
CMD ["datasette", "serve", {}, "--port", "8006", "--inspect-file", "inspect-data.json"{}]'''.format(
' '.join(files),
'"' + '", "'.join(files) + '"',
metadata_file and ', "--metadata", "{}"'.format(metadata_file) or '',
).strip()

Wyświetl plik

@ -2,7 +2,7 @@ from setuptools import setup, find_packages
setup(
name='datasette',
version='0.5',
version='0.6',
packages=find_packages(),
package_data={'datasette': ['templates/*.html']},
include_package_data=True,