serve and publish commands now take a --metadata option
If provided, the --metadata option is the path to a JSON file containing metadata that should be displayed alongside the dataset.

    datasette /tmp/fivethirtyeight.db --metadata /tmp/metadata.json

Currently that metadata format looks like this:

    {
        "title": "Five Thirty Eight",
        "license": "CC Attribution 4.0 License",
        "license_url": "http://creativecommons.org/licenses/by/4.0/",
        "source": "fivethirtyeight/data on GitHub",
        "source_url": "https://github.com/fivethirtyeight/data"
    }

If provided, this will be used by the index template and to populate the common footer.

The publish command also accepts this argument, and will package any provided metadata up and include it with the resulting Docker container.

    datasette publish --metadata /tmp/metadata.json /tmp/fivethirtyeight.db

Closes #68
parent ff2fb573cd
commit 3ef35ca8b4
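For context on how the new option flows through the code, here is a minimal sketch (assuming the Datasette class is importable from datasette.app, and using placeholder paths) of what serve now does when --metadata is supplied:

    # Sketch only: load the metadata JSON and hand it to Datasette,
    # which exposes it to the templates as `metadata`.
    import json

    from datasette.app import Datasette  # assumed import path

    with open('/tmp/metadata.json') as fp:
        metadata = json.load(fp)  # title, license, source, etc.

    app = Datasette(
        ['/tmp/fivethirtyeight.db'],  # one or more SQLite files
        metadata=metadata,            # rendered on the index page and in the footer
    ).app()                           # returns the Sanic application
    app.run(host='127.0.0.1', port=8001)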
@@ -73,7 +73,7 @@ class BaseView(HTTPMethodView):
         return [str(r[1]) for r in rows]

     def resolve_db_name(self, db_name, **kwargs):
-        databases = self.ds.metadata()
+        databases = self.ds.inspect()
         hash = None
         name = None
         if '-' in db_name:
@@ -110,7 +110,7 @@ class BaseView(HTTPMethodView):
         def sql_operation_in_thread():
             conn = getattr(connections, db_name, None)
             if not conn:
-                info = self.ds.metadata()[db_name]
+                info = self.ds.inspect()[db_name]
                 conn = sqlite3.connect(
                     'file:{}?immutable=1'.format(info['file']),
                     uri=True,
@@ -192,6 +192,7 @@ class BaseView(HTTPMethodView):
         ), **{
             'url_json': path_with_ext(request, '.json'),
             'url_jsono': path_with_ext(request, '.jsono'),
+            'metadata': self.ds.metadata,
         }}
         r = self.jinja.render(
             template,
@@ -216,7 +217,7 @@ class IndexView(HTTPMethodView):

     async def get(self, request, as_json):
         databases = []
-        for key, info in sorted(self.ds.metadata().items()):
+        for key, info in sorted(self.ds.inspect().items()):
             database = {
                 'name': key,
                 'hash': info['hash'],
@@ -247,6 +248,7 @@ class IndexView(HTTPMethodView):
             'index.html',
             request,
             databases=databases,
+            metadata=self.ds.metadata,
         )


@@ -261,8 +263,8 @@ class DatabaseView(BaseView):
         if request.args.get('sql'):
             return await self.custom_sql(request, name, hash)
         tables = []
-        table_metadata = self.ds.metadata()[name]['tables']
-        for table_name, table_rows in table_metadata.items():
+        table_inspect = self.ds.inspect()[name]['tables']
+        for table_name, table_rows in table_inspect.items():
             rows = await self.execute(
                 name,
                 'PRAGMA table_info([{}]);'.format(table_name)
@@ -304,7 +306,7 @@ class DatabaseView(BaseView):

 class DatabaseDownload(BaseView):
     async def view_get(self, request, name, hash, **kwargs):
-        filepath = self.ds.metadata()[name]['file']
+        filepath = self.ds.inspect()[name]['file']
         return await response.file_stream(
             filepath, headers={
                 'Content-Disposition': 'attachment; filename="{}"'.format(filepath)
@@ -399,7 +401,7 @@ class TableView(BaseView):
         if use_rowid:
             display_columns = display_columns[1:]
         rows = list(rows)
-        info = self.ds.metadata()
+        info = self.ds.inspect()
         table_rows = info[name]['tables'].get(table)
         after = None
         after_link = None
@@ -471,7 +473,7 @@ class RowView(BaseView):


 class Datasette:
-    def __init__(self, files, num_threads=3, cache_headers=True, page_size=50, metadata=None):
+    def __init__(self, files, num_threads=3, cache_headers=True, page_size=50, inspect_data=None, metadata=None):
         self.files = files
         self.num_threads = num_threads
         self.executor = futures.ThreadPoolExecutor(
@@ -479,43 +481,42 @@ class Datasette:
         )
         self.cache_headers = cache_headers
         self.page_size = page_size
-        self._metadata = metadata
+        self._inspect = inspect_data
+        self.metadata = metadata

-    def metadata(self):
-        if self._metadata:
-            return self._metadata
-        metadata = {}
-        for filename in self.files:
-            path = Path(filename)
-            name = path.stem
-            if name in metadata:
-                raise Exception('Multiple files with same stem %s' % name)
-            # Calculate hash, efficiently
-            m = hashlib.sha256()
-            with path.open('rb') as fp:
-                while True:
-                    data = fp.read(HASH_BLOCK_SIZE)
-                    if not data:
-                        break
-                    m.update(data)
-            # List tables and their row counts
-            tables = {}
-            with sqlite3.connect('file:{}?immutable=1'.format(path), uri=True) as conn:
-                conn.row_factory = sqlite3.Row
-                table_names = [
-                    r['name']
-                    for r in conn.execute('select * from sqlite_master where type="table"')
-                ]
-                for table in table_names:
-                    tables[table] = conn.execute('select count(*) from "{}"'.format(table)).fetchone()[0]
+    def inspect(self):
+        if not self._inspect:
+            self._inspect = {}
+            for filename in self.files:
+                path = Path(filename)
+                name = path.stem
+                if name in self._inspect:
+                    raise Exception('Multiple files with same stem %s' % name)
+                # Calculate hash, efficiently
+                m = hashlib.sha256()
+                with path.open('rb') as fp:
+                    while True:
+                        data = fp.read(HASH_BLOCK_SIZE)
+                        if not data:
+                            break
+                        m.update(data)
+                # List tables and their row counts
+                tables = {}
+                with sqlite3.connect('file:{}?immutable=1'.format(path), uri=True) as conn:
+                    conn.row_factory = sqlite3.Row
+                    table_names = [
+                        r['name']
+                        for r in conn.execute('select * from sqlite_master where type="table"')
+                    ]
+                    for table in table_names:
+                        tables[table] = conn.execute('select count(*) from "{}"'.format(table)).fetchone()[0]

-            metadata[name] = {
-                'hash': m.hexdigest(),
-                'file': str(path),
-                'tables': tables,
-            }
-        self._metadata = metadata
-        return metadata
+                self._inspect[name] = {
+                    'hash': m.hexdigest(),
+                    'file': str(path),
+                    'tables': tables,
+                }
+        return self._inspect

     def app(self):
         app = Sanic(__name__)
@@ -19,16 +19,23 @@ def cli():

 @cli.command()
 @click.argument('files', type=click.Path(exists=True), nargs=-1)
-@click.option('-m', '--metadata', default='metadata.json')
-def build_metadata(files, metadata):
+@click.option('--inspect-file', default='inspect-data.json')
+def build(files, inspect_file):
     app = Datasette(files)
-    open(metadata, 'w').write(json.dumps(app.metadata(), indent=2))
+    open(inspect_file, 'w').write(json.dumps(app.inspect(), indent=2))


 @cli.command()
 @click.argument('files', type=click.Path(exists=True), nargs=-1)
-@click.option('-n', '--name', default='datasette')
-def publish(files, name):
+@click.option(
+    '-n', '--name', default='datasette',
+    help='Application name to use when deploying to Now'
+)
+@click.option(
+    '-m', '--metadata', type=click.File(mode='r'),
+    help='Path to JSON file containing metadata to publish'
+)
+def publish(files, name, metadata):
     if not shutil.which('now'):
         click.secho(
             ' The publish command requires "now" to be installed and configured ',
@@ -50,9 +57,11 @@ def publish(files, name):
     ]
     file_names = [os.path.split(f)[-1] for f in files]
     try:
-        dockerfile = make_dockerfile(file_names)
+        dockerfile = make_dockerfile(file_names, metadata and 'metadata.json')
         os.chdir(datasette_dir)
         open('Dockerfile', 'w').write(dockerfile)
+        if metadata:
+            open('metadata.json', 'w').write(metadata.read())
         for path, filename in zip(file_paths, file_names):
             os.link(path, os.path.join(datasette_dir, filename))
         call('now')
@@ -67,20 +76,27 @@ def publish(files, name):
 @click.option('-p', '--port', default=8001)
 @click.option('--debug', is_flag=True)
 @click.option('--reload', is_flag=True)
-@click.option('-m', '--metadata')
-def serve(files, host, port, debug, reload, metadata):
+@click.option('--inspect-file')
+@click.option('-m', '--metadata', type=click.File(mode='r'))
+def serve(files, host, port, debug, reload, inspect_file, metadata):
     """Serve up specified database files with a web UI"""
     if reload:
         import hupper
         hupper.start_reloader('datasette.cli.serve')

+    inspect_data = None
+    if inspect_file:
+        inspect_data = json.load(open(inspect_file))
+
+    metadata_data = None
     if metadata:
-        metadata = json.load(open(metadata))
+        metadata_data = json.loads(metadata.read())

     click.echo('Serve! files={} on port {}'.format(files, port))
     app = Datasette(
         files,
         cache_headers=not debug and not reload,
-        metadata=metadata,
+        inspect_data=inspect_data,
+        metadata=metadata_data,
     ).app()
     app.run(host=host, port=port, debug=debug)
@@ -14,6 +14,20 @@
 <div class="ft">
     Powered by <a href="https://github.com/simonw/datasette">Datasette</a>
     {% if query_ms %}&middot; Query took {{ query_ms|round(3) }}ms{% endif %}
+    {% if metadata.license %}&middot; Data license:
+        {% if metadata.license_url %}
+            <a href="{{ metadata.license_url }}">{{ metadata.license }}</a>
+        {% else %}
+            {{ metadata.license }}
+        {% endif %}
+    {% endif %}
+    {% if metadata.source_url %}&middot;
+        {% if metadata.source %}
+            Data source: <a href="{{ metadata.source_url }}">{{ metadata.source }}</a>
+        {% else %}
+            <a href="{{ metadata.source_url }}">Data source</a>
+        {% endif %}
+    {% endif %}
 </div>

 </body>
@@ -1,9 +1,28 @@
 {% extends "base.html" %}

-{% block title %}Datasette: {% for database in databases %}{{ database.name }}{% if not loop.last %}, {% endif %}{% endfor %}{% endblock %}
+{% block title %}{{ metadata.title or "Datasette" }}: {% for database in databases %}{{ database.name }}{% if not loop.last %}, {% endif %}{% endfor %}{% endblock %}

 {% block content %}
-<h1>Datasette</h1>
+<h1>{{ metadata.title or "Datasette" }}</h1>
+{% if metadata.license or metadata.source_url %}
+<p>
+    {% if metadata.license %}Data license:
+        {% if metadata.license_url %}
+            <a href="{{ metadata.license_url }}">{{ metadata.license }}</a>
+        {% else %}
+            {{ metadata.license }}
+        {% endif %}
+    {% endif %}
+    {% if metadata.source_url %}&middot;
+        {% if metadata.source %}
+            Data source: <a href="{{ metadata.source_url }}">{{ metadata.source }}</a>
+        {% else %}
+            <a href="{{ metadata.source_url }}">Data source</a>
+        {% endif %}
+    {% endif %}
+</p>
+{% endif %}
+
 {% for database in databases %}
 <h2 style="padding-left: 10px; border-left: 10px solid #{{ database.hash[:6] }}"><a href="{{ database.path }}">{{ database.name }}</a></h2>
 <p>{{ "{:,}".format(database.table_rows) }} rows in {{ database.tables_count }} table{% if database.tables_count != 1 %}s{% endif %}</p>
@@ -133,15 +133,16 @@ def escape_sqlite_table_name(s):
     return '[{}]'.format(s)


-def make_dockerfile(files):
+def make_dockerfile(files, metadata_file):
     return '''
 FROM python:3
 COPY . /app
 WORKDIR /app
-RUN pip install https://static.simonwillison.net/static/2017/datasette-0.5-py3-none-any.whl
-RUN datasette build_metadata {} --metadata metadata.json
+RUN pip install https://static.simonwillison.net/static/2017/datasette-0.6-py3-none-any.whl
+RUN datasette build {} --inspect-file inspect-data.json
 EXPOSE 8006
-CMD ["datasette", "serve", {}, "--port", "8006", "--metadata", "metadata.json"]'''.format(
+CMD ["datasette", "serve", {}, "--port", "8006", "--inspect-file", "inspect-data.json"{}]'''.format(
         ' '.join(files),
         '"' + '", "'.join(files) + '"',
+        metadata_file and ', "--metadata", "{}"'.format(metadata_file) or '',
     ).strip()
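To make the generated Dockerfile concrete, here is an illustrative call (assuming make_dockerfile is importable from datasette.utils; the file names are placeholders) showing the output the template above produces for one database plus a metadata file:

    # Illustration only: render the Dockerfile that publish would write.
    from datasette.utils import make_dockerfile  # assumed import path

    print(make_dockerfile(['fivethirtyeight.db'], 'metadata.json'))
    # FROM python:3
    # COPY . /app
    # WORKDIR /app
    # RUN pip install https://static.simonwillison.net/static/2017/datasette-0.6-py3-none-any.whl
    # RUN datasette build fivethirtyeight.db --inspect-file inspect-data.json
    # EXPOSE 8006
    # CMD ["datasette", "serve", "fivethirtyeight.db", "--port", "8006", "--inspect-file", "inspect-data.json", "--metadata", "metadata.json"]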
setup.py
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages

 setup(
     name='datasette',
-    version='0.5',
+    version='0.6',
     packages=find_packages(),
     package_data={'datasette': ['templates/*.html']},
     include_package_data=True,