Merge branch 'master' into publish-heroku

pull/104/head
Jacob Kaplan-Moss 2017-11-17 13:36:50 -08:00
commit 54d58ef690
23 changed files with 945 additions and 66 deletions

13
.dockerignore 100644
View file

@ -0,0 +1,13 @@
.DS_Store
.cache
.eggs
.git
.gitignore
.ipynb_checkpoints
.travis.yml
build
*.spec
*.egg-info
dist
scratchpad
venv

2
.gitignore vendored
View file

@ -1,6 +1,8 @@
build-metadata.json
datasets.json
scratchpad
# SQLite databases
*.db
*.sqlite

View file

@ -1,12 +1,21 @@
FROM python:3.6 as build
FROM python:3.6-slim-stretch as build
ARG VERSION=0.11
RUN pip install datasette==$VERSION
# Setup build dependencies
RUN apt update
RUN apt install -y python3-dev gcc libsqlite3-mod-spatialite
# Add local code to the image instead of fetching from pypi.
ADD . /datasette
FROM python:3.6-slim
RUN pip install /datasette
FROM python:3.6-slim-stretch
# Copy python dependencies
COPY --from=build /usr/local/lib/python3.6/site-packages /usr/local/lib/python3.6/site-packages
# Copy executables
COPY --from=build /usr/local/bin /usr/local/bin
# Copy spatial extensions
COPY --from=build /usr/lib/x86_64-linux-gnu /usr/lib/x86_64-linux-gnu
EXPOSE 8001
CMD ["datasette"]

View file

@ -2,6 +2,7 @@
[![PyPI](https://img.shields.io/pypi/v/datasette.svg)](https://pypi.python.org/pypi/datasette)
[![Travis CI](https://travis-ci.org/simonw/datasette.svg?branch=master)](https://travis-ci.org/simonw/datasette)
[![Documentation Status](https://readthedocs.org/projects/datasette/badge/?version=latest)](http://datasette.readthedocs.io/en/latest/?badge=latest)
*An instant JSON API for your SQLite databases*
@ -95,6 +96,7 @@ http://localhost:8001/History/downloads.jsono will return that data as JSON in a
--max_returned_rows INTEGER Max allowed rows to return at once - default is
1000. Set to 0 to disable check entirely.
--sql_time_limit_ms INTEGER Max time allowed for SQL queries in ms
--load-extension TEXT Path to a SQLite extension to load
--inspect-file TEXT Path to JSON file created using "datasette
build"
-m, --metadata FILENAME Path to JSON file containing license/source

View file

@ -0,0 +1 @@
from datasette.version import __version_info__, __version__ # noqa

View file

@ -4,6 +4,7 @@ from sanic.exceptions import NotFound
from sanic.views import HTTPMethodView
from sanic_jinja2 import SanicJinja2
from jinja2 import FileSystemLoader
import re
import sqlite3
from pathlib import Path
from concurrent import futures
@ -11,21 +12,24 @@ import asyncio
import threading
import urllib.parse
import json
import jinja2
import hashlib
import time
from .utils import (
build_where_clauses,
compound_pks_from_path,
CustomJSONEncoder,
escape_css_string,
escape_sqlite_table_name,
get_all_foreign_keys,
InvalidSql,
path_from_row_pks,
path_with_added_args,
path_with_ext,
compound_pks_from_path,
sqlite_timelimit,
validate_sql_select,
)
from .version import __version__
app_root = Path(__file__).parent.parent
@ -113,6 +117,10 @@ class BaseView(HTTPMethodView):
conn.text_factory = lambda x: str(x, 'utf-8', 'replace')
for name, num_args, func in self.ds.sqlite_functions:
conn.create_function(name, num_args, func)
if self.ds.sqlite_extensions:
conn.enable_load_extension(True)
for extension in self.ds.sqlite_extensions:
conn.execute("SELECT load_extension('{}')".format(extension))
async def execute(self, db_name, sql, params=None, truncate=False, custom_time_limit=None):
"""Executes sql against db_name in a thread"""
@ -221,6 +229,7 @@ class BaseView(HTTPMethodView):
'url_json': path_with_ext(request, '.json'),
'url_jsono': path_with_ext(request, '.jsono'),
'metadata': self.ds.metadata,
'datasette_version': __version__,
}}
r = self.jinja.render(
template,
@ -252,12 +261,12 @@ class IndexView(HTTPMethodView):
'path': '{}-{}'.format(key, info['hash'][:7]),
'tables_truncated': sorted(
info['tables'].items(),
key=lambda p: p[1],
key=lambda p: p[1]['count'],
reverse=True
)[:5],
'tables_count': len(info['tables'].items()),
'tables_more': len(info['tables'].items()) > 5,
'table_rows': sum(info['tables'].values()),
'table_rows': sum([t['count'] for t in info['tables'].values()]),
}
databases.append(database)
if as_json:
@ -277,6 +286,7 @@ class IndexView(HTTPMethodView):
request,
databases=databases,
metadata=self.ds.metadata,
datasette_version=__version__,
)
@ -286,13 +296,14 @@ async def favicon(request):
class DatabaseView(BaseView):
template = 'database.html'
re_named_parameter = re.compile(':([a-zA-Z0-9_]+)')
async def data(self, request, name, hash):
if request.args.get('sql'):
return await self.custom_sql(request, name, hash)
tables = []
table_inspect = self.ds.inspect()[name]['tables']
for table_name, table_rows in table_inspect.items():
for table_name, info in table_inspect.items():
rows = await self.execute(
name,
'PRAGMA table_info([{}]);'.format(table_name)
@ -300,7 +311,7 @@ class DatabaseView(BaseView):
tables.append({
'name': table_name,
'columns': [r[1] for r in rows],
'table_rows': table_rows,
'table_rows': info['count'],
})
tables.sort(key=lambda t: t['name'])
views = await self.execute(name, 'select name from sqlite_master where type = "view"')
@ -316,6 +327,19 @@ class DatabaseView(BaseView):
params = request.raw_args
sql = params.pop('sql')
validate_sql_select(sql)
# Extract any :named parameters
named_parameters = self.re_named_parameter.findall(sql)
named_parameter_values = {
named_parameter: params.get(named_parameter) or ''
for named_parameter in named_parameters
}
# Set to blank string if missing from params
for named_parameter in named_parameters:
if named_parameter not in params:
params[named_parameter] = ''
extra_args = {}
if params.get('_sql_time_limit_ms'):
extra_args['custom_time_limit'] = int(params['_sql_time_limit_ms'])
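
A rough illustration of the named-parameter handling added above, using the same regex; the query and parameter values are invented:

    import re

    re_named_parameter = re.compile(':([a-zA-Z0-9_]+)')
    sql = 'select name, count(*) from dogs where breed like :breed and name = :name'
    params = {'breed': 'pug'}

    named_parameters = re_named_parameter.findall(sql)   # ['breed', 'name']
    named_parameter_values = {
        p: params.get(p) or '' for p in named_parameters
    }                                                     # {'breed': 'pug', 'name': ''}
    # Parameters missing from the query string default to a blank string
    for p in named_parameters:
        if p not in params:
            params[p] = ''
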
@ -335,6 +359,7 @@ class DatabaseView(BaseView):
}, {
'database_hash': hash,
'custom_sql': True,
'named_parameter_values': named_parameter_values,
}
@ -446,12 +471,16 @@ class TableView(BaseView):
)
columns = [r[0] for r in description]
display_columns = columns
if use_rowid:
display_columns = display_columns[1:]
rows = list(rows)
display_columns = columns
if not use_rowid and not is_view:
display_columns = ['Link'] + display_columns
info = self.ds.inspect()
table_rows = info[name]['tables'].get(table)
table_rows = None
if not is_view:
table_rows = info[name]['tables'][table]['count']
next_value = None
next_url = None
if len(rows) > self.page_size:
@ -462,6 +491,7 @@ class TableView(BaseView):
next_url = urllib.parse.urljoin(request.url, path_with_added_args(request, {
'_next': next_value,
}))
return {
'database': name,
'table': table,
@ -482,11 +512,47 @@ class TableView(BaseView):
}, lambda: {
'database_hash': hash,
'use_rowid': use_rowid,
'row_link': lambda row: path_from_row_pks(row, pks, use_rowid),
'display_columns': display_columns,
'display_rows': make_display_rows(name, hash, table, rows, display_columns, pks, is_view, use_rowid),
}
def make_display_rows(database, database_hash, table, rows, display_columns, pks, is_view, use_rowid):
for row in rows:
cells = []
# Unless we are a view, the first column is a link - either to the rowid
# or to the simple or compound primary key
if not is_view:
display_value = jinja2.Markup(
'<a href="/{database}-{database_hash}/{table}/{flat_pks}">{flat_pks}</a>'.format(
database=database,
database_hash=database_hash,
table=urllib.parse.quote_plus(table),
flat_pks=path_from_row_pks(row, pks, use_rowid),
)
)
cells.append({
'column': 'rowid' if use_rowid else 'Link',
'value': display_value,
})
for value, column in zip(row, display_columns):
if use_rowid and column == 'rowid':
# We already showed this in the linked first column
continue
if False: # TODO: This is where we will do foreign key linking
display_value = jinja2.Markup('<a href="#">{}</a>'.format('foreign key'))
elif value is None:
display_value = jinja2.Markup('&nbsp;')
else:
display_value = str(value)
cells.append({
'column': column,
'value': display_value,
})
yield cells
class RowView(BaseView):
template = 'row.html'
@ -524,7 +590,6 @@ class RowView(BaseView):
'primary_key_values': pk_values,
}, {
'database_hash': hash,
'row_link': None,
}
@ -532,7 +597,7 @@ class Datasette:
def __init__(
self, files, num_threads=3, cache_headers=True, page_size=100,
max_returned_rows=1000, sql_time_limit_ms=1000, cors=False,
inspect_data=None, metadata=None):
inspect_data=None, metadata=None, sqlite_extensions=None):
self.files = files
self.num_threads = num_threads
self.executor = futures.ThreadPoolExecutor(
@ -546,6 +611,7 @@ class Datasette:
self._inspect = inspect_data
self.metadata = metadata or {}
self.sqlite_functions = []
self.sqlite_extensions = sqlite_extensions or []
def inspect(self):
if not self._inspect:
@ -572,7 +638,13 @@ class Datasette:
for r in conn.execute('select * from sqlite_master where type="table"')
]
for table in table_names:
tables[table] = conn.execute('select count(*) from "{}"'.format(table)).fetchone()[0]
tables[table] = {
'count': conn.execute('select count(*) from "{}"'.format(table)).fetchone()[0],
}
foreign_keys = get_all_foreign_keys(conn)
for table, info in foreign_keys.items():
tables[table]['foreign_keys'] = info
self._inspect[name] = {
'hash': m.hexdigest(),
@ -587,7 +659,8 @@ class Datasette:
app,
loader=FileSystemLoader([
str(app_root / 'datasette' / 'templates')
])
]),
autoescape=True,
)
self.jinja.add_env('escape_css_string', escape_css_string, 'filters')
self.jinja.add_env('quote_plus', lambda u: urllib.parse.quote_plus(u), 'filters')
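
With the inspect() changes earlier in this file, each table entry is now a dictionary rather than a bare row count. Roughly this shape (database name, table name and numbers are made up):

    inspect_data = {
        'history': {
            'hash': '54d58ef690',
            'tables': {
                'downloads': {
                    'count': 576,
                    'foreign_keys': {'incoming': [], 'outgoing': []},
                },
            },
        },
    }
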

View file

@ -124,9 +124,13 @@ def package(files, tag, metadata, extra_options, **extra_metadata):
@click.option('--page_size', default=100, help='Page size - default is 100')
@click.option('--max_returned_rows', default=1000, help='Max allowed rows to return at once - default is 1000. Set to 0 to disable check entirely.')
@click.option('--sql_time_limit_ms', default=1000, help='Max time allowed for SQL queries in ms')
@click.option(
'sqlite_extensions', '--load-extension', envvar='SQLITE_EXTENSIONS', multiple=True,
type=click.Path(exists=True, resolve_path=True), help='Path to a SQLite extension to load'
)
@click.option('--inspect-file', help='Path to JSON file created using "datasette build"')
@click.option('-m', '--metadata', type=click.File(mode='r'), help='Path to JSON file containing license/source metadata')
def serve(files, host, port, debug, reload, cors, page_size, max_returned_rows, sql_time_limit_ms, inspect_file, metadata):
def serve(files, host, port, debug, reload, cors, page_size, max_returned_rows, sql_time_limit_ms, sqlite_extensions, inspect_file, metadata):
"""Serve up specified SQLite database files with a web UI"""
if reload:
import hupper
@ -150,6 +154,7 @@ def serve(files, host, port, debug, reload, cors, page_size, max_returned_rows,
sql_time_limit_ms=sql_time_limit_ms,
inspect_data=inspect_data,
metadata=metadata_data,
sqlite_extensions=sqlite_extensions,
)
# Force initial hashing/table counting
ds.inspect()
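
Put together, the serve command now constructs the app roughly like this (a hedged sketch; the database file and extension path are hypothetical):

    from datasette.app import Datasette

    ds = Datasette(
        ['history.db'],
        page_size=100,
        max_returned_rows=1000,
        sql_time_limit_ms=1000,
        sqlite_extensions=['/usr/lib/x86_64-linux-gnu/mod_spatialite.so'],
    )
    ds.inspect()  # force the initial hashing and table counting up front
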

View file

@ -83,14 +83,24 @@ form.sql textarea {
font-family: monospace;
font-size: 1.3em;
}
form.sql label {
font-weight: bold;
display: inline-block;
width: 15%;
}
form.sql input[type=text] {
border: 1px solid #ccc;
width: 60%;
padding: 4px;
font-family: monospace;
display: inline-block;
font-size: 1.1em;
}
@media only screen and (max-width: 576px) {
form.sql textarea {
width: 95%;
}
}
form.sql p {
margin: 0;
}
form.sql input[type=submit] {
color: #fff;
background-color: #007bff;

View file

@ -12,7 +12,7 @@
{% endblock %}
<div class="ft">
Powered by <a href="https://github.com/simonw/datasette">Datasette</a>
Powered by <a href="https://github.com/simonw/datasette" title="Datasette v{{ datasette_version }}">Datasette</a>
{% if query_ms %}&middot; Query took {{ query_ms|round(3) }}ms{% endif %}
{% if metadata.license %}&middot; Data license:
{% if metadata.license_url %}

View file

@ -16,7 +16,7 @@
<link rel="stylesheet" href="/-/static/codemirror-5.31.0-min.css" />
<script src="/-/static/codemirror-5.31.0-sql.min.js"></script>
<style>
.CodeMirror { height: 70px; width: 80%; border: 1px solid #ddd; }
.CodeMirror { height: auto; min-height: 70px; width: 80%; border: 1px solid #ddd; }
.CodeMirror-scroll { max-height: 200px; }
</style>
{% endblock %}
@ -33,7 +33,14 @@
{% endif %}
<form class="sql" action="/{{ database }}-{{ database_hash }}" method="get">
<h3>Custom SQL query</h3>
<p><textarea name="sql">{% if query and query.sql %}{{ query.sql }}{% else %}select * from {{ tables[0].name|escape_table_name }}{% endif %}</textarea></p>
{% if named_parameter_values %}
<h3>Query parameters</h3>
{% for name, value in named_parameter_values.items() %}
<p><label for="qp{{ loop.counter }}">{{ name }}</label> <input type="text" id="qp{{ loop.counter }}" name="{{ name }}" value="{{ value }}"></p>
{% endfor %}
{% endif %}
<p><input type="submit" value="Run SQL"></p>
</form>
@ -52,7 +59,7 @@
{% for row in rows %}
<tr>
{% for td in row %}
<td>{{ td or "&nbsp;" }}</td>
<td>{{ td or "&nbsp;"|safe }}</td>
{% endfor %}
</tr>
{% endfor %}
@ -83,6 +90,11 @@
mode: "text/x-sql",
lineWrapping: true,
});
editor.setOption("extraKeys", {
"Shift-Enter": function() {
document.getElementsByClassName("sql")[0].submit();
}
});
</script>
{% endblock %}

View file

@ -5,11 +5,8 @@
{% block extra_head %}
<style>
@media only screen and (max-width: 576px) {
{% if not is_view %}
td:nth-of-type(1):before { content: "{% if use_rowid %}rowid{% else %}Link{% endif %}"; }
{% endif %}
{% for column in display_columns %}
td:nth-of-type({% if is_view %}{{ loop.index }}{% else %}{{ loop.index + 1 }}{% endif %}):before { content: "{{ column|escape_css_string }}"; }
td:nth-of-type({{ loop.index }}):before { content: "{{ column|escape_css_string }}"; }
{% endfor %}
}
</style>
@ -34,18 +31,14 @@
<table>
<thead>
<tr>
{% if not is_view %}<th scope="col">{% if use_rowid %}rowid{% else %}Link{% endif %}</th>{% endif %}
{% for column in display_columns %}<th scope="col">{{ column }}</th>{% endfor %}
</tr>
</thead>
<tbody>
{% for row in rows %}
{% for row in display_rows %}
<tr>
{% if not is_view %}<td><a href="/{{ database }}-{{ database_hash }}/{{ table|quote_plus }}/{{ row_link(row) }}">{{ row_link(row) }}</a></td>{% endif %}
{% for td in row %}
{% if not use_rowid or (use_rowid and not loop.first) %}
<td>{{ td or "&nbsp;" }}</td>
{% endif %}
{% for cell in row %}
<td>{{ cell.value }}</td>
{% endfor %}
</tr>
{% endfor %}

View file

@ -245,3 +245,240 @@ def temporary_heroku_directory(files, name, metadata, extra_options, extra_metad
tmp.cleanup()
os.chdir(saved_cwd)
from contextlib import contextmanager
import base64
import json
import os
import re
import sqlite3
import tempfile
import time
import urllib
def compound_pks_from_path(path):
return [
urllib.parse.unquote_plus(b) for b in path.split(',')
]
def path_from_row_pks(row, pks, use_rowid):
if use_rowid:
return urllib.parse.quote_plus(str(row['rowid']))
bits = []
for pk in pks:
bits.append(
urllib.parse.quote_plus(str(row[pk]))
)
return ','.join(bits)
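
A small round trip through these two helpers (the row values are invented, and datasette is assumed to be importable):

    from datasette.utils import compound_pks_from_path, path_from_row_pks

    row = {'id': 7, 'name': 'St. Kilda/Fitzroy'}
    path = path_from_row_pks(row, pks=['id', 'name'], use_rowid=False)
    # path == '7,St.+Kilda%2FFitzroy'
    compound_pks_from_path(path)
    # ['7', 'St. Kilda/Fitzroy']
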
def build_where_clauses(args):
sql_bits = []
params = {}
for i, (key, value) in enumerate(sorted(args.items())):
if '__' in key:
column, lookup = key.rsplit('__', 1)
else:
column = key
lookup = 'exact'
template = {
'exact': '"{}" = :{}',
'contains': '"{}" like :{}',
'endswith': '"{}" like :{}',
'startswith': '"{}" like :{}',
'gt': '"{}" > :{}',
'gte': '"{}" >= :{}',
'lt': '"{}" < :{}',
'lte': '"{}" <= :{}',
'glob': '"{}" glob :{}',
'like': '"{}" like :{}',
'isnull': '"{}" is null',
}[lookup]
numeric_operators = {'gt', 'gte', 'lt', 'lte'}
value_convert = {
'contains': lambda s: '%{}%'.format(s),
'endswith': lambda s: '%{}'.format(s),
'startswith': lambda s: '{}%'.format(s),
}.get(lookup, lambda s: s)
converted = value_convert(value)
if lookup in numeric_operators and converted.isdigit():
converted = int(converted)
if ':{}' in template:
param_id = 'p{}'.format(i)
params[param_id] = converted
tokens = (column, param_id)
else:
tokens = (column,)
sql_bits.append(
template.format(*tokens)
)
return sql_bits, params
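
A worked example of build_where_clauses; the column names and values are invented, and note that arguments are processed in sorted key order:

    from datasette.utils import build_where_clauses

    sql_bits, params = build_where_clauses({
        'name__contains': 'fido',
        'age__gte': '3',
        'microchip__isnull': '1',
    })
    # sql_bits == ['"age" >= :p0', '"microchip" is null', '"name" like :p2']
    # params   == {'p0': 3, 'p2': '%fido%'}
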
class CustomJSONEncoder(json.JSONEncoder):
def default(self, obj):
if isinstance(obj, sqlite3.Row):
return tuple(obj)
if isinstance(obj, sqlite3.Cursor):
return list(obj)
if isinstance(obj, bytes):
# Does it encode to utf8?
try:
return obj.decode('utf8')
except UnicodeDecodeError:
return {
'$base64': True,
'encoded': base64.b64encode(obj).decode('latin1'),
}
return json.JSONEncoder.default(self, obj)
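
A sketch of the encoder handling a BLOB value that is not valid UTF-8 (the data is invented):

    import json
    from datasette.utils import CustomJSONEncoder

    json.dumps({'name': 'fido', 'photo': b'\xff\xd8\xff'}, cls=CustomJSONEncoder)
    # '{"name": "fido", "photo": {"$base64": true, "encoded": "/9j/"}}'
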
@contextmanager
def sqlite_timelimit(conn, ms):
deadline = time.time() + (ms / 1000)
# n is the number of SQLite virtual machine instructions that will be
# executed between each check. It's hard to know what to pick here.
# After some experimentation, I've decided to go with 1000 by default and
# 1 for time limits that are less than 50ms
n = 1000
if ms < 50:
n = 1
def handler():
if time.time() >= deadline:
return 1
conn.set_progress_handler(handler, n)
yield
conn.set_progress_handler(None, n)
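
Usage sketch for sqlite_timelimit: a deliberately unbounded query is interrupted once the deadline passes (the 20ms limit is chosen arbitrarily):

    import sqlite3
    from datasette.utils import sqlite_timelimit

    conn = sqlite3.connect(':memory:')
    with sqlite_timelimit(conn, 20):
        try:
            conn.execute(
                'WITH RECURSIVE c(x) AS (VALUES(1) UNION ALL SELECT x + 1 FROM c) '
                'SELECT count(*) FROM c'
            ).fetchall()
        except sqlite3.OperationalError as e:
            print(e)  # interrupted
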
class InvalidSql(Exception):
pass
def validate_sql_select(sql):
sql = sql.strip().lower()
if not sql.startswith('select '):
raise InvalidSql('Statement must begin with SELECT')
if 'pragma' in sql:
raise InvalidSql('Statement may not contain PRAGMA')
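
The guard is intentionally blunt; for example:

    from datasette.utils import validate_sql_select, InvalidSql

    validate_sql_select('select * from dogs')   # passes silently
    try:
        validate_sql_select('update dogs set name = "rex"')
    except InvalidSql as e:
        print(e)  # Statement must begin with SELECT
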
def path_with_added_args(request, args):
current = request.raw_args.copy()
current.update(args)
return request.path + '?' + urllib.parse.urlencode(current)
def path_with_ext(request, ext):
path = request.path
path += ext
if request.query_string:
path += '?' + request.query_string
return path
_css_re = re.compile(r'''['"\n\\]''')
_boring_table_name_re = re.compile(r'^[a-zA-Z_][a-zA-Z0-9_]*$')
def escape_css_string(s):
return _css_re.sub(lambda m: '\\{:X}'.format(ord(m.group())), s)
def escape_sqlite_table_name(s):
if _boring_table_name_re.match(s):
return s
else:
return '[{}]'.format(s)
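
For example (table names invented):

    from datasette.utils import escape_sqlite_table_name

    escape_sqlite_table_name('downloads')
    # 'downloads'
    escape_sqlite_table_name('Dog registrations 2015')
    # '[Dog registrations 2015]'
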
def make_dockerfile(files, metadata_file, extra_options=''):
cmd = ['"datasette"', '"serve"', '"--host"', '"0.0.0.0"']
cmd.append('"' + '", "'.join(files) + '"')
cmd.extend(['"--cors"', '"--port"', '"8001"', '"--inspect-file"', '"inspect-data.json"'])
if metadata_file:
cmd.extend(['"--metadata"', '"{}"'.format(metadata_file)])
if extra_options:
for opt in extra_options.split():
cmd.append('"{}"'.format(opt))
return '''
FROM python:3
COPY . /app
WORKDIR /app
RUN pip install datasette
RUN datasette build {} --inspect-file inspect-data.json
EXPOSE 8001
CMD [{}]'''.format(
' '.join(files),
', '.join(cmd)
).strip()
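
Called with a single database and one extra option, make_dockerfile produces something like the following (the file name and option are illustrative):

    from datasette.utils import make_dockerfile

    print(make_dockerfile(['fixtures.db'], metadata_file=None, extra_options='--page_size=50'))
    # FROM python:3
    # COPY . /app
    # WORKDIR /app
    # RUN pip install datasette
    # RUN datasette build fixtures.db --inspect-file inspect-data.json
    # EXPOSE 8001
    # CMD ["datasette", "serve", "--host", "0.0.0.0", "fixtures.db", "--cors", "--port", "8001",
    #      "--inspect-file", "inspect-data.json", "--page_size=50"]
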
@contextmanager
def temporary_docker_directory(files, name, metadata, extra_options, extra_metadata=None):
extra_metadata = extra_metadata or {}
tmp = tempfile.TemporaryDirectory()
# We create a datasette folder in there to get a nicer now deploy name
datasette_dir = os.path.join(tmp.name, name)
os.mkdir(datasette_dir)
saved_cwd = os.getcwd()
file_paths = [
os.path.join(saved_cwd, name)
for name in files
]
file_names = [os.path.split(f)[-1] for f in files]
if metadata:
metadata_content = json.load(metadata)
else:
metadata_content = {}
for key, value in extra_metadata.items():
if value:
metadata_content[key] = value
try:
dockerfile = make_dockerfile(file_names, metadata_content and 'metadata.json', extra_options)
os.chdir(datasette_dir)
if metadata_content:
open('metadata.json', 'w').write(json.dumps(metadata_content, indent=2))
open('Dockerfile', 'w').write(dockerfile)
for path, filename in zip(file_paths, file_names):
os.link(path, os.path.join(datasette_dir, filename))
yield
finally:
tmp.cleanup()
os.chdir(saved_cwd)
def get_all_foreign_keys(conn):
tables = [r[0] for r in conn.execute('select name from sqlite_master where type="table"')]
table_to_foreign_keys = {}
for table in tables:
table_to_foreign_keys[table] = {
'incoming': [],
'outgoing': [],
}
for table in tables:
infos = conn.execute(
'PRAGMA foreign_key_list([{}])'.format(table)
).fetchall()
for info in infos:
if info is not None:
id, seq, table_name, from_, to_, on_update, on_delete, match = info
if table_name not in table_to_foreign_keys:
# Weird edge case where something refers to a table that does
# not actually exist
continue
table_to_foreign_keys[table_name]['incoming'].append({
'other_table': table,
'column': to_,
'other_column': from_
})
table_to_foreign_keys[table]['outgoing'].append({
'other_table': table_name,
'column': from_,
'other_column': to_
})
return table_to_foreign_keys
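
A quick self-contained check of get_all_foreign_keys against a throwaway in-memory database (the schema is invented):

    import sqlite3
    from datasette.utils import get_all_foreign_keys

    conn = sqlite3.connect(':memory:')
    conn.executescript('''
        CREATE TABLE breeds (id INTEGER PRIMARY KEY, name TEXT);
        CREATE TABLE dogs (id INTEGER PRIMARY KEY, breed_id INTEGER,
            FOREIGN KEY (breed_id) REFERENCES breeds(id));
    ''')
    get_all_foreign_keys(conn)
    # {'breeds': {'incoming': [{'other_table': 'dogs', 'column': 'id', 'other_column': 'breed_id'}],
    #             'outgoing': []},
    #  'dogs': {'incoming': [],
    #           'outgoing': [{'other_table': 'breeds', 'column': 'breed_id', 'other_column': 'id'}]}}
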

View file

@ -0,0 +1,2 @@
__version_info__ = (0, 12)
__version__ = '.'.join(map(str, __version_info__))

1
docs/.gitignore vendored 100644
View file

@ -0,0 +1 @@
_build

20
docs/Makefile 100644
View file

@ -0,0 +1,20 @@
# Minimal makefile for Sphinx documentation
#
# You can set these variables from the command line.
SPHINXOPTS =
SPHINXBUILD = sphinx-build
SPHINXPROJ = Datasette
SOURCEDIR = .
BUILDDIR = _build
# Put it first so that "make" without argument is like "make help".
help:
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
.PHONY: help Makefile
# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

View file

@ -0,0 +1 @@
sphinx-autobuild . _build/html

97
docs/changelog.rst 100644
View file

@ -0,0 +1,97 @@
Changelog
=========
0.12 (2017-11-16)
-----------------
- Added ``__version__``, now displayed as tooltip in page footer (`#108`_).
- Added initial docs, including a changelog (`#99`_).
- Turned on auto-escaping in Jinja.
- Added a UI for editing named parameters (`#96`_).
You can now construct a custom SQL statement using SQLite named
parameters (e.g. ``:name``) and datasette will display form fields for
editing those parameters. `Heres an example`_ which lets you see the
most popular names for dogs of different species registered through
various dog registration schemes in Australia.
.. _Heres an example: https://australian-dogs.now.sh/australian-dogs-3ba9628?sql=select+name%2C+count%28*%29+as+n+from+%28%0D%0A%0D%0Aselect+upper%28%22Animal+name%22%29+as+name+from+%5BAdelaide-City-Council-dog-registrations-2013%5D+where+Breed+like+%3Abreed%0D%0A%0D%0Aunion+all%0D%0A%0D%0Aselect+upper%28Animal_Name%29+as+name+from+%5BAdelaide-City-Council-dog-registrations-2014%5D+where+Breed_Description+like+%3Abreed%0D%0A%0D%0Aunion+all+%0D%0A%0D%0Aselect+upper%28Animal_Name%29+as+name+from+%5BAdelaide-City-Council-dog-registrations-2015%5D+where+Breed_Description+like+%3Abreed%0D%0A%0D%0Aunion+all%0D%0A%0D%0Aselect+upper%28%22AnimalName%22%29+as+name+from+%5BCity-of-Port-Adelaide-Enfield-Dog_Registrations_2016%5D+where+AnimalBreed+like+%3Abreed%0D%0A%0D%0Aunion+all%0D%0A%0D%0Aselect+upper%28%22Animal+Name%22%29+as+name+from+%5BMitcham-dog-registrations-2015%5D+where+Breed+like+%3Abreed%0D%0A%0D%0Aunion+all%0D%0A%0D%0Aselect+upper%28%22DOG_NAME%22%29+as+name+from+%5Bburnside-dog-registrations-2015%5D+where+DOG_BREED+like+%3Abreed%0D%0A%0D%0Aunion+all+%0D%0A%0D%0Aselect+upper%28%22Animal_Name%22%29+as+name+from+%5Bcity-of-playford-2015-dog-registration%5D+where+Breed_Description+like+%3Abreed%0D%0A%0D%0Aunion+all%0D%0A%0D%0Aselect+upper%28%22Animal+Name%22%29+as+name+from+%5Bcity-of-prospect-dog-registration-details-2016%5D+where%22Breed+Description%22+like+%3Abreed%0D%0A%0D%0A%29+group+by+name+order+by+n+desc%3B&breed=pug
- Pin to specific Jinja version. (`#100`_).
- Default to 127.0.0.1 not 0.0.0.0. (`#98`_).
- Added extra metadata options to publish and package commands. (`#92`_).
You can now run these commands like so::
datasette now publish mydb.db \
--title="My Title" \
--source="Source" \
--source_url="http://www.example.com/" \
--license="CC0" \
--license_url="https://creativecommons.org/publicdomain/zero/1.0/"
This will write those values into the metadata.json that is packaged with the
app. If you also pass ``--metadata=metadata.json`` that file will be updated with the extra
values before being written into the Docker image.
- Added simple production-ready Dockerfile (`#94`_) [Andrew
Cutler]
- New ``?_sql_time_limit_ms=10`` argument to database and table page (`#95`_)
- SQL syntax highlighting with Codemirror (`#89`_) [Tom Dyson]
.. _#89: https://github.com/simonw/datasette/issues/89
.. _#92: https://github.com/simonw/datasette/issues/92
.. _#94: https://github.com/simonw/datasette/issues/94
.. _#95: https://github.com/simonw/datasette/issues/95
.. _#96: https://github.com/simonw/datasette/issues/96
.. _#98: https://github.com/simonw/datasette/issues/98
.. _#99: https://github.com/simonw/datasette/issues/99
.. _#100: https://github.com/simonw/datasette/issues/100
.. _#108: https://github.com/simonw/datasette/issues/108
0.11 (2017-11-14)
-----------------
- Added ``datasette publish now --force`` option.
This calls ``now`` with ``--force`` - useful as it means you get a fresh copy of datasette even if Now has already cached that docker layer.
- Enable ``--cors`` by default when running in a container.
0.10 (2017-11-14)
-----------------
- Fixed `#83`_ - 500 error on individual row pages.
- Stop using sqlite WITH RECURSIVE in our tests.
The version of Python 3 running in Travis CI doesn't support this.
.. _#83: https://github.com/simonw/datasette/issues/83
0.9 (2017-11-13)
----------------
- Added ``--sql_time_limit_ms`` and ``--extra-options``.
The serve command now accepts ``--sql_time_limit_ms`` for customizing the SQL time
limit.
The publish and package commands now accept ``--extra-options`` which can be used
to specify additional options to be passed to the datasette serve command when
it executes inside the resulting Docker containers.
0.8 (2017-11-13)
----------------
- V0.8 - added PyPI metadata, ready to ship.
- Implemented offset/limit pagination for views (`#70`_).
- Improved pagination. (`#78`_)
- Limit on max rows returned, controlled by ``--max_returned_rows`` option. (`#69`_)
If someone executes 'select * from table' against a table with a million rows
in it, we could run into problems: just serializing that much data as JSON is
likely to lock up the server.
Solution: we now have a hard limit on the maximum number of rows that can be
returned by a query. If that limit is exceeded, the server will return a
``"truncated": true`` field in the JSON.
This limit can be optionally controlled by the new ``--max_returned_rows``
option. Setting that option to 0 disables the limit entirely.
.. _#70: https://github.com/simonw/datasette/issues/70
.. _#78: https://github.com/simonw/datasette/issues/78
.. _#69: https://github.com/simonw/datasette/issues/69

169
docs/conf.py 100644
View file

@ -0,0 +1,169 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Datasette documentation build configuration file, created by
# sphinx-quickstart on Thu Nov 16 06:50:13 2017.
#
# This file is execfile()d with the current directory set to its
# containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
# import os
# import sys
# sys.path.insert(0, os.path.abspath('.'))
# -- General configuration ------------------------------------------------
# If your documentation needs a minimal Sphinx version, state it here.
#
# needs_sphinx = '1.0'
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = []
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
#
# source_suffix = ['.rst', '.md']
source_suffix = '.rst'
# The master toctree document.
master_doc = 'index'
# General information about the project.
project = 'Datasette'
copyright = '2017, Simon Willison'
author = 'Simon Willison'
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
version = ''
# The full version, including alpha/beta/rc tags.
release = ''
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
language = None
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This patterns also effect to html_static_path and html_extra_path
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'
# If true, `todo` and `todoList` produce output, else they produce nothing.
todo_include_todos = False
# -- Options for HTML output ----------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = 'alabaster'
# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
#
# html_theme_options = {}
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
# Custom sidebar templates, must be a dictionary that maps document names
# to template names.
#
# This is required for the alabaster theme
# refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars
html_sidebars = {
'**': [
'relations.html', # needs 'show_related': True theme option to display
'searchbox.html',
]
}
# -- Options for HTMLHelp output ------------------------------------------
# Output file base name for HTML help builder.
htmlhelp_basename = 'Datasettedoc'
# -- Options for LaTeX output ---------------------------------------------
latex_elements = {
# The paper size ('letterpaper' or 'a4paper').
#
# 'papersize': 'letterpaper',
# The font size ('10pt', '11pt' or '12pt').
#
# 'pointsize': '10pt',
# Additional stuff for the LaTeX preamble.
#
# 'preamble': '',
# Latex figure (float) alignment
#
# 'figure_align': 'htbp',
}
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
(master_doc, 'Datasette.tex', 'Datasette Documentation',
'Simon Willison', 'manual'),
]
# -- Options for manual page output ---------------------------------------
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
(master_doc, 'datasette', 'Datasette Documentation',
[author], 1)
]
# -- Options for Texinfo output -------------------------------------------
# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [
(master_doc, 'Datasette', 'Datasette Documentation',
author, 'Datasette', 'One line description of project.',
'Miscellaneous'),
]

134
docs/index.rst 100644
View file

@ -0,0 +1,134 @@
Datasette
=========
*An instant JSON API for your SQLite databases*
Datasette provides an instant, read-only JSON API for any SQLite
database. It also provides tools for packaging the database up as a
Docker container and deploying that container to hosting providers such
as `Zeit Now`_.
Some examples: https://github.com/simonw/datasette/wiki/Datasettes
Contents
--------
.. toctree::
:maxdepth: 2
changelog
Getting started
---------------
::
pip3 install datasette
Datasette requires Python 3.5 or higher.
Basic usage
-----------
::
datasette serve path/to/database.db
This will start a web server on port 8001 - visit http://localhost:8001/
to access the web interface.
``serve`` is the default subcommand; you can omit it if you like.
Use Chrome on OS X? You can run datasette against your browser history
like so:
::
datasette ~/Library/Application\ Support/Google/Chrome/Default/History
Now visiting http://localhost:8001/History/downloads will show you a web
interface to browse your downloads data:
.. figure:: https://static.simonwillison.net/static/2017/datasette-downloads.png
:alt: Downloads table rendered by datasette
http://localhost:8001/History/downloads.json will return that data as
JSON:
::
{
"database": "History",
"columns": [
"id",
"current_path",
"target_path",
"start_time",
"received_bytes",
"total_bytes",
...
],
"table_rows": 576,
"rows": [
[
1,
"/Users/simonw/Downloads/DropboxInstaller.dmg",
"/Users/simonw/Downloads/DropboxInstaller.dmg",
13097290269022132,
626688,
0,
...
]
]
}
http://localhost:8001/History/downloads.jsono will return that data as
JSON in a more convenient but less efficient format:
::
{
...
"rows": [
{
"start_time": 13097290269022132,
"interrupt_reason": 0,
"hash": "",
"id": 1,
"site_url": "",
"referrer": "https://www.dropbox.com/downloading?src=index",
...
}
]
}
datasette serve options
-----------------------
::
$ datasette serve --help
Usage: datasette serve [OPTIONS] [FILES]...
Serve up specified SQLite database files with a web UI
Options:
-h, --host TEXT host for server, defaults to 127.0.0.1
-p, --port INTEGER port for server, defaults to 8001
--debug Enable debug mode - useful for development
--reload Automatically reload if code change detected -
useful for development
--cors Enable CORS by serving Access-Control-Allow-
Origin: *
--page_size INTEGER Page size - default is 100
--max_returned_rows INTEGER Max allowed rows to return at once - default is
1000. Set to 0 to disable check entirely.
--sql_time_limit_ms INTEGER Max time allowed for SQL queries in ms
--load-extension TEXT Path to a SQLite extension to load
--inspect-file TEXT Path to JSON file created using "datasette
build"
-m, --metadata FILENAME Path to JSON file containing license/source
metadata
--help Show this message and exit.
.. _Zeit Now: https://zeit.co/now

View file

@ -1,10 +1,11 @@
from setuptools import setup, find_packages
from datasette import __version__
setup(
name='datasette',
description='An instant JSON API for your SQLite databases',
author='Simon Willison',
version='0.11',
version=__version__,
license='Apache License, Version 2.0',
url='https://github.com/simonw/datasette',
packages=find_packages(),
@ -13,7 +14,8 @@ setup(
install_requires=[
'click==6.7',
'click-default-group==1.2',
'sanic==0.6.0',
'Sanic==0.6.0',
'Jinja2==2.10',
'sanic-jinja2==0.5.5',
'hupper==1.0',
],

View file

@ -26,7 +26,7 @@ def app_client():
def test_homepage(app_client):
_, response = app_client.get('/')
response = app_client.get('/', gather_request=False)
assert response.status == 200
assert 'test_tables' in response.text
@ -40,12 +40,12 @@ def test_homepage(app_client):
def test_database_page(app_client):
_, response = app_client.get('/test_tables', allow_redirects=False)
response = app_client.get('/test_tables', allow_redirects=False, gather_request=False)
assert response.status == 302
_, response = app_client.get('/test_tables')
response = app_client.get('/test_tables', gather_request=False)
assert 'test_tables' in response.text
# Test JSON list of tables
_, response = app_client.get('/test_tables.json')
response = app_client.get('/test_tables.json', gather_request=False)
data = response.json
assert 'test_tables' == data['database']
assert [{
@ -76,8 +76,9 @@ def test_database_page(app_client):
def test_custom_sql(app_client):
_, response = app_client.get(
'/test_tables.jsono?sql=select+content+from+simple_primary_key'
response = app_client.get(
'/test_tables.jsono?sql=select+content+from+simple_primary_key',
gather_request=False
)
data = response.json
assert {
@ -94,33 +95,38 @@ def test_custom_sql(app_client):
def test_sql_time_limit(app_client):
_, response = app_client.get(
'/test_tables.jsono?sql=select+sleep(0.5)'
response = app_client.get(
'/test_tables.jsono?sql=select+sleep(0.5)',
gather_request=False
)
assert 400 == response.status
assert 'interrupted' == response.json['error']
def test_custom_sql_time_limit(app_client):
_, response = app_client.get(
'/test_tables.jsono?sql=select+sleep(0.01)'
response = app_client.get(
'/test_tables.jsono?sql=select+sleep(0.01)',
gather_request=False
)
assert 200 == response.status
_, response = app_client.get(
'/test_tables.jsono?sql=select+sleep(0.01)&_sql_time_limit_ms=5'
response = app_client.get(
'/test_tables.jsono?sql=select+sleep(0.01)&_sql_time_limit_ms=5',
gather_request=False
)
assert 400 == response.status
assert 'interrupted' == response.json['error']
def test_invalid_custom_sql(app_client):
_, response = app_client.get(
'/test_tables?sql=.schema'
response = app_client.get(
'/test_tables?sql=.schema',
gather_request=False
)
assert response.status == 400
assert 'Statement must begin with SELECT' in response.text
_, response = app_client.get(
'/test_tables.json?sql=.schema'
response = app_client.get(
'/test_tables.json?sql=.schema',
gather_request=False
)
assert response.status == 400
assert response.json['ok'] is False
@ -128,9 +134,9 @@ def test_invalid_custom_sql(app_client):
def test_table_page(app_client):
_, response = app_client.get('/test_tables/simple_primary_key')
response = app_client.get('/test_tables/simple_primary_key', gather_request=False)
assert response.status == 200
_, response = app_client.get('/test_tables/simple_primary_key.jsono')
response = app_client.get('/test_tables/simple_primary_key.jsono', gather_request=False)
assert response.status == 200
data = response.json
assert data['query']['sql'] == 'select * from simple_primary_key order by pk limit 51'
@ -145,9 +151,9 @@ def test_table_page(app_client):
def test_table_with_slashes_in_name(app_client):
_, response = app_client.get('/test_tables/table%2Fwith%2Fslashes.csv')
response = app_client.get('/test_tables/table%2Fwith%2Fslashes.csv', gather_request=False)
assert response.status == 200
_, response = app_client.get('/test_tables/table%2Fwith%2Fslashes.csv.jsono')
response = app_client.get('/test_tables/table%2Fwith%2Fslashes.csv.jsono', gather_request=False)
assert response.status == 200
data = response.json
assert data['rows'] == [{
@ -165,7 +171,7 @@ def test_paginate_tables_and_views(app_client, path, expected_rows, expected_pag
fetched = []
count = 0
while path:
_, response = app_client.get(path)
response = app_client.get(path, gather_request=False)
count += 1
fetched.extend(response.json['rows'])
path = response.json['next_url']
@ -178,8 +184,9 @@ def test_paginate_tables_and_views(app_client, path, expected_rows, expected_pag
def test_max_returned_rows(app_client):
_, response = app_client.get(
'/test_tables.jsono?sql=select+content+from+no_primary_key'
response = app_client.get(
'/test_tables.jsono?sql=select+content+from+no_primary_key',
gather_request=False
)
data = response.json
assert {
@ -191,9 +198,9 @@ def test_max_returned_rows(app_client):
def test_view(app_client):
_, response = app_client.get('/test_tables/simple_view')
response = app_client.get('/test_tables/simple_view', gather_request=False)
assert response.status == 200
_, response = app_client.get('/test_tables/simple_view.jsono')
response = app_client.get('/test_tables/simple_view.jsono', gather_request=False)
assert response.status == 200
data = response.json
assert data['rows'] == [{
@ -206,12 +213,16 @@ def test_view(app_client):
def test_row(app_client):
_, response = app_client.get('/test_tables/simple_primary_key/1', allow_redirects=False)
response = app_client.get(
'/test_tables/simple_primary_key/1',
allow_redirects=False,
gather_request=False
)
assert response.status == 302
assert response.headers['Location'].endswith('/1')
_, response = app_client.get('/test_tables/simple_primary_key/1')
response = app_client.get('/test_tables/simple_primary_key/1', gather_request=False)
assert response.status == 200
_, response = app_client.get('/test_tables/simple_primary_key/1.jsono')
response = app_client.get('/test_tables/simple_primary_key/1.jsono', gather_request=False)
assert response.status == 200
assert [{'pk': '1', 'content': 'hello'}] == response.json['rows']

View file

@ -0,0 +1,76 @@
from datasette.app import Datasette
import os
import pytest
import sqlite3
import tempfile
TABLES = '''
CREATE TABLE "election_results" (
"county" INTEGER,
"party" INTEGER,
"office" INTEGER,
"votes" INTEGER,
FOREIGN KEY (county) REFERENCES county(id),
FOREIGN KEY (party) REFERENCES party(id),
FOREIGN KEY (office) REFERENCES office(id)
);
CREATE TABLE "county" (
"id" INTEGER PRIMARY KEY ,
"name" TEXT
);
CREATE TABLE "party" (
"id" INTEGER PRIMARY KEY ,
"name" TEXT
);
CREATE TABLE "office" (
"id" INTEGER PRIMARY KEY ,
"name" TEXT
);
'''
@pytest.fixture(scope='module')
def ds_instance():
with tempfile.TemporaryDirectory() as tmpdir:
filepath = os.path.join(tmpdir, 'test_tables.db')
conn = sqlite3.connect(filepath)
conn.executescript(TABLES)
yield Datasette([filepath])
def test_inspect(ds_instance):
info = ds_instance.inspect()
tables = info['test_tables']['tables']
for table_name in ('county', 'party', 'office'):
assert 0 == tables[table_name]['count']
foreign_keys = tables[table_name]['foreign_keys']
assert [] == foreign_keys['outgoing']
assert [{
'column': 'id',
'other_column': table_name,
'other_table': 'election_results'
}] == foreign_keys['incoming']
election_results = tables['election_results']
assert 0 == election_results['count']
assert sorted([{
'column': 'county',
'other_column': 'id',
'other_table': 'county'
}, {
'column': 'party',
'other_column': 'id',
'other_table': 'party'
}, {
'column': 'office',
'other_column': 'id',
'other_table': 'office'
}], key=lambda d: d['column']) == sorted(
election_results['foreign_keys']['outgoing'],
key=lambda d: d['column']
)
assert [] == election_results['foreign_keys']['incoming']

View file

@ -88,6 +88,15 @@ def test_custom_json_encoder(obj, expected):
['"foo" like :p0', '"zax" glob :p1'],
['2%2', '3*']
),
(
{
'foo__isnull': '1',
'baz__isnull': '1',
'bar__gt': '10'
},
['"bar" > :p0', '"baz" is null', '"foo" is null'],
[10]
),
])
def test_build_where(args, expected_where, expected_params):
sql_bits, actual_params = utils.build_where_clauses(args)