diff --git a/README.md b/README.md index 9cff2732..579fe843 100644 --- a/README.md +++ b/README.md @@ -141,6 +141,7 @@ This will create a docker image containing both the datasette application and th -m, --metadata FILENAME Path to JSON file containing metadata to publish --extra-options TEXT Extra options to pass to datasette serve --force Pass --force option to now + --branch TEXT Install datasette from a GitHub branch e.g. master --title TEXT Title for metadata --license TEXT License label for metadata --license_url TEXT License URL for metadata @@ -162,6 +163,7 @@ If you have docker installed you can use `datasette package` to create a new Doc optionally use name:tag format -m, --metadata FILENAME Path to JSON file containing metadata to publish --extra-options TEXT Extra options to pass to datasette serve + --branch TEXT Install datasette from a GitHub branch e.g. master --title TEXT Title for metadata --license TEXT License label for metadata --license_url TEXT License URL for metadata diff --git a/datasette/app.py b/datasette/app.py index 2a7a5e2b..9aede385 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -16,9 +16,10 @@ import jinja2 import hashlib import time from .utils import ( - build_where_clauses, + Filters, compound_pks_from_path, CustomJSONEncoder, + detect_fts_sql, escape_css_string, escape_sqlite_table_name, get_all_foreign_keys, @@ -56,7 +57,7 @@ class BaseView(HTTPMethodView): return r def redirect(self, request, path): - if request.query_string: + if request.query_string and '?' not in path: path = '{}?{}'.format( path, request.query_string ) @@ -181,9 +182,13 @@ class BaseView(HTTPMethodView): template = self.template status_code = 200 try: - data, extra_template_data = await self.data( + response_or_template_contexts = await self.data( request, name, hash, **kwargs ) + if isinstance(response_or_template_contexts, response.HTTPResponse): + return response_or_template_contexts + else: + data, extra_template_data = response_or_template_contexts except (sqlite3.OperationalError, InvalidSql) as e: data = { 'ok': False, @@ -221,16 +226,23 @@ class BaseView(HTTPMethodView): headers=headers, ) else: - context = {**data, **dict( - extra_template_data() - if callable(extra_template_data) - else extra_template_data - ), **{ - 'url_json': path_with_ext(request, '.json'), - 'url_jsono': path_with_ext(request, '.jsono'), - 'metadata': self.ds.metadata, - 'datasette_version': __version__, - }} + extras = {} + if callable(extra_template_data): + extras = extra_template_data() + if asyncio.iscoroutine(extras): + extras = await extras + else: + extras = extra_template_data + context = { + **data, + **extras, + **{ + 'url_json': path_with_ext(request, '.json'), + 'url_jsono': path_with_ext(request, '.jsono'), + 'metadata': self.ds.metadata, + 'datasette_version': __version__, + } + } r = self.jinja.render( template, request, @@ -407,18 +419,47 @@ class TableView(BaseView): # That's so if there is a column that starts with _ # it can still be queried using ?_col__exact=blah special_args = {} + special_args_lists = {} other_args = {} for key, value in request.args.items(): if key.startswith('_') and '__' not in key: special_args[key] = value[0] + special_args_lists[key] = value else: other_args[key] = value[0] - if other_args: - where_clauses, params = build_where_clauses(other_args) - else: - where_clauses = [] - params = {} + # Handle ?_filter_column and redirect, if present + if '_filter_column' in special_args: + filter_column = special_args['_filter_column'] + filter_op = 
special_args.get('_filter_op') or '' + filter_value = special_args.get('_filter_value') or '' + if '__' in filter_op: + filter_op, filter_value = filter_op.split('__', 1) + return self.redirect(request, path_with_added_args(request, { + '{}__{}'.format(filter_column, filter_op): filter_value, + '_filter_column': None, + '_filter_op': None, + '_filter_value': None, + })) + + filters = Filters(sorted(other_args.items())) + where_clauses, params = filters.build_where_clauses() + + # _search support: + fts_table = None + fts_sql = detect_fts_sql(table) + fts_rows = list(await self.execute(name, fts_sql)) + if fts_rows: + fts_table = fts_rows[0][0] + + search = special_args.get('_search') + if search and fts_table: + where_clauses.append( + 'rowid in (select rowid from [{fts_table}] where [{fts_table}] match :search)'.format( + fts_table=fts_table + ) + ) + params['search'] = search next = special_args.get('_next') offset = '' @@ -453,14 +494,24 @@ class TableView(BaseView): if order_by: order_by = 'order by {} '.format(order_by) - sql = 'select {select} from {table_name} {where}{order_by}limit {limit}{offset}'.format( - select=select, - table_name=escape_sqlite_table_name(table), - where=where_clause, - order_by=order_by, - limit=self.page_size + 1, - offset=offset, - ) + # _group_count=col1&_group_count=col2 + group_count = special_args_lists.get('_group_count') or [] + if group_count: + sql = 'select {group_cols}, count(*) as "count" from {table_name} {where} group by {group_cols} order by "count" desc limit 100'.format( + group_cols=', '.join('"{}"'.format(group_count_col) for group_count_col in group_count), + table_name=escape_sqlite_table_name(table), + where=where_clause, + ) + is_view = True + else: + sql = 'select {select} from {table_name} {where}{order_by}limit {limit}{offset}'.format( + select=select, + table_name=escape_sqlite_table_name(table), + where=where_clause, + order_by=order_by, + limit=self.page_size + 1, + offset=offset, + ) extra_args = {} if request.raw_args.get('_sql_time_limit_ms'): @@ -481,6 +532,8 @@ class TableView(BaseView): table_rows = None if not is_view: table_rows = info[name]['tables'][table]['count'] + + # Pagination next link next_value = None next_url = None if len(rows) > self.page_size: @@ -492,6 +545,17 @@ class TableView(BaseView): '_next': next_value, })) + async def extra_template(): + return { + 'database_hash': hash, + 'human_filter_description': filters.human_description(), + 'supports_search': bool(fts_table), + 'search': search or '', + 'use_rowid': use_rowid, + 'display_columns': display_columns, + 'display_rows': await self.make_display_rows(name, hash, table, rows, display_columns, pks, is_view, use_rowid), + } + return { 'database': name, 'table': table, @@ -509,48 +573,81 @@ class TableView(BaseView): }, 'next': next_value and str(next_value) or None, 'next_url': next_url, - }, lambda: { - 'database_hash': hash, - 'use_rowid': use_rowid, - 'display_columns': display_columns, - 'display_rows': make_display_rows(name, hash, table, rows, display_columns, pks, is_view, use_rowid), - } + }, extra_template - -def make_display_rows(database, database_hash, table, rows, display_columns, pks, is_view, use_rowid): - for row in rows: - cells = [] - # Unless we are a view, the first column is a link - either to the rowid - # or to the simple or compound primary key - if not is_view: - display_value = jinja2.Markup( - '{flat_pks}'.format( - database=database, - database_hash=database_hash, - table=urllib.parse.quote_plus(table), - 
flat_pks=path_from_row_pks(row, pks, use_rowid), + async def make_display_rows(self, database, database_hash, table, rows, display_columns, pks, is_view, use_rowid): + # Get fancy with foreign keys + expanded = {} + tables = self.ds.inspect()[database]['tables'] + table_info = tables.get(table) or {} + if table_info and not is_view: + foreign_keys = table_info['foreign_keys']['outgoing'] + for fk in foreign_keys: + label_column = tables.get(fk['other_table'], {}).get('label_column') + if not label_column: + # We only link cells to other tables with label columns defined + continue + ids_to_lookup = set([row[fk['column']] for row in rows]) + sql = 'select "{other_column}", "{label_column}" from {other_table} where "{other_column}" in ({placeholders})'.format( + other_column=fk['other_column'], + label_column=label_column, + other_table=escape_sqlite_table_name(fk['other_table']), + placeholders=', '.join(['?'] * len(ids_to_lookup)), ) - ) - cells.append({ - 'column': 'rowid' if use_rowid else 'Link', - 'value': display_value, - }) + try: + results = await self.execute(database, sql, list(set(ids_to_lookup))) + except sqlite3.OperationalError: + # Probably hit the timelimit + pass + else: + for id, value in results: + expanded[(fk['column'], id)] = (fk['other_table'], value) - for value, column in zip(row, display_columns): - if use_rowid and column == 'rowid': - # We already showed this in the linked first column - continue - if False: # TODO: This is where we will do foreign key linking - display_value = jinja2.Markup('{}'.format('foreign key')) - elif value is None: - display_value = jinja2.Markup(' ') - else: - display_value = str(value) - cells.append({ - 'column': column, - 'value': display_value, - }) - yield cells + to_return = [] + for row in rows: + cells = [] + # Unless we are a view, the first column is a link - either to the rowid + # or to the simple or compound primary key + if not is_view: + display_value = jinja2.Markup( + '{flat_pks}'.format( + database=database, + database_hash=database_hash, + table=urllib.parse.quote_plus(table), + flat_pks=path_from_row_pks(row, pks, use_rowid), + ) + ) + cells.append({ + 'column': 'rowid' if use_rowid else 'Link', + 'value': display_value, + }) + + for value, column in zip(row, display_columns): + if use_rowid and column == 'rowid': + # We already showed this in the linked first column + continue + elif (column, value) in expanded: + other_table, label = expanded[(column, value)] + display_value = jinja2.Markup( + # TODO: Escape id/label/etc so no XSS here + '{label}'.format( + database=database, + database_hash=database_hash, + table=escape_sqlite_table_name(other_table), + id=value, + label=label, + ) + ) + elif value is None: + display_value = jinja2.Markup(' ') + else: + display_value = str(value) + cells.append({ + 'column': column, + 'value': display_value, + }) + to_return.append(cells) + return to_return class RowView(BaseView): @@ -581,6 +678,13 @@ class RowView(BaseView): rows = list(rows) if not rows: raise NotFound('Record not found: {}'.format(pk_values)) + + async def template_data(): + return { + 'database_hash': hash, + 'foreign_key_tables': await self.foreign_key_tables(name, table, pk_values), + } + return { 'database': name, 'table': table, @@ -588,9 +692,34 @@ class RowView(BaseView): 'columns': columns, 'primary_keys': pks, 'primary_key_values': pk_values, - }, { - 'database_hash': hash, - } + }, template_data + + async def foreign_key_tables(self, name, table, pk_values): + if len(pk_values) != 1: + return [] + 
table_info = self.ds.inspect()[name]['tables'].get(table) + if not table: + return [] + foreign_keys = table_info['foreign_keys']['incoming'] + sql = 'select ' + ', '.join([ + '(select count(*) from {table} where "{column}"= :id)'.format( + table=escape_sqlite_table_name(fk['other_table']), + column=fk['other_column'], + ) + for fk in foreign_keys + ]) + try: + rows = list(await self.execute(name, sql, {'id': pk_values[0]})) + except sqlite3.OperationalError: + # Almost certainly hit the timeout + return [] + foreign_table_counts = dict(zip([fk['other_table'] for fk in foreign_keys], rows[0])) + foreign_key_tables = [] + for fk in foreign_keys: + count = foreign_table_counts[fk['other_table']] + if count: + foreign_key_tables.append({**fk, **{'count': count}}) + return foreign_key_tables class Datasette: @@ -638,8 +767,19 @@ class Datasette: for r in conn.execute('select * from sqlite_master where type="table"') ] for table in table_names: + count = conn.execute( + 'select count(*) from {}'.format(escape_sqlite_table_name(table)) + ).fetchone()[0] + label_column = None + # If table has two columns, one of which is ID, then label_column is the other one + column_names = [r[1] for r in conn.execute( + 'PRAGMA table_info({});'.format(escape_sqlite_table_name(table)) + ).fetchall()] + if column_names and len(column_names) == 2 and 'id' in column_names: + label_column = [c for c in column_names if c != 'id'][0] tables[table] = { - 'count': conn.execute('select count(*) from "{}"'.format(table)).fetchone()[0], + 'count': count, + 'label_column': label_column, } foreign_keys = get_all_foreign_keys(conn) diff --git a/datasette/cli.py b/datasette/cli.py index 23fd0f15..b87f36f1 100644 --- a/datasette/cli.py +++ b/datasette/cli.py @@ -11,6 +11,7 @@ from .utils import ( @click.group(cls=DefaultGroup, default='serve', default_if_no_args=True) +@click.version_option() def cli(): """ Datasette! @@ -38,12 +39,13 @@ def build(files, inspect_file): ) @click.option('--extra-options', help='Extra options to pass to datasette serve') @click.option('--force', is_flag=True, help='Pass --force option to now') +@click.option('--branch', help='Install datasette from a GitHub branch e.g. master') @click.option('--title', help='Title for metadata') @click.option('--license', help='License label for metadata') @click.option('--license_url', help='License URL for metadata') @click.option('--source', help='Source label for metadata') @click.option('--source_url', help='Source URL for metadata') -def publish(publisher, files, name, metadata, extra_options, force, **extra_metadata): +def publish(publisher, files, name, metadata, extra_options, force, branch, **extra_metadata): """ Publish specified SQLite database files to the internet along with a datasette API. @@ -68,7 +70,7 @@ def publish(publisher, files, name, metadata, extra_options, force, **extra_meta if publisher == 'now': _fail_if_publish_binary_not_installed('now', 'Zeit Now', 'https://zeit.co/now') - with temporary_docker_directory(files, name, metadata, extra_options, extra_metadata): + with temporary_docker_directory(files, name, metadata, extra_options, branch, extra_metadata): if force: call(['now', '--force']) else: @@ -84,7 +86,7 @@ def publish(publisher, files, name, metadata, extra_options, force, **extra_meta click.confirm('Install it? 
(this will run `heroku plugins:install heroku-builds`)', abort=True) call(["heroku", "plugins:install", "heroku-builds"]) - with temporary_heroku_directory(files, name, metadata, extra_options, extra_metadata): + with temporary_heroku_directory(files, name, metadata, extra_options, branch, extra_metadata): create_output = check_output(['heroku', 'apps:create', '--json']) app_name = json.loads(create_output)["name"] call(["heroku", "builds:create", "-a", app_name]) @@ -100,12 +102,13 @@ def publish(publisher, files, name, metadata, extra_options, force, **extra_meta help='Path to JSON file containing metadata to publish' ) @click.option('--extra-options', help='Extra options to pass to datasette serve') +@click.option('--branch', help='Install datasette from a GitHub branch e.g. master') @click.option('--title', help='Title for metadata') @click.option('--license', help='License label for metadata') @click.option('--license_url', help='License URL for metadata') @click.option('--source', help='Source label for metadata') @click.option('--source_url', help='Source URL for metadata') -def package(files, tag, metadata, extra_options, **extra_metadata): +def package(files, tag, metadata, extra_options, branch, **extra_metadata): "Package specified SQLite files into a new datasette Docker container" if not shutil.which('docker'): click.secho( @@ -116,7 +119,7 @@ def package(files, tag, metadata, extra_options, **extra_metadata): err=True, ) sys.exit(1) - with temporary_docker_directory(files, 'datasette', metadata, extra_options, extra_metadata): + with temporary_docker_directory(files, 'datasette', metadata, extra_options, branch, extra_metadata): args = ['docker', 'build'] if tag: args.append('-t') diff --git a/datasette/static/app.css b/datasette/static/app.css index 9a3f153d..b05782da 100644 --- a/datasette/static/app.css +++ b/datasette/static/app.css @@ -21,6 +21,13 @@ td { th { padding-right: 1em; } +table a:link { + text-decoration: none; + color: #445ac8; +} +table a:visited { + color: #8f54c4; +} @media only screen and (max-width: 576px) { /* Force table to not be like tables anymore */ table, thead, tbody, th, td, tr { @@ -83,12 +90,13 @@ form.sql textarea { font-family: monospace; font-size: 1.3em; } -form.sql label { +form label { font-weight: bold; display: inline-block; width: 15%; } -form.sql input[type=text] { +form input[type=text], +form input[type=search] { border: 1px solid #ccc; width: 60%; padding: 4px; @@ -96,12 +104,15 @@ form.sql input[type=text] { display: inline-block; font-size: 1.1em; } +form input[type=search] { + width: 40%; +} @media only screen and (max-width: 576px) { form.sql textarea { width: 95%; } } -form.sql input[type=submit] { +form input[type=submit] { color: #fff; background-color: #007bff; border-color: #007bff; @@ -111,7 +122,7 @@ form.sql input[type=submit] { vertical-align: middle; border: 1px solid blue; padding: .275rem .75rem; - font-size: 1rem; - line-height: 1.5; + font-size: 0.9rem; + line-height: 1; border-radius: .25rem; } diff --git a/datasette/templates/database.html b/datasette/templates/database.html index 59115c66..cdf128b8 100644 --- a/datasette/templates/database.html +++ b/datasette/templates/database.html @@ -44,11 +44,8 @@

-{% if truncated %} -
These results were truncated. You will need to apply OFFSET/LIMIT to see the whole result set.
-{% endif %} - {% if rows %} +

Returned {% if truncated %}more than {% endif %}{{ "{:,}".format(rows|length) }} row{% if rows|length == 1 %}{% else %}s{% endif %}

@@ -59,7 +56,7 @@ {% for row in rows %} {% for td in row %} - + {% endfor %} {% endfor %} @@ -93,7 +90,8 @@ editor.setOption("extraKeys", { "Shift-Enter": function() { document.getElementsByClassName("sql")[0].submit(); - } + }, + Tab: false }); diff --git a/datasette/templates/row.html b/datasette/templates/row.html index 2d4ca39b..c4ecf959 100644 --- a/datasette/templates/row.html +++ b/datasette/templates/row.html @@ -35,4 +35,18 @@ {% endfor %}
{{ td or " "|safe }}{% if td == None %}{{ " "|safe }}{% else %}{{ td }}{% endif %}
+ +{% if foreign_key_tables %} +

Links from other tables

+ +{% endif %} + {% endblock %} diff --git a/datasette/templates/table.html b/datasette/templates/table.html index 082d2782..ef9b7296 100644 --- a/datasette/templates/table.html +++ b/datasette/templates/table.html @@ -21,6 +21,14 @@

{{ "{:,}".format(table_rows) }} total row{% if table_rows == 1 %}{% else %}s{% endif %} in this table

{% endif %} +

{{ human_filter_description }}

+ +{% if supports_search %} +
+

+
+{% endif %} + {% if query.params %}
{{ query.sql }}
params = {{ query.params|tojson(4) }}
diff --git a/datasette/utils.py b/datasette/utils.py index 544fdad2..322ad86f 100644 --- a/datasette/utils.py +++ b/datasette/utils.py @@ -7,7 +7,6 @@ import sqlite3 import tempfile import time import urllib -import shlex def compound_pks_from_path(path): @@ -27,44 +26,6 @@ def path_from_row_pks(row, pks, use_rowid): return ','.join(bits) -def build_where_clauses(args): - sql_bits = [] - params = {} - for i, (key, value) in enumerate(sorted(args.items())): - if '__' in key: - column, lookup = key.rsplit('__', 1) - else: - column = key - lookup = 'exact' - template = { - 'exact': '"{}" = :{}', - 'contains': '"{}" like :{}', - 'endswith': '"{}" like :{}', - 'startswith': '"{}" like :{}', - 'gt': '"{}" > :{}', - 'gte': '"{}" >= :{}', - 'lt': '"{}" < :{}', - 'lte': '"{}" <= :{}', - 'glob': '"{}" glob :{}', - 'like': '"{}" like :{}', - }[lookup] - numeric_operators = {'gt', 'gte', 'lt', 'lte'} - value_convert = { - 'contains': lambda s: '%{}%'.format(s), - 'endswith': lambda s: '%{}'.format(s), - 'startswith': lambda s: '{}%'.format(s), - }.get(lookup, lambda s: s) - converted = value_convert(value) - if lookup in numeric_operators and converted.isdigit(): - converted = int(converted) - param_id = 'p{}'.format(i) - sql_bits.append( - template.format(column, param_id) - ) - params[param_id] = converted - return sql_bits, params - - class CustomJSONEncoder(json.JSONEncoder): def default(self, obj): if isinstance(obj, sqlite3.Row): @@ -116,9 +77,17 @@ def validate_sql_select(sql): def path_with_added_args(request, args): - current = request.raw_args.copy() - current.update(args) - return request.path + '?' + urllib.parse.urlencode(current) + current = { + key: value + for key, value in request.raw_args.items() + if key not in args + } + current.update({ + key: value + for key, value in args.items() + if value is not None + }) + return request.path + '?' + urllib.parse.urlencode(sorted(current.items())) def path_with_ext(request, ext): @@ -144,7 +113,7 @@ def escape_sqlite_table_name(s): return '[{}]'.format(s) -def make_dockerfile(files, metadata_file, extra_options=''): +def make_dockerfile(files, metadata_file, extra_options='', branch=None): cmd = ['"datasette"', '"serve"', '"--host"', '"0.0.0.0"'] cmd.append('"' + '", "'.join(files) + '"') cmd.extend(['"--cors"', '"--port"', '"8001"', '"--inspect-file"', '"inspect-data.json"']) @@ -153,21 +122,27 @@ def make_dockerfile(files, metadata_file, extra_options=''): if extra_options: for opt in extra_options.split(): cmd.append('"{}"'.format(opt)) + install_from = 'datasette' + if branch: + install_from = 'https://github.com/simonw/datasette/archive/{}.zip'.format( + branch + ) return ''' FROM python:3 COPY . 
/app WORKDIR /app -RUN pip install datasette -RUN datasette build {} --inspect-file inspect-data.json +RUN pip install {install_from} +RUN datasette build {files} --inspect-file inspect-data.json EXPOSE 8001 -CMD [{}]'''.format( - ' '.join(files), - ', '.join(cmd) +CMD [{cmd}]'''.format( + files=' '.join(files), + cmd=', '.join(cmd), + install_from=install_from, ).strip() @contextmanager -def temporary_docker_directory(files, name, metadata, extra_options, extra_metadata=None): +def temporary_docker_directory(files, name, metadata, extra_options, branch=None, extra_metadata=None): extra_metadata = extra_metadata or {} tmp = tempfile.TemporaryDirectory() # We create a datasette folder in there to get a nicer now deploy name @@ -187,7 +162,7 @@ def temporary_docker_directory(files, name, metadata, extra_options, extra_metad if value: metadata_content[key] = value try: - dockerfile = make_dockerfile(file_names, metadata_content and 'metadata.json', extra_options) + dockerfile = make_dockerfile(file_names, metadata_content and 'metadata.json', extra_options, branch) os.chdir(datasette_dir) if metadata_content: open('metadata.json', 'w').write(json.dumps(metadata_content, indent=2)) @@ -199,8 +174,9 @@ def temporary_docker_directory(files, name, metadata, extra_options, extra_metad tmp.cleanup() os.chdir(saved_cwd) + @contextmanager -def temporary_heroku_directory(files, name, metadata, extra_options, extra_metadata=None): +def temporary_heroku_directory(files, name, metadata, extra_options, branch=None, extra_metadata=None): # FIXME: lots of duplicated code from above extra_metadata = extra_metadata or {} @@ -228,7 +204,13 @@ def temporary_heroku_directory(files, name, metadata, extra_options, extra_metad open('metadata.json', 'w').write(json.dumps(metadata_content, indent=2)) open('runtime.txt', 'w').write('python-3.6.2') - open('requirements.txt', 'w').write('datasette') + + if branch: + install_from = f'https://github.com/simonw/datasette/archive/{branch}.zip' + else: + install_from = 'datasette' + + open('requirements.txt', 'w').write(install_from) os.mkdir('bin') open('bin/post_compile', 'w').write('datasette build --inspect-file inspect-data.json') @@ -245,211 +227,6 @@ def temporary_heroku_directory(files, name, metadata, extra_options, extra_metad tmp.cleanup() os.chdir(saved_cwd) -from contextlib import contextmanager -import base64 -import json -import os -import re -import sqlite3 -import tempfile -import time -import urllib - - -def compound_pks_from_path(path): - return [ - urllib.parse.unquote_plus(b) for b in path.split(',') - ] - - -def path_from_row_pks(row, pks, use_rowid): - if use_rowid: - return urllib.parse.quote_plus(str(row['rowid'])) - bits = [] - for pk in pks: - bits.append( - urllib.parse.quote_plus(str(row[pk])) - ) - return ','.join(bits) - - -def build_where_clauses(args): - sql_bits = [] - params = {} - for i, (key, value) in enumerate(sorted(args.items())): - if '__' in key: - column, lookup = key.rsplit('__', 1) - else: - column = key - lookup = 'exact' - template = { - 'exact': '"{}" = :{}', - 'contains': '"{}" like :{}', - 'endswith': '"{}" like :{}', - 'startswith': '"{}" like :{}', - 'gt': '"{}" > :{}', - 'gte': '"{}" >= :{}', - 'lt': '"{}" < :{}', - 'lte': '"{}" <= :{}', - 'glob': '"{}" glob :{}', - 'like': '"{}" like :{}', - 'isnull': '"{}" is null', - }[lookup] - numeric_operators = {'gt', 'gte', 'lt', 'lte'} - value_convert = { - 'contains': lambda s: '%{}%'.format(s), - 'endswith': lambda s: '%{}'.format(s), - 'startswith': lambda s: '{}%'.format(s), 
- }.get(lookup, lambda s: s) - converted = value_convert(value) - if lookup in numeric_operators and converted.isdigit(): - converted = int(converted) - if ':{}' in template: - param_id = 'p{}'.format(i) - params[param_id] = converted - tokens = (column, param_id) - else: - tokens = (column,) - sql_bits.append( - template.format(*tokens) - ) - return sql_bits, params - - -class CustomJSONEncoder(json.JSONEncoder): - def default(self, obj): - if isinstance(obj, sqlite3.Row): - return tuple(obj) - if isinstance(obj, sqlite3.Cursor): - return list(obj) - if isinstance(obj, bytes): - # Does it encode to utf8? - try: - return obj.decode('utf8') - except UnicodeDecodeError: - return { - '$base64': True, - 'encoded': base64.b64encode(obj).decode('latin1'), - } - return json.JSONEncoder.default(self, obj) - - -@contextmanager -def sqlite_timelimit(conn, ms): - deadline = time.time() + (ms / 1000) - # n is the number of SQLite virtual machine instructions that will be - # executed between each check. It's hard to know what to pick here. - # After some experimentation, I've decided to go with 1000 by default and - # 1 for time limits that are less than 50ms - n = 1000 - if ms < 50: - n = 1 - - def handler(): - if time.time() >= deadline: - return 1 - - conn.set_progress_handler(handler, n) - yield - conn.set_progress_handler(None, n) - - -class InvalidSql(Exception): - pass - - -def validate_sql_select(sql): - sql = sql.strip().lower() - if not sql.startswith('select '): - raise InvalidSql('Statement must begin with SELECT') - if 'pragma' in sql: - raise InvalidSql('Statement may not contain PRAGMA') - - -def path_with_added_args(request, args): - current = request.raw_args.copy() - current.update(args) - return request.path + '?' + urllib.parse.urlencode(current) - - -def path_with_ext(request, ext): - path = request.path - path += ext - if request.query_string: - path += '?' + request.query_string - return path - - -_css_re = re.compile(r'''['"\n\\]''') -_boring_table_name_re = re.compile(r'^[a-zA-Z_][a-zA-Z0-9_]*$') - - -def escape_css_string(s): - return _css_re.sub(lambda m: '\\{:X}'.format(ord(m.group())), s) - - -def escape_sqlite_table_name(s): - if _boring_table_name_re.match(s): - return s - else: - return '[{}]'.format(s) - - -def make_dockerfile(files, metadata_file, extra_options=''): - cmd = ['"datasette"', '"serve"', '"--host"', '"0.0.0.0"'] - cmd.append('"' + '", "'.join(files) + '"') - cmd.extend(['"--cors"', '"--port"', '"8001"', '"--inspect-file"', '"inspect-data.json"']) - if metadata_file: - cmd.extend(['"--metadata"', '"{}"'.format(metadata_file)]) - if extra_options: - for opt in extra_options.split(): - cmd.append('"{}"'.format(opt)) - return ''' -FROM python:3 -COPY . 
/app -WORKDIR /app -RUN pip install datasette -RUN datasette build {} --inspect-file inspect-data.json -EXPOSE 8001 -CMD [{}]'''.format( - ' '.join(files), - ', '.join(cmd) - ).strip() - - -@contextmanager -def temporary_docker_directory(files, name, metadata, extra_options, extra_metadata=None): - extra_metadata = extra_metadata or {} - tmp = tempfile.TemporaryDirectory() - # We create a datasette folder in there to get a nicer now deploy name - datasette_dir = os.path.join(tmp.name, name) - os.mkdir(datasette_dir) - saved_cwd = os.getcwd() - file_paths = [ - os.path.join(saved_cwd, name) - for name in files - ] - file_names = [os.path.split(f)[-1] for f in files] - if metadata: - metadata_content = json.load(metadata) - else: - metadata_content = {} - for key, value in extra_metadata.items(): - if value: - metadata_content[key] = value - try: - dockerfile = make_dockerfile(file_names, metadata_content and 'metadata.json', extra_options) - os.chdir(datasette_dir) - if metadata_content: - open('metadata.json', 'w').write(json.dumps(metadata_content, indent=2)) - open('Dockerfile', 'w').write(dockerfile) - for path, filename in zip(file_paths, file_names): - os.link(path, os.path.join(datasette_dir, filename)) - yield - finally: - tmp.cleanup() - os.chdir(saved_cwd) - def get_all_foreign_keys(conn): tables = [r[0] for r in conn.execute('select name from sqlite_master where type="table"')] @@ -482,3 +259,123 @@ def get_all_foreign_keys(conn): }) return table_to_foreign_keys + + +def detect_fts(conn, table, return_sql=False): + "Detect if table has a corresponding FTS virtual table and return it" + rows = conn.execute(detect_fts_sql(table)).fetchall() + if len(rows) == 0: + return None + else: + return rows[0][0] + + +def detect_fts_sql(table): + return r''' + select name from sqlite_master + where rootpage = 0 + and ( + sql like '%VIRTUAL TABLE%USING FTS%content="{table}"%' + or tbl_name = "{table}" + ) + '''.format(table=table) + + +class Filter: + def __init__(self, key, sql_template, human_template, format='{}', numeric=False, no_argument=False): + self.key = key + self.sql_template = sql_template + self.human_template = human_template + self.format = format + self.numeric = numeric + self.no_argument = no_argument + + def where_clause(self, column, value, param_counter): + converted = self.format.format(value) + if self.numeric and converted.isdigit(): + converted = int(converted) + if self.no_argument: + kwargs = { + 'c': column, + } + converted = None + else: + kwargs = { + 'c': column, + 'p': 'p{}'.format(param_counter), + } + return self.sql_template.format(**kwargs), converted + + def human_clause(self, column, value): + if callable(self.human_template): + template = self.human_template(column, value) + else: + template = self.human_template + if self.no_argument: + return template.format(c=column) + else: + return template.format(c=column, v=value) + + +class Filters: + _filters = [ + Filter('exact', '"{c}" = :{p}', lambda c, v: '{c} = {v}' if v.isdigit() else '{c} = "{v}"'), + Filter('contains', '"{c}" like :{p}', '{c} contains "{v}"', format='%{}%'), + Filter('endswith', '"{c}" like :{p}', '{c} ends with "{v}"', format='%{}'), + Filter('startswith', '"{c}" like :{p}', '{c} starts with "{v}"', format='{}%'), + Filter('gt', '"{c}" > :{p}', '{c} > {v}', numeric=True), + Filter('gte', '"{c}" >= :{p}', '{c} \u2265 {v}', numeric=True), + Filter('lt', '"{c}" < :{p}', '{c} < {v}', numeric=True), + Filter('lte', '"{c}" <= :{p}', '{c} \u2264 {v}', numeric=True), + Filter('glob', '"{c}" 
glob :{p}', '{c} glob "{v}"'),
+        Filter('like', '"{c}" like :{p}', '{c} like "{v}"'),
+        Filter('isnull', '"{c}" is null', '{c} is null', no_argument=True),
+        Filter('notnull', '"{c}" is not null', '{c} is not null', no_argument=True),
+        Filter('isblank', '("{c}" is null or "{c}" = "")', '{c} is blank', no_argument=True),
+        Filter('notblank', '("{c}" is not null and "{c}" != "")', '{c} is not blank', no_argument=True),
+    ]
+    _filters_by_key = {
+        f.key: f for f in _filters
+    }
+
+    def __init__(self, pairs):
+        self.pairs = pairs
+
+    def human_description(self):
+        bits = []
+        for key, value in self.pairs:
+            if '__' in key:
+                column, lookup = key.rsplit('__', 1)
+            else:
+                column = key
+                lookup = 'exact'
+            filter = self._filters_by_key.get(lookup, None)
+            if filter:
+                bits.append(filter.human_clause(column, value))
+        # Comma separated, with an ' and ' at the end
+        and_bits = []
+        commas, tail = bits[:-1], bits[-1:]
+        if commas:
+            and_bits.append(', '.join(commas))
+        if tail:
+            and_bits.append(tail[0])
+        return ' and '.join(and_bits)
+
+    def build_where_clauses(self):
+        sql_bits = []
+        params = {}
+        for i, (key, value) in enumerate(self.pairs):
+            if '__' in key:
+                column, lookup = key.rsplit('__', 1)
+            else:
+                column = key
+                lookup = 'exact'
+            filter = self._filters_by_key.get(lookup, None)
+            if filter:
+                sql_bit, param = filter.where_clause(column, value, i)
+                sql_bits.append(sql_bit)
+                if param is not None:
+                    param_id = 'p{}'.format(i)
+                    params[param_id] = param
+        return sql_bits, params
diff --git a/tests/test_app.py b/tests/test_app.py
index 17d1fc30..003b5aca 100644
--- a/tests/test_app.py
+++ b/tests/test_app.py
@@ -4,6 +4,7 @@
 import pytest
 import sqlite3
 import tempfile
 import time
+import urllib.parse


 @pytest.fixture(scope='module')
@@ -227,6 +228,38 @@ def test_row(app_client):
     assert [{'pk': '1', 'content': 'hello'}] == response.json['rows']


+def test_add_filter_redirects(app_client):
+    filter_args = urllib.parse.urlencode({
+        '_filter_column': 'content',
+        '_filter_op': 'startswith',
+        '_filter_value': 'x'
+    })
+    # First we need to resolve the correct path before testing more redirects
+    path_base = app_client.get(
+        '/test_tables/simple_primary_key', allow_redirects=False, gather_request=False
+    ).headers['Location']
+    path = path_base + '?' + filter_args
+    response = app_client.get(path, allow_redirects=False, gather_request=False)
+    assert response.status == 302
+    assert response.headers['Location'].endswith('?content__startswith=x')
+
+    # Adding a redirect to an existing querystring:
+    path = path_base + '?foo=bar&' + filter_args
+    response = app_client.get(path, allow_redirects=False, gather_request=False)
+    assert response.status == 302
+    assert response.headers['Location'].endswith('?content__startswith=x&foo=bar')
+
+    # Test that op with a __x suffix overrides the filter value
+    path = path_base + '?' + urllib.parse.urlencode({
+        '_filter_column': 'content',
+        '_filter_op': 'isnull__5',
+        '_filter_value': 'x'
+    })
+    response = app_client.get(path, allow_redirects=False, gather_request=False)
+    assert response.status == 302
+    assert response.headers['Location'].endswith('?content__isnull=5')
+
+
 TABLES = '''
 CREATE TABLE simple_primary_key (
   pk varchar(30) primary key,
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 168b9253..c5c9a8ed 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -4,6 +4,7 @@ Tests for various datasette helper functions.
 from datasette import utils
 import pytest
+import sqlite3
 import json
@@ -99,7 +100,8 @@ def test_custom_json_encoder(obj, expected):
     ),
 ])
 def test_build_where(args, expected_where, expected_params):
-    sql_bits, actual_params = utils.build_where_clauses(args)
+    f = utils.Filters(sorted(args.items()))
+    sql_bits, actual_params = f.build_where_clauses()
     assert expected_where == sql_bits
     assert {
         'p{}'.format(i): param
@@ -124,3 +126,26 @@
 ])
 def test_validate_sql_select_good(good_sql):
     utils.validate_sql_select(good_sql)
+
+
+def test_detect_fts():
+    sql = '''
+    CREATE TABLE "Dumb_Table" (
+        "TreeID" INTEGER,
+        "qSpecies" TEXT
+    );
+    CREATE TABLE "Street_Tree_List" (
+        "TreeID" INTEGER,
+        "qSpecies" TEXT,
+        "qAddress" TEXT,
+        "SiteOrder" INTEGER,
+        "qSiteInfo" TEXT,
+        "PlantType" TEXT,
+        "qCaretaker" TEXT
+    );
+    CREATE VIRTUAL TABLE "Street_Tree_List_fts" USING FTS4 ("qAddress", "qCaretaker", "qSpecies", content="Street_Tree_List");
+    '''
+    conn = sqlite3.connect(':memory:')
+    conn.executescript(sql)
+    assert None is utils.detect_fts(conn, 'Dumb_Table')
+    assert 'Street_Tree_List_fts' == utils.detect_fts(conn, 'Street_Tree_List')
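
For reference, a small usage sketch of the new Filters class added in datasette/utils.py above. This is illustrative only and is not part of the diff; the column names and values are invented:

    from datasette.utils import Filters

    # Querystring-style pairs, e.g. from ?age__gte=21&name__contains=sim&nickname__isnull=1
    # (hypothetical columns and values, purely for illustration)
    pairs = sorted({
        'age__gte': '21',
        'name__contains': 'sim',
        'nickname__isnull': '1',
    }.items())
    filters = Filters(pairs)

    where_clauses, params = filters.build_where_clauses()
    # where_clauses == ['"age" >= :p0', '"name" like :p1', '"nickname" is null']
    # params == {'p0': 21, 'p1': '%sim%'}

    print(filters.human_description())
    # age ≥ 21, name contains "sim" and nickname is null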
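
The new --branch option is wired through both publish and package. As a rough example (not taken from the diff; the database filename is made up), a command along the lines of:

    datasette publish now mydb.db --branch=master

should make the generated Dockerfile run "pip install https://github.com/simonw/datasette/archive/master.zip" instead of "pip install datasette", and the Heroku publisher writes that same archive URL into requirements.txt.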