Limit on max rows returned, controlled by --max_returned_rows option

If someone executes 'select * from table' against a table with a million rows
in it, we could run into problems: just serializing that much data as JSON is
likely to lock up the server.

Solution: we now have a hard limit on the maximum number of rows that can be
returned by a query. If a query produces more rows than that limit, only the
first rows up to the limit are returned and the JSON response includes a
`"truncated": true` field.

This limit defaults to 1000 rows and can be changed with the new
`--max_returned_rows` option. Setting that option to 0 disables the limit
entirely.

Closes #69
pull/81/head
Simon Willison 2017-11-13 11:33:01 -08:00
rodzic 6b3b05b6db
commit 8252e71da4
6 zmienionych plików z 76 dodań i 25 usunięć

Wyświetl plik

@ -80,17 +80,21 @@ http://localhost:8001/History/downloads.jsono will return that data as JSON in a
Serve up specified SQLite database files with a web UI
Options:
-h, --host TEXT host for server, defaults to 0.0.0.0
-p, --port INTEGER port for server, defaults to 8001
--debug Enable debug mode - useful for development
--reload Automatically reload if code change detected -
useful for development
--cors Enable CORS by serving Access-Control-Allow-Origin:
*
--inspect-file TEXT Path to JSON file created using "datasette build"
-m, --metadata FILENAME Path to JSON file containing license/source
metadata
--help Show this message and exit.
-h, --host TEXT host for server, defaults to 0.0.0.0
-p, --port INTEGER port for server, defaults to 8001
--debug Enable debug mode - useful for development
--reload Automatically reload if code change detected -
useful for development
--cors Enable CORS by serving Access-Control-Allow-
Origin: *
--page_size INTEGER Page size - default is 100
--max_returned_rows INTEGER Max allowed rows to return at once - default is
1000. Set to 0 to disable check entirely.
--inspect-file TEXT Path to JSON file created using "datasette
build"
-m, --metadata FILENAME Path to JSON file containing license/source
metadata
--help Show this message and exit.
## metadata.json

Wyświetl plik

@ -44,7 +44,7 @@ class BaseView(HTTPMethodView):
self.jinja = datasette.jinja
self.executor = datasette.executor
self.page_size = datasette.page_size
self.cache_headers = datasette.cache_headers
self.max_returned_rows = datasette.max_returned_rows
def options(self, request, *args, **kwargs):
r = response.text('ok')
@ -107,7 +107,7 @@ class BaseView(HTTPMethodView):
return name, expected, should_redirect
return name, expected, None
async def execute(self, db_name, sql, params=None):
async def execute(self, db_name, sql, params=None, truncate=False):
"""Executes sql against db_name in a thread"""
def sql_operation_in_thread():
conn = getattr(connections, db_name, None)
@ -124,13 +124,25 @@ class BaseView(HTTPMethodView):
with sqlite_timelimit(conn, SQL_TIME_LIMIT_MS):
try:
rows = conn.execute(sql, params or {})
cursor = conn.cursor()
cursor.execute(sql, params or {})
description = None
if self.max_returned_rows and truncate:
rows = cursor.fetchmany(self.max_returned_rows + 1)
truncated = len(rows) > self.max_returned_rows
rows = rows[:self.max_returned_rows]
else:
rows = cursor.fetchall()
truncated = False
except Exception:
print('ERROR: conn={}, sql = {}, params = {}'.format(
conn, repr(sql), params
))
raise
return rows
if truncate:
return rows, truncated, cursor.description
else:
return rows
return await asyncio.get_event_loop().run_in_executor(
self.executor, sql_operation_in_thread
@ -208,7 +220,7 @@ class BaseView(HTTPMethodView):
)
r.status = status_code
# Set far-future cache expiry
if self.cache_headers:
if self.ds.cache_headers:
r.headers['Cache-Control'] = 'max-age={}'.format(
365 * 24 * 60 * 60
)
@ -295,11 +307,12 @@ class DatabaseView(BaseView):
params = request.raw_args
sql = params.pop('sql')
validate_sql_select(sql)
rows = await self.execute(name, sql, params)
columns = [r[0] for r in rows.description]
rows, truncated, description = await self.execute(name, sql, params, truncate=True)
columns = [r[0] for r in description]
return {
'database': name,
'rows': rows,
'truncated': truncated,
'columns': columns,
'query': {
'sql': sql,
@ -401,9 +414,9 @@ class TableView(BaseView):
select, escape_sqlite_table_name(table), where_clause, order_by, self.page_size + 1,
)
rows = await self.execute(name, sql, params)
rows, truncated, description = await self.execute(name, sql, params, truncate=True)
columns = [r[0] for r in rows.description]
columns = [r[0] for r in description]
display_columns = columns
if use_rowid:
display_columns = display_columns[1:]
@ -422,6 +435,7 @@ class TableView(BaseView):
'view_definition': view_definition,
'table_definition': table_definition,
'rows': rows[:self.page_size],
'truncated': truncated,
'table_rows': table_rows,
'columns': columns,
'primary_keys': pks,
@ -480,7 +494,9 @@ class RowView(BaseView):
class Datasette:
def __init__(self, files, num_threads=3, cache_headers=True, page_size=50, cors=False, inspect_data=None, metadata=None):
def __init__(
self, files, num_threads=3, cache_headers=True, page_size=100,
max_returned_rows=1000, cors=False, inspect_data=None, metadata=None):
self.files = files
self.num_threads = num_threads
self.executor = futures.ThreadPoolExecutor(
@ -488,6 +504,7 @@ class Datasette:
)
self.cache_headers = cache_headers
self.page_size = page_size
self.max_returned_rows = max_returned_rows
self.cors = cors
self._inspect = inspect_data
self.metadata = metadata or {}

Wyświetl plik

@ -97,9 +97,11 @@ def package(files, tag, metadata):
@click.option('--debug', is_flag=True, help='Enable debug mode - useful for development')
@click.option('--reload', is_flag=True, help='Automatically reload if code change detected - useful for development')
@click.option('--cors', is_flag=True, help='Enable CORS by serving Access-Control-Allow-Origin: *')
@click.option('--page_size', default=100, help='Page size - default is 100')
@click.option('--max_returned_rows', default=1000, help='Max allowed rows to return at once - default is 1000. Set to 0 to disable check entirely.')
@click.option('--inspect-file', help='Path to JSON file created using "datasette build"')
@click.option('-m', '--metadata', type=click.File(mode='r'), help='Path to JSON file containing license/source metadata')
def serve(files, host, port, debug, reload, cors, inspect_file, metadata):
def serve(files, host, port, debug, reload, cors, page_size, max_returned_rows, inspect_file, metadata):
"""Serve up specified SQLite database files with a web UI"""
if reload:
import hupper
@ -118,6 +120,8 @@ def serve(files, host, port, debug, reload, cors, inspect_file, metadata):
files,
cache_headers=not debug and not reload,
cors=cors,
page_size=page_size,
max_returned_rows=max_returned_rows,
inspect_data=inspect_data,
metadata=metadata_data,
)

Wyświetl plik

@ -30,6 +30,10 @@
<p><input type="submit" value="Run SQL"></p>
</form>
{% if truncated %}
<div style="padding: 1em; margin: 1em 0; border: 3px solid red;">These results were truncated. You will need to apply OFFSET/LIMIT to see the whole result set.</div>
{% endif %}
{% if rows %}
<table>
<thead>

Wyświetl plik

@ -8,7 +8,7 @@
<h1 style="padding-left: 10px; border-left: 10px solid #{{ database_hash and database_hash[:6] }}">{{ database }}</h1>
{% if error %}
<div style="padding: 1em; margin: 1em; border: 3px solid red;">{{ error }}</div>
<div style="padding: 1em; margin: 1em 0; border: 3px solid red;">{{ error }}</div>
{% endif %}
{% endblock %}

Wyświetl plik

@ -12,7 +12,7 @@ def app_client():
conn = sqlite3.connect(filepath)
conn.executescript(TABLES)
os.chdir(os.path.dirname(filepath))
yield Datasette([filepath]).app().test_client
yield Datasette([filepath], page_size=50, max_returned_rows=100).app().test_client
def test_homepage(app_client):
@ -49,7 +49,7 @@ def test_database_page(app_client):
}, {
'columns': ['content'],
'name': 'no_primary_key',
'table_rows': 0,
'table_rows': 201,
}, {
'columns': ['pk', 'content'],
'name': 'simple_primary_key',
@ -76,6 +76,7 @@ def test_custom_sql(app_client):
] == data['rows']
assert ['content'] == data['columns']
assert 'test_tables' == data['database']
assert not data['truncated']
def test_invalid_custom_sql(app_client):
@ -121,6 +122,19 @@ def test_table_with_slashes_in_name(app_client):
}]
def test_max_returned_rows(app_client):
_, response = app_client.get(
'/test_tables.jsono?sql=select+content+from+no_primary_key'
)
data = response.json
assert {
'sql': 'select content from no_primary_key',
'params': {}
} == data['query']
assert data['truncated']
assert 100 == len(data['rows'])
def test_view(app_client):
_, response = app_client.get('/test_tables/simple_view')
assert response.status == 200
@ -153,6 +167,14 @@ CREATE TABLE no_primary_key (
content text
);
WITH RECURSIVE
cnt(x) AS (
SELECT 1
UNION ALL
SELECT x+1 FROM cnt LIMIT 201
)
INSERT INTO no_primary_key SELECT * from cnt;
CREATE TABLE "Table With Space In Name" (
pk varchar(30) primary key,
content text