kopia lustrzana https://github.com/simonw/datasette
Limit on max rows returned, controlled by --max_returned_rows option
If someone executes 'select * from table' against a table with a million rows in it, we could run into problems: just serializing that much data as JSON is likely to lock up the server. Solution: we now have a hard limit on the maximum number of rows that can be returned by a query. If that limit is exceeded, the server will return a `"truncated": true` field in the JSON. This limit can be optionally controlled by the new `--max_returned_rows` option. Setting that option to 0 disables the limit entirely. Closes #69
pull/81/head
rodzic
6b3b05b6db
commit
8252e71da4
10
README.md
10
README.md
|
@ -85,9 +85,13 @@ http://localhost:8001/History/downloads.jsono will return that data as JSON in a
|
||||||
--debug Enable debug mode - useful for development
|
--debug Enable debug mode - useful for development
|
||||||
--reload Automatically reload if code change detected -
|
--reload Automatically reload if code change detected -
|
||||||
useful for development
|
useful for development
|
||||||
--cors Enable CORS by serving Access-Control-Allow-Origin:
|
--cors Enable CORS by serving Access-Control-Allow-
|
||||||
*
|
Origin: *
|
||||||
--inspect-file TEXT Path to JSON file created using "datasette build"
|
--page_size INTEGER Page size - default is 100
|
||||||
|
--max_returned_rows INTEGER Max allowed rows to return at once - default is
|
||||||
|
1000. Set to 0 to disable check entirely.
|
||||||
|
--inspect-file TEXT Path to JSON file created using "datasette
|
||||||
|
build"
|
||||||
-m, --metadata FILENAME Path to JSON file containing license/source
|
-m, --metadata FILENAME Path to JSON file containing license/source
|
||||||
metadata
|
metadata
|
||||||
--help Show this message and exit.
|
--help Show this message and exit.
|
||||||
|
|
|
@ -44,7 +44,7 @@ class BaseView(HTTPMethodView):
|
||||||
self.jinja = datasette.jinja
|
self.jinja = datasette.jinja
|
||||||
self.executor = datasette.executor
|
self.executor = datasette.executor
|
||||||
self.page_size = datasette.page_size
|
self.page_size = datasette.page_size
|
||||||
self.cache_headers = datasette.cache_headers
|
self.max_returned_rows = datasette.max_returned_rows
|
||||||
|
|
||||||
def options(self, request, *args, **kwargs):
|
def options(self, request, *args, **kwargs):
|
||||||
r = response.text('ok')
|
r = response.text('ok')
|
||||||
|
@ -107,7 +107,7 @@ class BaseView(HTTPMethodView):
|
||||||
return name, expected, should_redirect
|
return name, expected, should_redirect
|
||||||
return name, expected, None
|
return name, expected, None
|
||||||
|
|
||||||
async def execute(self, db_name, sql, params=None):
|
async def execute(self, db_name, sql, params=None, truncate=False):
|
||||||
"""Executes sql against db_name in a thread"""
|
"""Executes sql against db_name in a thread"""
|
||||||
def sql_operation_in_thread():
|
def sql_operation_in_thread():
|
||||||
conn = getattr(connections, db_name, None)
|
conn = getattr(connections, db_name, None)
|
||||||
|
@ -124,12 +124,24 @@ class BaseView(HTTPMethodView):
|
||||||
|
|
||||||
with sqlite_timelimit(conn, SQL_TIME_LIMIT_MS):
|
with sqlite_timelimit(conn, SQL_TIME_LIMIT_MS):
|
||||||
try:
|
try:
|
||||||
rows = conn.execute(sql, params or {})
|
cursor = conn.cursor()
|
||||||
|
cursor.execute(sql, params or {})
|
||||||
|
description = None
|
||||||
|
if self.max_returned_rows and truncate:
|
||||||
|
rows = cursor.fetchmany(self.max_returned_rows + 1)
|
||||||
|
truncated = len(rows) > self.max_returned_rows
|
||||||
|
rows = rows[:self.max_returned_rows]
|
||||||
|
else:
|
||||||
|
rows = cursor.fetchall()
|
||||||
|
truncated = False
|
||||||
except Exception:
|
except Exception:
|
||||||
print('ERROR: conn={}, sql = {}, params = {}'.format(
|
print('ERROR: conn={}, sql = {}, params = {}'.format(
|
||||||
conn, repr(sql), params
|
conn, repr(sql), params
|
||||||
))
|
))
|
||||||
raise
|
raise
|
||||||
|
if truncate:
|
||||||
|
return rows, truncated, cursor.description
|
||||||
|
else:
|
||||||
return rows
|
return rows
|
||||||
|
|
||||||
return await asyncio.get_event_loop().run_in_executor(
|
return await asyncio.get_event_loop().run_in_executor(
|
||||||
|
@ -208,7 +220,7 @@ class BaseView(HTTPMethodView):
|
||||||
)
|
)
|
||||||
r.status = status_code
|
r.status = status_code
|
||||||
# Set far-future cache expiry
|
# Set far-future cache expiry
|
||||||
if self.cache_headers:
|
if self.ds.cache_headers:
|
||||||
r.headers['Cache-Control'] = 'max-age={}'.format(
|
r.headers['Cache-Control'] = 'max-age={}'.format(
|
||||||
365 * 24 * 60 * 60
|
365 * 24 * 60 * 60
|
||||||
)
|
)
|
||||||
|
@ -295,11 +307,12 @@ class DatabaseView(BaseView):
|
||||||
params = request.raw_args
|
params = request.raw_args
|
||||||
sql = params.pop('sql')
|
sql = params.pop('sql')
|
||||||
validate_sql_select(sql)
|
validate_sql_select(sql)
|
||||||
rows = await self.execute(name, sql, params)
|
rows, truncated, description = await self.execute(name, sql, params, truncate=True)
|
||||||
columns = [r[0] for r in rows.description]
|
columns = [r[0] for r in description]
|
||||||
return {
|
return {
|
||||||
'database': name,
|
'database': name,
|
||||||
'rows': rows,
|
'rows': rows,
|
||||||
|
'truncated': truncated,
|
||||||
'columns': columns,
|
'columns': columns,
|
||||||
'query': {
|
'query': {
|
||||||
'sql': sql,
|
'sql': sql,
|
||||||
|
@ -401,9 +414,9 @@ class TableView(BaseView):
|
||||||
select, escape_sqlite_table_name(table), where_clause, order_by, self.page_size + 1,
|
select, escape_sqlite_table_name(table), where_clause, order_by, self.page_size + 1,
|
||||||
)
|
)
|
||||||
|
|
||||||
rows = await self.execute(name, sql, params)
|
rows, truncated, description = await self.execute(name, sql, params, truncate=True)
|
||||||
|
|
||||||
columns = [r[0] for r in rows.description]
|
columns = [r[0] for r in description]
|
||||||
display_columns = columns
|
display_columns = columns
|
||||||
if use_rowid:
|
if use_rowid:
|
||||||
display_columns = display_columns[1:]
|
display_columns = display_columns[1:]
|
||||||
|
@ -422,6 +435,7 @@ class TableView(BaseView):
|
||||||
'view_definition': view_definition,
|
'view_definition': view_definition,
|
||||||
'table_definition': table_definition,
|
'table_definition': table_definition,
|
||||||
'rows': rows[:self.page_size],
|
'rows': rows[:self.page_size],
|
||||||
|
'truncated': truncated,
|
||||||
'table_rows': table_rows,
|
'table_rows': table_rows,
|
||||||
'columns': columns,
|
'columns': columns,
|
||||||
'primary_keys': pks,
|
'primary_keys': pks,
|
||||||
|
@ -480,7 +494,9 @@ class RowView(BaseView):
|
||||||
|
|
||||||
|
|
||||||
class Datasette:
|
class Datasette:
|
||||||
def __init__(self, files, num_threads=3, cache_headers=True, page_size=50, cors=False, inspect_data=None, metadata=None):
|
def __init__(
|
||||||
|
self, files, num_threads=3, cache_headers=True, page_size=100,
|
||||||
|
max_returned_rows=1000, cors=False, inspect_data=None, metadata=None):
|
||||||
self.files = files
|
self.files = files
|
||||||
self.num_threads = num_threads
|
self.num_threads = num_threads
|
||||||
self.executor = futures.ThreadPoolExecutor(
|
self.executor = futures.ThreadPoolExecutor(
|
||||||
|
@ -488,6 +504,7 @@ class Datasette:
|
||||||
)
|
)
|
||||||
self.cache_headers = cache_headers
|
self.cache_headers = cache_headers
|
||||||
self.page_size = page_size
|
self.page_size = page_size
|
||||||
|
self.max_returned_rows = max_returned_rows
|
||||||
self.cors = cors
|
self.cors = cors
|
||||||
self._inspect = inspect_data
|
self._inspect = inspect_data
|
||||||
self.metadata = metadata or {}
|
self.metadata = metadata or {}
|
||||||
|
|
|
@ -97,9 +97,11 @@ def package(files, tag, metadata):
|
||||||
@click.option('--debug', is_flag=True, help='Enable debug mode - useful for development')
|
@click.option('--debug', is_flag=True, help='Enable debug mode - useful for development')
|
||||||
@click.option('--reload', is_flag=True, help='Automatically reload if code change detected - useful for development')
|
@click.option('--reload', is_flag=True, help='Automatically reload if code change detected - useful for development')
|
||||||
@click.option('--cors', is_flag=True, help='Enable CORS by serving Access-Control-Allow-Origin: *')
|
@click.option('--cors', is_flag=True, help='Enable CORS by serving Access-Control-Allow-Origin: *')
|
||||||
|
@click.option('--page_size', default=100, help='Page size - default is 100')
|
||||||
|
@click.option('--max_returned_rows', default=1000, help='Max allowed rows to return at once - default is 1000. Set to 0 to disable check entirely.')
|
||||||
@click.option('--inspect-file', help='Path to JSON file created using "datasette build"')
|
@click.option('--inspect-file', help='Path to JSON file created using "datasette build"')
|
||||||
@click.option('-m', '--metadata', type=click.File(mode='r'), help='Path to JSON file containing license/source metadata')
|
@click.option('-m', '--metadata', type=click.File(mode='r'), help='Path to JSON file containing license/source metadata')
|
||||||
def serve(files, host, port, debug, reload, cors, inspect_file, metadata):
|
def serve(files, host, port, debug, reload, cors, page_size, max_returned_rows, inspect_file, metadata):
|
||||||
"""Serve up specified SQLite database files with a web UI"""
|
"""Serve up specified SQLite database files with a web UI"""
|
||||||
if reload:
|
if reload:
|
||||||
import hupper
|
import hupper
|
||||||
|
@ -118,6 +120,8 @@ def serve(files, host, port, debug, reload, cors, inspect_file, metadata):
|
||||||
files,
|
files,
|
||||||
cache_headers=not debug and not reload,
|
cache_headers=not debug and not reload,
|
||||||
cors=cors,
|
cors=cors,
|
||||||
|
page_size=page_size,
|
||||||
|
max_returned_rows=max_returned_rows,
|
||||||
inspect_data=inspect_data,
|
inspect_data=inspect_data,
|
||||||
metadata=metadata_data,
|
metadata=metadata_data,
|
||||||
)
|
)
|
||||||
|
|
|
@ -30,6 +30,10 @@
|
||||||
<p><input type="submit" value="Run SQL"></p>
|
<p><input type="submit" value="Run SQL"></p>
|
||||||
</form>
|
</form>
|
||||||
|
|
||||||
|
{% if truncated %}
|
||||||
|
<div style="padding: 1em; margin: 1em 0; border: 3px solid red;">These results were truncated. You will need to apply OFFSET/LIMIT to see the whole result set.</div>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
{% if rows %}
|
{% if rows %}
|
||||||
<table>
|
<table>
|
||||||
<thead>
|
<thead>
|
||||||
|
|
|
@ -8,7 +8,7 @@
|
||||||
<h1 style="padding-left: 10px; border-left: 10px solid #{{ database_hash and database_hash[:6] }}">{{ database }}</h1>
|
<h1 style="padding-left: 10px; border-left: 10px solid #{{ database_hash and database_hash[:6] }}">{{ database }}</h1>
|
||||||
|
|
||||||
{% if error %}
|
{% if error %}
|
||||||
<div style="padding: 1em; margin: 1em; border: 3px solid red;">{{ error }}</div>
|
<div style="padding: 1em; margin: 1em 0; border: 3px solid red;">{{ error }}</div>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
|
||||||
{% endblock %}
|
{% endblock %}
|
||||||
|
|
|
@ -12,7 +12,7 @@ def app_client():
|
||||||
conn = sqlite3.connect(filepath)
|
conn = sqlite3.connect(filepath)
|
||||||
conn.executescript(TABLES)
|
conn.executescript(TABLES)
|
||||||
os.chdir(os.path.dirname(filepath))
|
os.chdir(os.path.dirname(filepath))
|
||||||
yield Datasette([filepath]).app().test_client
|
yield Datasette([filepath], page_size=50, max_returned_rows=100).app().test_client
|
||||||
|
|
||||||
|
|
||||||
def test_homepage(app_client):
|
def test_homepage(app_client):
|
||||||
|
@ -49,7 +49,7 @@ def test_database_page(app_client):
|
||||||
}, {
|
}, {
|
||||||
'columns': ['content'],
|
'columns': ['content'],
|
||||||
'name': 'no_primary_key',
|
'name': 'no_primary_key',
|
||||||
'table_rows': 0,
|
'table_rows': 201,
|
||||||
}, {
|
}, {
|
||||||
'columns': ['pk', 'content'],
|
'columns': ['pk', 'content'],
|
||||||
'name': 'simple_primary_key',
|
'name': 'simple_primary_key',
|
||||||
|
@ -76,6 +76,7 @@ def test_custom_sql(app_client):
|
||||||
] == data['rows']
|
] == data['rows']
|
||||||
assert ['content'] == data['columns']
|
assert ['content'] == data['columns']
|
||||||
assert 'test_tables' == data['database']
|
assert 'test_tables' == data['database']
|
||||||
|
assert not data['truncated']
|
||||||
|
|
||||||
|
|
||||||
def test_invalid_custom_sql(app_client):
|
def test_invalid_custom_sql(app_client):
|
||||||
|
@ -121,6 +122,19 @@ def test_table_with_slashes_in_name(app_client):
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
|
||||||
|
def test_max_returned_rows(app_client):
|
||||||
|
_, response = app_client.get(
|
||||||
|
'/test_tables.jsono?sql=select+content+from+no_primary_key'
|
||||||
|
)
|
||||||
|
data = response.json
|
||||||
|
assert {
|
||||||
|
'sql': 'select content from no_primary_key',
|
||||||
|
'params': {}
|
||||||
|
} == data['query']
|
||||||
|
assert data['truncated']
|
||||||
|
assert 100 == len(data['rows'])
|
||||||
|
|
||||||
|
|
||||||
def test_view(app_client):
|
def test_view(app_client):
|
||||||
_, response = app_client.get('/test_tables/simple_view')
|
_, response = app_client.get('/test_tables/simple_view')
|
||||||
assert response.status == 200
|
assert response.status == 200
|
||||||
|
@ -153,6 +167,14 @@ CREATE TABLE no_primary_key (
|
||||||
content text
|
content text
|
||||||
);
|
);
|
||||||
|
|
||||||
|
WITH RECURSIVE
|
||||||
|
cnt(x) AS (
|
||||||
|
SELECT 1
|
||||||
|
UNION ALL
|
||||||
|
SELECT x+1 FROM cnt LIMIT 201
|
||||||
|
)
|
||||||
|
INSERT INTO no_primary_key SELECT * from cnt;
|
||||||
|
|
||||||
CREATE TABLE "Table With Space In Name" (
|
CREATE TABLE "Table With Space In Name" (
|
||||||
pk varchar(30) primary key,
|
pk varchar(30) primary key,
|
||||||
content text
|
content text
|
||||||
|
|
Ładowanie…
Reference in New Issue