datasette/datasette/views/table.py

696 wiersze
29 KiB
Python
Czysty Zwykły widok Historia

from sanic.request import RequestParameters
from .base import BaseView, DatasetteError, ureg
from sanic.exceptions import NotFound
from datasette.utils import (
Filters,
compound_keys_after_sql,
escape_sqlite,
filters_should_redirect,
is_url,
path_from_row_pks,
path_with_added_args,
to_css_class,
urlsafe_components,
)
import sqlite3
import jinja2
import urllib
class RowTableShared(BaseView):
def sortable_columns_for_table(self, name, table, use_rowid):
table_metadata = self.table_metadata(name, table)
if 'sortable_columns' in table_metadata:
sortable_columns = set(table_metadata['sortable_columns'])
else:
table_info = self.ds.inspect()[name]['tables'].get(table) or {}
sortable_columns = set(table_info.get('columns', []))
if use_rowid:
sortable_columns.add('rowid')
return sortable_columns
async def display_columns_and_rows(self, database, table, description, rows, link_column=False, expand_foreign_keys=True):
"Returns columns, rows for specified table - including fancy foreign key treatment"
table_metadata = self.table_metadata(database, table)
info = self.ds.inspect()[database]
sortable_columns = self.sortable_columns_for_table(database, table, True)
columns = [{
'name': r[0],
'sortable': r[0] in sortable_columns,
} for r in description]
tables = info['tables']
table_info = tables.get(table) or {}
pks = table_info.get('primary_keys') or []
# Prefetch foreign key resolutions for later expansion:
fks = {}
labeled_fks = {}
if table_info and expand_foreign_keys:
foreign_keys = table_info['foreign_keys']['outgoing']
for fk in foreign_keys:
label_column = (
# First look in metadata.json definition for this foreign key table:
self.table_metadata(database, fk['other_table']).get('label_column')
# Fall back to label_column from .inspect() detection:
or tables.get(fk['other_table'], {}).get('label_column')
)
if not label_column:
# No label for this FK
fks[fk['column']] = fk['other_table']
continue
ids_to_lookup = set([row[fk['column']] for row in rows])
sql = 'select "{other_column}", "{label_column}" from {other_table} where "{other_column}" in ({placeholders})'.format(
other_column=fk['other_column'],
label_column=label_column,
other_table=escape_sqlite(fk['other_table']),
placeholders=', '.join(['?'] * len(ids_to_lookup)),
)
try:
results = await self.execute(database, sql, list(set(ids_to_lookup)))
except sqlite3.OperationalError:
# Probably hit the timelimit
pass
else:
for id, value in results:
labeled_fks[(fk['column'], id)] = (fk['other_table'], value)
cell_rows = []
for row in rows:
cells = []
# Unless we are a view, the first column is a link - either to the rowid
# or to the simple or compound primary key
if link_column:
cells.append({
'column': pks[0] if len(pks) == 1 else 'Link',
'value': jinja2.Markup(
'<a href="/{database}/{table}/{flat_pks_quoted}">{flat_pks}</a>'.format(
database=database,
table=urllib.parse.quote_plus(table),
flat_pks=str(jinja2.escape(path_from_row_pks(row, pks, not pks, False))),
flat_pks_quoted=path_from_row_pks(row, pks, not pks)
)
),
})
for value, column_dict in zip(row, columns):
column = column_dict['name']
if link_column and len(pks) == 1 and column == pks[0]:
# If there's a simple primary key, don't repeat the value as it's
# already shown in the link column.
continue
if (column, value) in labeled_fks:
other_table, label = labeled_fks[(column, value)]
display_value = jinja2.Markup(
'<a href="/{database}/{table}/{link_id}">{label}</a>&nbsp;<em>{id}</em>'.format(
database=database,
table=urllib.parse.quote_plus(other_table),
link_id=urllib.parse.quote_plus(str(value)),
id=str(jinja2.escape(value)),
label=str(jinja2.escape(label)),
)
)
elif column in fks:
display_value = jinja2.Markup(
'<a href="/{database}/{table}/{link_id}">{id}</a>'.format(
database=database,
table=urllib.parse.quote_plus(fks[column]),
link_id=urllib.parse.quote_plus(str(value)),
id=str(jinja2.escape(value))))
elif value is None:
display_value = jinja2.Markup('&nbsp;')
elif is_url(str(value).strip()):
display_value = jinja2.Markup(
'<a href="{url}">{url}</a>'.format(
url=jinja2.escape(value.strip())
)
)
elif column in table_metadata.get('units', {}) and value != '':
# Interpret units using pint
value = value * ureg(table_metadata['units'][column])
# Pint uses floating point which sometimes introduces errors in the compact
# representation, which we have to round off to avoid ugliness. In the vast
# majority of cases this rounding will be inconsequential. I hope.
value = round(value.to_compact(), 6)
display_value = jinja2.Markup('{:~P}'.format(value).replace(' ', '&nbsp;'))
else:
display_value = str(value)
cells.append({
'column': column,
'value': display_value,
})
cell_rows.append(cells)
if link_column:
# Add the link column header.
# If it's a simple primary key, we have to remove and re-add that column name at
# the beginning of the header row.
if len(pks) == 1:
columns = [col for col in columns if col['name'] != pks[0]]
columns = [{
'name': pks[0] if len(pks) == 1 else 'Link',
'sortable': len(pks) == 1,
}] + columns
return columns, cell_rows
class TableView(RowTableShared):
async def data(self, request, name, hash, table):
table = urllib.parse.unquote_plus(table)
canned_query = self.ds.get_canned_query(name, table)
if canned_query is not None:
return await self.custom_sql(request, name, hash, canned_query['sql'], editable=False, canned_query=table)
is_view = bool(list(await self.execute(
name,
"SELECT count(*) from sqlite_master WHERE type = 'view' and name=:n",
{'n': table}
))[0][0])
view_definition = None
table_definition = None
if is_view:
view_definition = list(await self.execute(
name,
'select sql from sqlite_master where name = :n and type="view"',
{'n': table}
))[0][0]
else:
table_definition_rows = list(await self.execute(
name,
'select sql from sqlite_master where name = :n and type="table"',
{'n': table}
))
if not table_definition_rows:
raise NotFound('Table not found: {}'.format(table))
table_definition = table_definition_rows[0][0]
info = self.ds.inspect()
table_info = info[name]['tables'].get(table) or {}
pks = table_info.get('primary_keys') or []
use_rowid = not pks and not is_view
if use_rowid:
select = 'rowid, *'
order_by = 'rowid'
order_by_pks = 'rowid'
else:
select = '*'
order_by_pks = ', '.join([escape_sqlite(pk) for pk in pks])
order_by = order_by_pks
if is_view:
order_by = ''
# We roll our own query_string decoder because by default Sanic
# drops anything with an empty value e.g. ?name__exact=
args = RequestParameters(
urllib.parse.parse_qs(request.query_string, keep_blank_values=True)
)
# Special args start with _ and do not contain a __
# That's so if there is a column that starts with _
# it can still be queried using ?_col__exact=blah
special_args = {}
special_args_lists = {}
other_args = {}
for key, value in args.items():
if key.startswith('_') and '__' not in key:
special_args[key] = value[0]
special_args_lists[key] = value
else:
other_args[key] = value[0]
# Handle ?_filter_column and redirect, if present
redirect_params = filters_should_redirect(special_args)
if redirect_params:
return self.redirect(
request,
path_with_added_args(request, redirect_params),
forward_querystring=False
)
# Spot ?_sort_by_desc and redirect to _sort_desc=(_sort)
if '_sort_by_desc' in special_args:
return self.redirect(
request,
path_with_added_args(request, {
'_sort_desc': special_args.get('_sort'),
'_sort_by_desc': None,
'_sort': None,
}),
forward_querystring=False
)
table_metadata = self.table_metadata(name, table)
units = table_metadata.get('units', {})
filters = Filters(sorted(other_args.items()), units, ureg)
where_clauses, params = filters.build_where_clauses()
# _search support:
fts_table = info[name]['tables'].get(table, {}).get('fts_table')
search_args = dict(
pair for pair in special_args.items()
if pair[0].startswith('_search')
)
search_descriptions = []
search = ''
if fts_table and search_args:
if '_search' in search_args:
# Simple ?_search=xxx
search = search_args['_search']
where_clauses.append(
'rowid in (select rowid from [{fts_table}] where [{fts_table}] match :search)'.format(
fts_table=fts_table
)
)
search_descriptions.append('search matches "{}"'.format(search))
params['search'] = search
else:
# More complex: search against specific columns
valid_columns = set(info[name]['tables'][fts_table]['columns'])
for i, (key, search_text) in enumerate(search_args.items()):
search_col = key.split('_search_', 1)[1]
if search_col not in valid_columns:
raise DatasetteError(
'Cannot search by that column',
status=400
)
where_clauses.append(
'rowid in (select rowid from [{fts_table}] where [{search_col}] match :search_{i})'.format(
fts_table=fts_table,
search_col=search_col,
i=i,
)
)
search_descriptions.append(
'search column "{}" matches "{}"'.format(search_col, search_text)
)
params['search_{}'.format(i)] = search_text
table_rows_count = None
sortable_columns = set()
if not is_view:
table_rows_count = table_info['count']
sortable_columns = self.sortable_columns_for_table(name, table, use_rowid)
# Allow for custom sort order
sort = special_args.get('_sort')
if sort:
if sort not in sortable_columns:
raise DatasetteError('Cannot sort table by {}'.format(sort))
order_by = escape_sqlite(sort)
sort_desc = special_args.get('_sort_desc')
if sort_desc:
if sort_desc not in sortable_columns:
raise DatasetteError('Cannot sort table by {}'.format(sort_desc))
if sort:
raise DatasetteError('Cannot use _sort and _sort_desc at the same time')
order_by = '{} desc'.format(escape_sqlite(sort_desc))
from_sql = 'from {table_name} {where}'.format(
table_name=escape_sqlite(table),
where=(
'where {} '.format(' and '.join(where_clauses))
) if where_clauses else '',
)
count_sql = 'select count(*) {}'.format(from_sql)
_next = special_args.get('_next')
offset = ''
if _next:
if is_view:
# _next is an offset
offset = ' offset {}'.format(int(_next))
else:
components = urlsafe_components(_next)
# If a sort order is applied, the first of these is the sort value
if sort or sort_desc:
sort_value = components[0]
# Special case for if non-urlencoded first token was $null
if _next.split(',')[0] == '$null':
sort_value = None
components = components[1:]
# Figure out the SQL for next-based-on-primary-key first
next_by_pk_clauses = []
if use_rowid:
next_by_pk_clauses.append(
'rowid > :p{}'.format(
len(params),
)
)
params['p{}'.format(len(params))] = components[0]
else:
# Apply the tie-breaker based on primary keys
if len(components) == len(pks):
param_len = len(params)
next_by_pk_clauses.append(compound_keys_after_sql(pks, param_len))
for i, pk_value in enumerate(components):
params['p{}'.format(param_len + i)] = pk_value
# Now add the sort SQL, which may incorporate next_by_pk_clauses
if sort or sort_desc:
if sort_value is None:
if sort_desc:
# Just items where column is null ordered by pk
where_clauses.append(
'({column} is null and {next_clauses})'.format(
column=escape_sqlite(sort_desc),
next_clauses=' and '.join(next_by_pk_clauses),
)
)
else:
where_clauses.append(
'({column} is not null or ({column} is null and {next_clauses}))'.format(
column=escape_sqlite(sort),
next_clauses=' and '.join(next_by_pk_clauses),
)
)
else:
where_clauses.append(
'({column} {op} :p{p}{extra_desc_only} or ({column} = :p{p} and {next_clauses}))'.format(
column=escape_sqlite(sort or sort_desc),
op='>' if sort else '<',
p=len(params),
extra_desc_only='' if sort else ' or {column2} is null'.format(
column2=escape_sqlite(sort or sort_desc),
),
next_clauses=' and '.join(next_by_pk_clauses),
)
)
params['p{}'.format(len(params))] = sort_value
order_by = '{}, {}'.format(
order_by, order_by_pks
)
else:
where_clauses.extend(next_by_pk_clauses)
where_clause = ''
if where_clauses:
where_clause = 'where {} '.format(' and '.join(where_clauses))
if order_by:
order_by = 'order by {} '.format(order_by)
# _group_count=col1&_group_count=col2
group_count = special_args_lists.get('_group_count') or []
if group_count:
sql = 'select {group_cols}, count(*) as "count" from {table_name} {where} group by {group_cols} order by "count" desc limit 100'.format(
group_cols=', '.join('"{}"'.format(group_count_col) for group_count_col in group_count),
table_name=escape_sqlite(table),
where=where_clause,
)
return await self.custom_sql(request, name, hash, sql, editable=True)
extra_args = {}
# Handle ?_page_size=500
page_size = request.raw_args.get('_size')
if page_size:
if page_size == 'max':
page_size = self.max_returned_rows
try:
page_size = int(page_size)
if page_size < 0:
raise ValueError
except ValueError:
raise DatasetteError(
'_size must be a positive integer',
status=400
)
if page_size > self.max_returned_rows:
raise DatasetteError(
'_size must be <= {}'.format(self.max_returned_rows),
status=400
)
extra_args['page_size'] = page_size
else:
page_size = self.page_size
sql = 'select {select} from {table_name} {where}{order_by}limit {limit}{offset}'.format(
select=select,
table_name=escape_sqlite(table),
where=where_clause,
order_by=order_by,
limit=page_size + 1,
offset=offset,
)
if request.raw_args.get('_timelimit'):
extra_args['custom_time_limit'] = int(request.raw_args['_timelimit'])
rows, truncated, description = await self.execute(
name, sql, params, truncate=True, **extra_args
)
# facets support
try:
facets = request.args['_facet']
except KeyError:
facets = table_metadata.get('facets', [])
facet_results = {}
for column in facets:
facet_sql = '''
select {col} as value, count(*) as count
{from_sql}
group by {col} order by count desc limit 20
'''.format(col=escape_sqlite(column), from_sql=from_sql)
try:
facet_rows = await self.execute(
name,
facet_sql,
params,
truncate=False,
custom_time_limit=200
)
facet_results[column] = [{
'value': row['value'],
'count': row['count'],
'toggle_url': urllib.parse.urljoin(
request.url, path_with_added_args(
request, {column: row['value']}
)
)
} for row in facet_rows]
except sqlite3.OperationalError:
# Hit time limit
pass
columns = [r[0] for r in description]
rows = list(rows)
filter_columns = columns[:]
if use_rowid and filter_columns[0] == 'rowid':
filter_columns = filter_columns[1:]
# Pagination next link
next_value = None
next_url = None
if len(rows) > page_size and page_size > 0:
if is_view:
next_value = int(_next or 0) + page_size
else:
next_value = path_from_row_pks(rows[-2], pks, use_rowid)
# If there's a sort or sort_desc, add that value as a prefix
if (sort or sort_desc) and not is_view:
prefix = rows[-2][sort or sort_desc]
if prefix is None:
prefix = '$null'
else:
prefix = urllib.parse.quote_plus(str(prefix))
next_value = '{},{}'.format(prefix, next_value)
added_args = {
'_next': next_value,
}
if sort:
added_args['_sort'] = sort
else:
added_args['_sort_desc'] = sort_desc
else:
added_args = {
'_next': next_value,
}
next_url = urllib.parse.urljoin(request.url, path_with_added_args(
request, added_args
))
rows = rows[:page_size]
# Number of filtered rows in whole set:
filtered_table_rows_count = None
if count_sql:
try:
count_rows = list(await self.execute(name, count_sql, params))
filtered_table_rows_count = count_rows[0][0]
except sqlite3.OperationalError:
# Almost certainly hit the timeout
pass
# human_description_en combines filters AND search, if provided
human_description_en = filters.human_description_en(extra=search_descriptions)
if sort or sort_desc:
sorted_by = 'sorted by {}{}'.format(
(sort or sort_desc),
' descending' if sort_desc else '',
)
human_description_en = ' '.join([
b for b in [human_description_en, sorted_by] if b
])
async def extra_template():
display_columns, display_rows = await self.display_columns_and_rows(
name, table, description, rows, link_column=not is_view, expand_foreign_keys=True
)
metadata = self.ds.metadata.get(
'databases', {}
).get(name, {}).get('tables', {}).get(table, {})
self.ds.update_with_inherited_metadata(metadata)
return {
'database_hash': hash,
'supports_search': bool(fts_table),
'search': search or '',
'use_rowid': use_rowid,
'filters': filters,
'display_columns': display_columns,
'filter_columns': filter_columns,
'display_rows': display_rows,
'is_sortable': any(c['sortable'] for c in display_columns),
'path_with_added_args': path_with_added_args,
'request': request,
'sort': sort,
'sort_desc': sort_desc,
'disable_sort': is_view,
'custom_rows_and_columns_templates': [
'_rows_and_columns-{}-{}.html'.format(to_css_class(name), to_css_class(table)),
'_rows_and_columns-table-{}-{}.html'.format(to_css_class(name), to_css_class(table)),
'_rows_and_columns.html',
],
'metadata': metadata,
}
return {
'database': name,
'table': table,
'is_view': is_view,
'view_definition': view_definition,
'table_definition': table_definition,
'human_description_en': human_description_en,
'rows': rows[:page_size],
'truncated': truncated,
'table_rows_count': table_rows_count,
'filtered_table_rows_count': filtered_table_rows_count,
'columns': columns,
'primary_keys': pks,
'units': units,
'query': {
'sql': sql,
'params': params,
},
'facet_results': facet_results,
'next': next_value and str(next_value) or None,
'next_url': next_url,
}, extra_template, (
'table-{}-{}.html'.format(to_css_class(name), to_css_class(table)),
'table.html'
)
class RowView(RowTableShared):
async def data(self, request, name, hash, table, pk_path):
table = urllib.parse.unquote_plus(table)
pk_values = urlsafe_components(pk_path)
info = self.ds.inspect()[name]
table_info = info['tables'].get(table) or {}
pks = table_info.get('primary_keys') or []
use_rowid = not pks
select = '*'
if use_rowid:
select = 'rowid, *'
pks = ['rowid']
wheres = [
'"{}"=:p{}'.format(pk, i)
for i, pk in enumerate(pks)
]
sql = 'select {} from "{}" where {}'.format(
select, table, ' AND '.join(wheres)
)
params = {}
for i, pk_value in enumerate(pk_values):
params['p{}'.format(i)] = pk_value
# rows, truncated, description = await self.execute(name, sql, params, truncate=True)
rows, truncated, description = await self.execute(name, sql, params, truncate=True)
columns = [r[0] for r in description]
rows = list(rows)
if not rows:
raise NotFound('Record not found: {}'.format(pk_values))
async def template_data():
display_columns, display_rows = await self.display_columns_and_rows(
name, table, description, rows, link_column=False, expand_foreign_keys=True
)
for column in display_columns:
column['sortable'] = False
return {
'database_hash': hash,
'foreign_key_tables': await self.foreign_key_tables(name, table, pk_values),
'display_columns': display_columns,
'display_rows': display_rows,
'custom_rows_and_columns_templates': [
'_rows_and_columns-{}-{}.html'.format(to_css_class(name), to_css_class(table)),
'_rows_and_columns-row-{}-{}.html'.format(to_css_class(name), to_css_class(table)),
'_rows_and_columns.html',
],
'metadata': self.ds.metadata.get(
'databases', {}
).get(name, {}).get('tables', {}).get(table, {}),
}
data = {
'database': name,
'table': table,
'rows': rows,
'columns': columns,
'primary_keys': pks,
'primary_key_values': pk_values,
'units': self.table_metadata(name, table).get('units', {})
}
if 'foreign_key_tables' in (request.raw_args.get('_extras') or '').split(','):
data['foreign_key_tables'] = await self.foreign_key_tables(name, table, pk_values)
return data, template_data, (
'row-{}-{}.html'.format(to_css_class(name), to_css_class(table)),
'row.html'
)
async def foreign_key_tables(self, name, table, pk_values):
if len(pk_values) != 1:
return []
table_info = self.ds.inspect()[name]['tables'].get(table)
if not table_info:
return []
foreign_keys = table_info['foreign_keys']['incoming']
if len(foreign_keys) == 0:
return []
sql = 'select ' + ', '.join([
'(select count(*) from {table} where "{column}"=:id)'.format(
table=escape_sqlite(fk['other_table']),
column=fk['other_column'],
)
for fk in foreign_keys
])
try:
rows = list(await self.execute(name, sql, {'id': pk_values[0]}))
except sqlite3.OperationalError:
# Almost certainly hit the timeout
return []
foreign_table_counts = dict(
zip(
[(fk['other_table'], fk['other_column']) for fk in foreign_keys],
list(rows[0]),
)
)
foreign_key_tables = []
for fk in foreign_keys:
count = foreign_table_counts.get((fk['other_table'], fk['other_column'])) or 0
foreign_key_tables.append({**fk, **{'count': count}})
return foreign_key_tables