?_labels= and ?_label=COL to expand foreign keys in JSON/CSV

These new querystring arguments can be used to request expanded foreign keys
in both JSON and CSV formats.

?_labels=on turns on expansions for ALL foreign key columns

?_label=COLUMN1&_label=COLUMN2 can be used to pick specific columns to expand

e.g. `Street_Tree_List.json?_label=qSpecies&_label=qLegalStatus`

    {
        "rowid": 233,
        "TreeID": 121240,
        "qLegalStatus": {
            "value" 2,
            "label": "Private"
        }
        "qSpecies": {
            "value": 16,
            "label": "Sycamore"
        }
        "qAddress": "91 Commonwealth Ave",
        ...
    }

The labels option also works for the HTML and CSV views.

HTML defaults to `?_labels=on`, so if you pass `?_labels=off` you can disable
foreign key expansion entirely - or you can use `?_label=COLUMN` to request
just specific columns.

If you expand labels on CSV you get additional columns in the output:

`/Street_Tree_List.csv?_label=qLegalStatus`

    rowid,TreeID,qLegalStatus,qLegalStatus_label...
    1,141565,1,Permitted Site...
    2,232565,2,Undocumented...

I also refactored the existing foreign key expansion code.

Closes #233. Refs #266.
asgi-first-attempt
Simon Willison 2018-06-16 15:18:57 -07:00
rodzic 0c22fa8f09
commit ed631e690b
Nie znaleziono w bazie danych klucza dla tego podpisu
ID klucza GPG: 17E2DEA2588B7F52
9 zmienionych plików z 276 dodań i 69 usunięć

Wyświetl plik

@ -1,4 +1,5 @@
from contextlib import contextmanager
from collections import OrderedDict
import base64
import hashlib
import imp
@ -802,6 +803,23 @@ def path_with_format(request, format, extra_qs=None):
return path
class CustomRow(OrderedDict):
# Loose imitation of sqlite3.Row which offers
# both index-based AND key-based lookups
def __init__(self, columns):
self.columns = columns
def __getitem__(self, key):
if isinstance(key, int):
return super().__getitem__(self.columns[key])
else:
return super().__getitem__(key)
def __iter__(self):
for column in self.columns:
yield self[column]
def value_as_boolean(value):
if value.lower() not in ('on', 'off', 'true', 'false', '1', '0'):
raise ValueAsBooleanError

Wyświetl plik

@ -168,11 +168,33 @@ class BaseView(RenderMixin):
except DatasetteError:
raise
# Convert rows and columns to CSV
headings = data["columns"]
# if there are columns_expanded we need to add additional headings
columns_expanded = set(data.get("columns_expanded") or [])
if columns_expanded:
headings = []
for column in data["columns"]:
headings.append(column)
if column in columns_expanded:
headings.append("{}_label".format(column))
async def stream_fn(r):
writer = csv.writer(r)
writer.writerow(data["columns"])
writer.writerow(headings)
for row in data["rows"]:
writer.writerow(row)
if not columns_expanded:
# Simple path
writer.writerow(row)
else:
# Look for {"value": "label": } dicts and expand
new_row = []
for cell in row:
if isinstance(cell, dict):
new_row.append(cell["value"])
new_row.append(cell["label"])
else:
new_row.append(cell)
writer.writerow(new_row)
content_type = "text/plain; charset=utf-8"
headers = {}
@ -208,6 +230,10 @@ class BaseView(RenderMixin):
if _format == "csv":
return await self.as_csv(request, name, hash, **kwargs)
if _format is None:
# HTML views default to expanding all forign key labels
kwargs['default_labels'] = True
extra_template_data = {}
start = time.time()
status_code = 200

Wyświetl plik

@ -9,7 +9,7 @@ from .base import BaseView, DatasetteError
class DatabaseView(BaseView):
async def data(self, request, name, hash):
async def data(self, request, name, hash, default_labels=False):
if request.args.get("sql"):
if not self.ds.config["allow_sql"]:
raise DatasetteError("sql= is not allowed", status=400)

Wyświetl plik

@ -1,3 +1,4 @@
from collections import namedtuple
import sqlite3
import urllib
@ -6,6 +7,7 @@ from sanic.exceptions import NotFound
from sanic.request import RequestParameters
from datasette.utils import (
CustomRow,
Filters,
InterruptedError,
compound_keys_after_sql,
@ -18,24 +20,45 @@ from datasette.utils import (
path_with_replaced_args,
to_css_class,
urlsafe_components,
value_as_boolean,
)
from .base import BaseView, DatasetteError, ureg
LINK_WITH_LABEL = '<a href="/{database}/{table}/{link_id}">{label}</a>&nbsp;<em>{id}</em>'
LINK_WITH_VALUE = '<a href="/{database}/{table}/{link_id}">{id}</a>'
class RowTableShared(BaseView):
def sortable_columns_for_table(self, name, table, use_rowid):
table_metadata = self.table_metadata(name, table)
def sortable_columns_for_table(self, database, table, use_rowid):
table_metadata = self.table_metadata(database, table)
if "sortable_columns" in table_metadata:
sortable_columns = set(table_metadata["sortable_columns"])
else:
table_info = self.ds.inspect()[name]["tables"].get(table) or {}
table_info = self.ds.inspect()[database]["tables"].get(table) or {}
sortable_columns = set(table_info.get("columns", []))
if use_rowid:
sortable_columns.add("rowid")
return sortable_columns
def expandable_columns(self, database, table):
# Returns list of (fk_dict, label_column-or-None) pairs for that table
tables = self.ds.inspect()[database].get("tables", {})
table_info = tables.get(table)
if not table_info:
return []
expandables = []
for fk in table_info["foreign_keys"]["outgoing"]:
label_column = (
self.table_metadata(
database, fk["other_table"]
).get("label_column")
or tables.get(fk["other_table"], {}).get("label_column")
) or None
expandables.append((fk, label_column))
return expandables
async def expand_foreign_keys(self, database, table, column, values):
"Returns dict mapping (column, value) -> label"
labeled_fks = {}
@ -60,7 +83,10 @@ class RowTableShared(BaseView):
or tables_info.get(fk["other_table"], {}).get("label_column")
)
if not label_column:
return {}
return {
(fk["column"], value): str(value)
for value in values
}
labeled_fks = {}
sql = '''
select {other_column}, {label_column}
@ -90,7 +116,6 @@ class RowTableShared(BaseView):
description,
rows,
link_column=False,
expand_foreign_keys=True,
):
"Returns columns, rows for specified table - including fancy foreign key treatment"
table_metadata = self.table_metadata(database, table)
@ -102,44 +127,12 @@ class RowTableShared(BaseView):
tables = info["tables"]
table_info = tables.get(table) or {}
pks = table_info.get("primary_keys") or []
# Prefetch foreign key resolutions for later expansion:
fks = {}
labeled_fks = {}
if table_info and expand_foreign_keys:
foreign_keys = table_info["foreign_keys"]["outgoing"]
for fk in foreign_keys:
label_column = (
# First look in metadata.json definition for this foreign key table:
self.table_metadata(database, fk["other_table"]).get("label_column")
# Fall back to label_column from .inspect() detection:
or tables.get(fk["other_table"], {}).get("label_column")
)
if not label_column:
# No label for this FK
fks[fk["column"]] = fk["other_table"]
continue
ids_to_lookup = set([row[fk["column"]] for row in rows])
sql = '''
select {other_column}, {label_column}
from {other_table}
where {other_column} in ({placeholders})
'''.format(
other_column=escape_sqlite(fk["other_column"]),
label_column=escape_sqlite(label_column),
other_table=escape_sqlite(fk["other_table"]),
placeholders=", ".join(["?"] * len(ids_to_lookup)),
)
try:
results = await self.ds.execute(
database, sql, list(set(ids_to_lookup))
)
except InterruptedError:
pass
else:
for id, value in results:
labeled_fks[(fk["column"], id)] = (fk["other_table"], value)
column_to_foreign_key_table = {
fk["column"]: fk["other_table"]
for fk in table_info.get(
"foreign_keys", {}
).get("outgoing", None) or []
}
cell_rows = []
for row in rows:
@ -172,26 +165,22 @@ class RowTableShared(BaseView):
# already shown in the link column.
continue
if (column, value) in labeled_fks:
other_table, label = labeled_fks[(column, value)]
display_value = jinja2.Markup(
'<a href="/{database}/{table}/{link_id}">{label}</a>&nbsp;<em>{id}</em>'.format(
database=database,
table=urllib.parse.quote_plus(other_table),
link_id=urllib.parse.quote_plus(str(value)),
id=str(jinja2.escape(value)),
label=str(jinja2.escape(label)),
)
)
elif column in fks:
display_value = jinja2.Markup(
'<a href="/{database}/{table}/{link_id}">{id}</a>'.format(
database=database,
table=urllib.parse.quote_plus(fks[column]),
link_id=urllib.parse.quote_plus(str(value)),
id=str(jinja2.escape(value)),
)
if isinstance(value, dict):
# It's an expanded foreign key - display link to other row
label = value["label"]
value = value["value"]
# The table we link to depends on the column
other_table = column_to_foreign_key_table[column]
link_template = (
LINK_WITH_LABEL if (label != value) else LINK_WITH_VALUE
)
display_value = jinja2.Markup(link_template.format(
database=database,
table=urllib.parse.quote_plus(other_table),
link_id=urllib.parse.quote_plus(str(value)),
id=str(jinja2.escape(value)),
label=str(jinja2.escape(label)),
))
elif value is None:
display_value = jinja2.Markup("&nbsp;")
elif is_url(str(value).strip()):
@ -231,7 +220,7 @@ class RowTableShared(BaseView):
class TableView(RowTableShared):
async def data(self, request, name, hash, table):
async def data(self, request, name, hash, table, default_labels=False):
canned_query = self.ds.get_canned_query(name, table)
if canned_query is not None:
return await self.custom_sql(
@ -583,6 +572,54 @@ class TableView(RowTableShared):
if use_rowid and filter_columns[0] == "rowid":
filter_columns = filter_columns[1:]
# Expand labeled columns if requested
columns_expanded = []
expandable_columns = self.expandable_columns(name, table)
columns_to_expand = None
try:
all_labels = value_as_boolean(special_args.get("_labels", ""))
except ValueError:
all_labels = default_labels
# Check for explicit _label=
if "_label" in request.args:
columns_to_expand = request.args["_label"]
if columns_to_expand is None and all_labels:
# expand all columns with foreign keys
columns_to_expand = [
fk["column"] for fk, _ in expandable_columns
]
if columns_to_expand:
expanded_labels = {}
for fk, label_column in expandable_columns:
column = fk["column"]
if column not in columns_to_expand:
continue
columns_expanded.append(column)
# Gather the values
column_index = columns.index(column)
values = [row[column_index] for row in rows]
# Expand them
expanded_labels.update(await self.expand_foreign_keys(
name, table, column, values
))
if expanded_labels:
# Rewrite the rows
new_rows = []
for row in rows:
new_row = CustomRow(columns)
for column in row.keys():
value = row[column]
if (column, value) in expanded_labels:
new_row[column] = {
'value': value,
'label': expanded_labels[(column, value)]
}
else:
new_row[column] = value
new_rows.append(new_row)
rows = new_rows
# Pagination next link
next_value = None
next_url = None
@ -681,7 +718,6 @@ class TableView(RowTableShared):
results.description,
rows,
link_column=not is_view,
expand_foreign_keys=True,
)
metadata = self.ds.metadata.get("databases", {}).get(name, {}).get(
"tables", {}
@ -735,6 +771,7 @@ class TableView(RowTableShared):
"truncated": results.truncated,
"table_rows_count": table_rows_count,
"filtered_table_rows_count": filtered_table_rows_count,
"columns_expanded": columns_expanded,
"columns": columns,
"primary_keys": pks,
"units": units,
@ -751,7 +788,7 @@ class TableView(RowTableShared):
class RowView(RowTableShared):
async def data(self, request, name, hash, table, pk_path):
async def data(self, request, name, hash, table, pk_path, default_labels=False):
pk_values = urlsafe_components(pk_path)
info = self.ds.inspect()[name]
table_info = info["tables"].get(table) or {}
@ -781,7 +818,6 @@ class RowView(RowTableShared):
results.description,
rows,
link_column=False,
expand_foreign_keys=True,
)
for column in display_columns:
column["sortable"] = False

Wyświetl plik

@ -163,6 +163,12 @@ Special table arguments
The Datasette table view takes a number of special querystring arguments:
``?_labels=on/off``
Expand foreign key references for every possible column. See below.
``?_label=COLUMN1&_label=COLUMN2``
Expand foreign key references for one or more specified columns.
``?_size=1000`` or ``?_size=max``
Sets a custom page size. This cannot exceed the ``max_returned_rows`` limit
passed to ``datasette serve``. Use ``max`` to get ``max_returned_rows``.
@ -193,3 +199,38 @@ The Datasette table view takes a number of special querystring arguments:
``?_next=TOKEN``
Pagination by continuation token - pass the token that was returned in the
``"next"`` property by the previous page.
``?_labels=1``
Indicates that you would like to expand any foreign key references. These
will be exposed in the JSON as ``{"value": 3, "label": "Monterey"}``.
Expanding foreign key references
--------------------------------
Datasette can detect foreign key relationships and resolve those references into
labels. The HTML interface does this by default for every detected foreign key
column - you can turn that off using ``?_labels=off``.
You can request foreign keys be expanded in JSON using the ``_labels=on`` or
``_label=COLUMN`` special querystring parameters. Here's what an expanded row
looks like::
[
{
"rowid": 1,
"TreeID": 141565,
"qLegalStatus": {
"value": 1,
"label": "Permitted Site"
},
"qSpecies": {
"value": 1,
"label": "Myoporum laetum :: Myoporum"
},
"qAddress": "501X Baker St",
"SiteOrder": 1
}
]
The column in the foreign key table that is used for the label can be specified
in ``metadata.json`` - see :ref:`label_columns`.

Wyświetl plik

@ -121,6 +121,7 @@ This will restrict sorting of ``example_table`` to just the ``height`` and
You can also disable sorting entirely by setting ``"sortable_columns": []``
.. _label_columns:
Specifying the label column for a table
---------------------------------------

Wyświetl plik

@ -1096,6 +1096,52 @@ def test_suggest_facets_off():
).json["suggested_facets"]
def test_expand_labels(app_client):
response = app_client.get(
"/test_tables/facetable.json?_shape=object&_labels=1&_size=2"
"&neighborhood__contains=c"
)
assert {
"2": {
"pk": 2,
"planet_int": 1,
"state": "CA",
"city_id": {
"value": 1,
"label": "San Francisco"
},
"neighborhood": "Dogpatch"
},
"13": {
"pk": 13,
"planet_int": 1,
"state": "MI",
"city_id": {
"value": 3,
"label": "Detroit"
},
"neighborhood": "Corktown"
}
} == response.json
def test_expand_label(app_client):
response = app_client.get(
"/test_tables/foreign_key_references.json?_shape=object"
"&_label=foreign_key_with_label"
)
assert {
"1": {
"pk": "1",
"foreign_key_with_label": {
"value": "1",
"label": "hello"
},
"foreign_key_with_no_label": "1"
}
} == response.json
@pytest.mark.parametrize('path,expected_cache_control', [
("/test_tables/facetable.json", "max-age=31536000"),
("/test_tables/facetable.json?_ttl=invalid", "max-age=31536000"),

Wyświetl plik

@ -11,6 +11,24 @@ hello
world
'''.replace('\n', '\r\n')
EXPECTED_TABLE_WITH_LABELS_CSV = '''
pk,planet_int,state,city_id,city_id_label,neighborhood
1,1,CA,1,San Francisco,Mission
2,1,CA,1,San Francisco,Dogpatch
3,1,CA,1,San Francisco,SOMA
4,1,CA,1,San Francisco,Tenderloin
5,1,CA,1,San Francisco,Bernal Heights
6,1,CA,1,San Francisco,Hayes Valley
7,1,CA,2,Los Angeles,Hollywood
8,1,CA,2,Los Angeles,Downtown
9,1,CA,2,Los Angeles,Los Feliz
10,1,CA,2,Los Angeles,Koreatown
11,1,MI,3,Detroit,Downtown
12,1,MI,3,Detroit,Greektown
13,1,MI,3,Detroit,Corktown
14,1,MI,3,Detroit,Mexicantown
15,2,MC,4,Memnonia,Arcadia Planitia
'''.lstrip().replace('\n', '\r\n')
def test_table_csv(app_client):
response = app_client.get('/test_tables/simple_primary_key.csv')
@ -19,6 +37,13 @@ def test_table_csv(app_client):
assert EXPECTED_TABLE_CSV == response.text
def test_table_csv_with_labels(app_client):
response = app_client.get('/test_tables/facetable.csv?_labels=1')
assert response.status == 200
assert 'text/plain; charset=utf-8' == response.headers['Content-Type']
assert EXPECTED_TABLE_WITH_LABELS_CSV == response.text
def test_custom_sql_csv(app_client):
response = app_client.get(
'/test_tables.csv?sql=select+content+from+simple_primary_key+limit+2'

Wyświetl plik

@ -388,6 +388,20 @@ def test_table_html_foreign_key_links(app_client):
assert expected == [[str(td) for td in tr.select('td')] for tr in table.select('tbody tr')]
def test_table_html_disable_foreign_key_links_with_labels(app_client):
response = app_client.get('/test_tables/foreign_key_references?_labels=off')
assert response.status == 200
table = Soup(response.body, 'html.parser').find('table')
expected = [
[
'<td class="col-pk"><a href="/test_tables/foreign_key_references/1">1</a></td>',
'<td class="col-foreign_key_with_label">1</td>',
'<td class="col-foreign_key_with_no_label">1</td>'
]
]
assert expected == [[str(td) for td in tr.select('td')] for tr in table.select('tbody tr')]
def test_table_html_foreign_key_custom_label_column(app_client):
response = app_client.get('/test_tables/custom_foreign_key_label')
assert response.status == 200