Initial implementation of facets, plus tests and docs

Refs #255
py35
Simon Willison 2018-05-12 19:29:06 -03:00
rodzic 70ff615f1b
commit eaaa3ea149
Nie znaleziono w bazie danych klucza dla tego podpisu
ID klucza GPG: 17E2DEA2588B7F52
7 zmienionych plików z 217 dodań i 4 usunięć

Wyświetl plik

@ -733,8 +733,8 @@ class TableView(RowTableShared):
forward_querystring=False
)
units = self.table_metadata(name, table).get('units', {})
table_metadata = self.table_metadata(name, table)
units = table_metadata.get('units', {})
filters = Filters(sorted(other_args.items()), units, ureg)
where_clauses, params = filters.build_where_clauses()
@ -799,12 +799,13 @@ class TableView(RowTableShared):
raise DatasetteError('Cannot use _sort and _sort_desc at the same time')
order_by = '{} desc'.format(escape_sqlite(sort_desc))
count_sql = 'select count(*) from {table_name} {where}'.format(
from_sql = 'from {table_name} {where}'.format(
table_name=escape_sqlite(table),
where=(
'where {} '.format(' and '.join(where_clauses))
) if where_clauses else '',
)
count_sql = 'select count(*) {}'.format(from_sql)
_next = special_args.get('_next')
offset = ''
@ -933,6 +934,39 @@ class TableView(RowTableShared):
name, sql, params, truncate=True, **extra_args
)
# facets support
try:
facets = request.args['_facet']
except KeyError:
facets = table_metadata.get('facets', [])
facet_results = {}
for column in facets:
facet_sql = '''
select {col} as value, count(*) as count
{from_sql}
group by {col} order by count desc limit 20
'''.format(col=escape_sqlite(column), from_sql=from_sql)
try:
facet_rows = await self.execute(
name,
facet_sql,
params,
truncate=False,
custom_time_limit=200
)
facet_results[column] = [{
'value': row['value'],
'count': row['count'],
'toggle_url': urllib.parse.urljoin(
request.url, path_with_added_args(
request, {column: row['value']}
)
)
} for row in facet_rows]
except sqlite3.OperationalError:
# Hit time limit
pass
columns = [r[0] for r in description]
rows = list(rows)
@ -1043,6 +1077,7 @@ class TableView(RowTableShared):
'sql': sql,
'params': params,
},
'facet_results': facet_results,
'next': next_value and str(next_value) or None,
'next_url': next_url,
}, extra_template, (

Wyświetl plik

@ -91,6 +91,15 @@
<p>This data as <a href="{{ url_json }}">.json</a></p>
{% for facet_name, facet_values in facet_results.items() %}
<p><strong>{{ facet_name }}</strong></p>
<ul>
{% for facet_value in facet_values %}
<li><a href="{{ facet_value.toggle_url }}">{{ facet_value.value }}</a> ({{ facet_value.count }})</li>
{% endfor %}
</ul>
{% endfor %}
{% include custom_rows_and_columns_templates %}
{% if next_url %}

70
docs/facets.rst 100644
Wyświetl plik

@ -0,0 +1,70 @@
.. _facets:
Facets
======
This feature is currently under development, see `#255 <https://github.com/simonw/datasette/issues/255>`_
Datasette facets can be used to add a faceted browse interface to any Datasette table. With facets, tables are displayed along with a summary showing the most common values in specified columns. These values can be selected to further filter the table.
Facets can be specified in two ways: using queryset parameters, or in ``metadata.json`` configuration for the table.
Facets in querystrings
----------------------
To turn on faceting for specific columns on a Datasette table view, add one or more ``_facet=COLUMN`` parameters to the URL. For example, if you want to turn on facets for the ``city`` and ``state`` columns, construct a URL that looks like this::
/dbname/tablename?_facet=state&_facet=city
This works for both the HTML interface and the ``.json`` view. When enabled, facets will cause a ``facet_results`` block to be added to the JSON output, looking something like this::
"facet_results": {
"state": [
{
"value": "CA",
"count": 10,
"toggle_url": "http://...&state=CA"
},
{
"value": "MI",
"count": 4,
"toggle_url": "http://...&state=MI"
}
],
"city": [
{
"value": "San Francisco",
"count": 6,
"toggle_url": "http://...=San+Francisco"
},
{
"value": "Detroit",
"count": 4,
"toggle_url": "http://...&city=Detroit"
},
{
"value": "Los Angeles",
"count": 4,
"toggle_url": "http://...=Los+Angeles"
}
]
}
Facets in metadata.json
-----------------------
You can turn facets on by default for specific tables by adding them to a ``"facets"`` key in a Datasette :ref:`metadata` file.
Here's an example that turns on faceting by default for the ``qLegalStatus`` column in the ``Street_Tree_List`` table in the ``sf-trees`` database::
{
"databases": {
"sf-trees": {
"tables": {
"Street_Tree_List": {
"facets": ["qLegalStatus"]
}
}
}
}
}

Wyświetl plik

@ -19,6 +19,7 @@ Contents
getting_started
json_api
sql_queries
facets
full_text_search
metadata
custom_templates

Wyświetl plik

@ -1,3 +1,5 @@
.. _metadata:
Metadata
========

Wyświetl plik

@ -267,6 +267,29 @@ CREATE TABLE [select] (
);
INSERT INTO [select] VALUES ('group', 'having', 'and');
CREATE TABLE facetable (
pk integer primary key,
state text,
city text,
neighborhood text
);
INSERT INTO facetable (state, city, neighborhood) VALUES
('CA', 'San Francisco', 'Mission'),
('CA', 'San Francisco', 'Dogpatch'),
('CA', 'San Francisco', 'SOMA'),
('CA', 'San Francisco', 'Tenderloin'),
('CA', 'San Francisco', 'Bernal Heights'),
('CA', 'San Francisco', 'Hayes Valley'),
('CA', 'Los Angeles', 'Hollywood'),
('CA', 'Los Angeles', 'Downtown'),
('CA', 'Los Angeles', 'Los Feliz'),
('CA', 'Los Angeles', 'Koreatown'),
('MI', 'Detroit', 'Downtown'),
('MI', 'Detroit', 'Greektown'),
('MI', 'Detroit', 'Corktown'),
('MI', 'Detroit', 'Mexicantown')
;
INSERT INTO simple_primary_key VALUES (1, 'hello');
INSERT INTO simple_primary_key VALUES (2, 'world');
INSERT INTO simple_primary_key VALUES (3, '');

Wyświetl plik

@ -19,7 +19,7 @@ def test_homepage(app_client):
assert response.json.keys() == {'test_tables': 0}.keys()
d = response.json['test_tables']
assert d['name'] == 'test_tables'
assert d['tables_count'] == 15
assert d['tables_count'] == 16
def test_database_page(app_client):
@ -104,6 +104,15 @@ def test_database_page(app_client):
'label_column': None,
'fts_table': None,
'primary_keys': ['pk'],
}, {
'columns': ['pk', 'state', 'city', 'neighborhood'],
'name': 'facetable',
'count': 14,
'foreign_keys': {'incoming': [], 'outgoing': []},
'fts_table': None,
'hidden': False,
'label_column': None,
'primary_keys': ['pk'],
}, {
'columns': ['pk', 'foreign_key_with_label', 'foreign_key_with_no_label'],
'name': 'foreign_key_references',
@ -878,3 +887,67 @@ def test_page_size_matching_max_returned_rows(app_client_returend_rows_matches_p
assert len(response.json['rows']) in (1, 50)
path = response.json['next_url']
assert 201 == len(fetched)
@pytest.mark.parametrize('path,expected_facet_results', [
(
"/test_tables/facetable.json?_facet=state&_facet=city",
{
"state": [
{
"value": "CA",
"count": 10,
"toggle_url": "_facet=state&_facet=city&state=CA",
},
{
"value": "MI",
"count": 4,
"toggle_url": "_facet=state&_facet=city&state=MI",
},
],
"city": [
{
"value": "San Francisco",
"count": 6,
"toggle_url": "_facet=state&_facet=city&city=San+Francisco",
},
{
"value": "Detroit",
"count": 4,
"toggle_url": "_facet=state&_facet=city&city=Detroit",
},
{
"value": "Los Angeles",
"count": 4,
"toggle_url": "_facet=state&_facet=city&city=Los+Angeles",
},
],
},
), (
"/test_tables/facetable.json?_facet=state&_facet=city&state=MI",
{
"state": [
{
"value": "MI",
"count": 4,
"toggle_url": "_facet=state&_facet=city&state=MI",
},
],
"city": [
{
"value": "Detroit",
"count": 4,
"toggle_url": "_facet=state&_facet=city&state=MI&city=Detroit",
},
],
},
)
])
def test_facets(app_client, path, expected_facet_results):
response = app_client.get(path, gather_request=False)
facet_results = response.json['facet_results']
# We only compare the querystring portion of the taggle_url
for facet_name, facet_values in facet_results.items():
for facet_value in facet_values:
facet_value['toggle_url'] = facet_value['toggle_url'].split('?')[1]
assert expected_facet_results == facet_results