From eaaa3ea1498df0d4790f814e3d2f3fcc5ea28429 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sat, 12 May 2018 19:29:06 -0300 Subject: [PATCH] Initial implementation of facets, plus tests and docs Refs #255 --- datasette/app.py | 41 +++++++++++++++++-- datasette/templates/table.html | 9 ++++ docs/facets.rst | 70 +++++++++++++++++++++++++++++++ docs/index.rst | 1 + docs/metadata.rst | 2 + tests/fixtures.py | 23 +++++++++++ tests/test_api.py | 75 +++++++++++++++++++++++++++++++++- 7 files changed, 217 insertions(+), 4 deletions(-) create mode 100644 docs/facets.rst diff --git a/datasette/app.py b/datasette/app.py index 32ef8619..beb7e924 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -733,8 +733,8 @@ class TableView(RowTableShared): forward_querystring=False ) - units = self.table_metadata(name, table).get('units', {}) - + table_metadata = self.table_metadata(name, table) + units = table_metadata.get('units', {}) filters = Filters(sorted(other_args.items()), units, ureg) where_clauses, params = filters.build_where_clauses() @@ -799,12 +799,13 @@ class TableView(RowTableShared): raise DatasetteError('Cannot use _sort and _sort_desc at the same time') order_by = '{} desc'.format(escape_sqlite(sort_desc)) - count_sql = 'select count(*) from {table_name} {where}'.format( + from_sql = 'from {table_name} {where}'.format( table_name=escape_sqlite(table), where=( 'where {} '.format(' and '.join(where_clauses)) ) if where_clauses else '', ) + count_sql = 'select count(*) {}'.format(from_sql) _next = special_args.get('_next') offset = '' @@ -933,6 +934,39 @@ class TableView(RowTableShared): name, sql, params, truncate=True, **extra_args ) + # facets support + try: + facets = request.args['_facet'] + except KeyError: + facets = table_metadata.get('facets', []) + facet_results = {} + for column in facets: + facet_sql = ''' + select {col} as value, count(*) as count + {from_sql} + group by {col} order by count desc limit 20 + '''.format(col=escape_sqlite(column), from_sql=from_sql) + try: + facet_rows = await self.execute( + name, + facet_sql, + params, + truncate=False, + custom_time_limit=200 + ) + facet_results[column] = [{ + 'value': row['value'], + 'count': row['count'], + 'toggle_url': urllib.parse.urljoin( + request.url, path_with_added_args( + request, {column: row['value']} + ) + ) + } for row in facet_rows] + except sqlite3.OperationalError: + # Hit time limit + pass + columns = [r[0] for r in description] rows = list(rows) @@ -1043,6 +1077,7 @@ class TableView(RowTableShared): 'sql': sql, 'params': params, }, + 'facet_results': facet_results, 'next': next_value and str(next_value) or None, 'next_url': next_url, }, extra_template, ( diff --git a/datasette/templates/table.html b/datasette/templates/table.html index 5a02fdb9..ba582ba9 100644 --- a/datasette/templates/table.html +++ b/datasette/templates/table.html @@ -91,6 +91,15 @@

This data as .json

+{% for facet_name, facet_values in facet_results.items() %} +

{{ facet_name }}

+ +{% endfor %} + {% include custom_rows_and_columns_templates %} {% if next_url %} diff --git a/docs/facets.rst b/docs/facets.rst new file mode 100644 index 00000000..40499e55 --- /dev/null +++ b/docs/facets.rst @@ -0,0 +1,70 @@ +.. _facets: + +Facets +====== + +This feature is currently under development, see `#255 `_ + +Datasette facets can be used to add a faceted browse interface to any Datasette table. With facets, tables are displayed along with a summary showing the most common values in specified columns. These values can be selected to further filter the table. + +Facets can be specified in two ways: using queryset parameters, or in ``metadata.json`` configuration for the table. + +Facets in querystrings +---------------------- + +To turn on faceting for specific columns on a Datasette table view, add one or more ``_facet=COLUMN`` parameters to the URL. For example, if you want to turn on facets for the ``city`` and ``state`` columns, construct a URL that looks like this:: + + /dbname/tablename?_facet=state&_facet=city + +This works for both the HTML interface and the ``.json`` view. When enabled, facets will cause a ``facet_results`` block to be added to the JSON output, looking something like this:: + + "facet_results": { + "state": [ + { + "value": "CA", + "count": 10, + "toggle_url": "http://...&state=CA" + }, + { + "value": "MI", + "count": 4, + "toggle_url": "http://...&state=MI" + } + ], + "city": [ + { + "value": "San Francisco", + "count": 6, + "toggle_url": "http://...=San+Francisco" + }, + { + "value": "Detroit", + "count": 4, + "toggle_url": "http://...&city=Detroit" + }, + { + "value": "Los Angeles", + "count": 4, + "toggle_url": "http://...=Los+Angeles" + } + ] + } + +Facets in metadata.json +----------------------- + +You can turn facets on by default for specific tables by adding them to a ``"facets"`` key in a Datasette :ref:`metadata` file. + +Here's an example that turns on faceting by default for the ``qLegalStatus`` column in the ``Street_Tree_List`` table in the ``sf-trees`` database:: + + { + "databases": { + "sf-trees": { + "tables": { + "Street_Tree_List": { + "facets": ["qLegalStatus"] + } + } + } + } + } diff --git a/docs/index.rst b/docs/index.rst index 5216fef3..a3c93a1e 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -19,6 +19,7 @@ Contents getting_started json_api sql_queries + facets full_text_search metadata custom_templates diff --git a/docs/metadata.rst b/docs/metadata.rst index 3dd62940..6781ba83 100644 --- a/docs/metadata.rst +++ b/docs/metadata.rst @@ -1,3 +1,5 @@ +.. _metadata: + Metadata ======== diff --git a/tests/fixtures.py b/tests/fixtures.py index 48ca0fe8..2f2c32ff 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -267,6 +267,29 @@ CREATE TABLE [select] ( ); INSERT INTO [select] VALUES ('group', 'having', 'and'); +CREATE TABLE facetable ( + pk integer primary key, + state text, + city text, + neighborhood text +); +INSERT INTO facetable (state, city, neighborhood) VALUES + ('CA', 'San Francisco', 'Mission'), + ('CA', 'San Francisco', 'Dogpatch'), + ('CA', 'San Francisco', 'SOMA'), + ('CA', 'San Francisco', 'Tenderloin'), + ('CA', 'San Francisco', 'Bernal Heights'), + ('CA', 'San Francisco', 'Hayes Valley'), + ('CA', 'Los Angeles', 'Hollywood'), + ('CA', 'Los Angeles', 'Downtown'), + ('CA', 'Los Angeles', 'Los Feliz'), + ('CA', 'Los Angeles', 'Koreatown'), + ('MI', 'Detroit', 'Downtown'), + ('MI', 'Detroit', 'Greektown'), + ('MI', 'Detroit', 'Corktown'), + ('MI', 'Detroit', 'Mexicantown') +; + INSERT INTO simple_primary_key VALUES (1, 'hello'); INSERT INTO simple_primary_key VALUES (2, 'world'); INSERT INTO simple_primary_key VALUES (3, ''); diff --git a/tests/test_api.py b/tests/test_api.py index 30ad13b2..3f91b075 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -19,7 +19,7 @@ def test_homepage(app_client): assert response.json.keys() == {'test_tables': 0}.keys() d = response.json['test_tables'] assert d['name'] == 'test_tables' - assert d['tables_count'] == 15 + assert d['tables_count'] == 16 def test_database_page(app_client): @@ -104,6 +104,15 @@ def test_database_page(app_client): 'label_column': None, 'fts_table': None, 'primary_keys': ['pk'], + }, { + 'columns': ['pk', 'state', 'city', 'neighborhood'], + 'name': 'facetable', + 'count': 14, + 'foreign_keys': {'incoming': [], 'outgoing': []}, + 'fts_table': None, + 'hidden': False, + 'label_column': None, + 'primary_keys': ['pk'], }, { 'columns': ['pk', 'foreign_key_with_label', 'foreign_key_with_no_label'], 'name': 'foreign_key_references', @@ -878,3 +887,67 @@ def test_page_size_matching_max_returned_rows(app_client_returend_rows_matches_p assert len(response.json['rows']) in (1, 50) path = response.json['next_url'] assert 201 == len(fetched) + + +@pytest.mark.parametrize('path,expected_facet_results', [ + ( + "/test_tables/facetable.json?_facet=state&_facet=city", + { + "state": [ + { + "value": "CA", + "count": 10, + "toggle_url": "_facet=state&_facet=city&state=CA", + }, + { + "value": "MI", + "count": 4, + "toggle_url": "_facet=state&_facet=city&state=MI", + }, + ], + "city": [ + { + "value": "San Francisco", + "count": 6, + "toggle_url": "_facet=state&_facet=city&city=San+Francisco", + }, + { + "value": "Detroit", + "count": 4, + "toggle_url": "_facet=state&_facet=city&city=Detroit", + }, + { + "value": "Los Angeles", + "count": 4, + "toggle_url": "_facet=state&_facet=city&city=Los+Angeles", + }, + ], + }, + ), ( + "/test_tables/facetable.json?_facet=state&_facet=city&state=MI", + { + "state": [ + { + "value": "MI", + "count": 4, + "toggle_url": "_facet=state&_facet=city&state=MI", + }, + ], + "city": [ + { + "value": "Detroit", + "count": 4, + "toggle_url": "_facet=state&_facet=city&state=MI&city=Detroit", + }, + ], + }, + ) +]) +def test_facets(app_client, path, expected_facet_results): + response = app_client.get(path, gather_request=False) + facet_results = response.json['facet_results'] + # We only compare the querystring portion of the taggle_url + for facet_name, facet_values in facet_results.items(): + for facet_value in facet_values: + facet_value['toggle_url'] = facet_value['toggle_url'].split('?')[1] + assert expected_facet_results == facet_results