From e7151ccccf4a0d10e1fcf3832e18c5199c16dea0 Mon Sep 17 00:00:00 2001
From: Simon Willison
Date: Wed, 1 May 2019 17:39:39 -0700
Subject: [PATCH] Index page no longer uses inspect data - refs #420

Also introduced a mechanism whereby table counts are calculated against a time limit but immutable databases have their table counts calculated on server startup.
---
 datasette/app.py               | 78 +++++++++++++++++++++++++++++++++-
 datasette/templates/index.html |  2 +-
 datasette/views/index.py       | 58 +++++++++++++++++++------
 tests/test_api.py              | 18 +++++---
 4 files changed, 133 insertions(+), 23 deletions(-)

diff --git a/datasette/app.py b/datasette/app.py
index 737b5654..bd950b87 100644
--- a/datasette/app.py
+++ b/datasette/app.py
@@ -30,6 +30,7 @@ from .renderer import json_renderer
 from .utils import (
     InterruptedError,
     Results,
+    detect_spatialite,
     escape_css_string,
     escape_sqlite,
     get_outbound_foreign_keys,
@@ -123,17 +124,38 @@ async def favicon(request):
 
 
 class ConnectedDatabase:
-    def __init__(self, path=None, is_mutable=False, is_memory=False):
+    def __init__(self, ds, path=None, is_mutable=False, is_memory=False):
+        self.ds = ds
         self.path = path
         self.is_mutable = is_mutable
         self.is_memory = is_memory
         self.hash = None
         self.size = None
+        self.cached_table_counts = None
         if not self.is_mutable:
             p = Path(path)
             self.hash = inspect_hash(p)
             self.size = p.stat().st_size
 
+    async def table_counts(self, limit=10):
+        if not self.is_mutable and self.cached_table_counts is not None:
+            return self.cached_table_counts
+        # Try to get counts for each table, $limit timeout for each count
+        counts = {}
+        for table in await self.table_names():
+            try:
+                table_count = (await self.ds.execute(
+                    self.name,
+                    "select count(*) from [{}]".format(table),
+                    custom_time_limit=limit,
+                )).rows[0][0]
+                counts[table] = table_count
+            except InterruptedError:
+                counts[table] = None
+        if not self.is_mutable:
+            self.cached_table_counts = counts
+        return counts
+
     @property
     def mtime_ns(self):
         return Path(self.path).stat().st_mtime_ns
@@ -145,6 +167,50 @@ class ConnectedDatabase:
         else:
             return Path(self.path).stem
 
+    async def table_names(self):
+        results = await self.ds.execute(self.name, "select name from sqlite_master where type='table'")
+        return [r[0] for r in results.rows]
+
+    async def hidden_table_names(self):
+        # Mark tables 'hidden' if they relate to FTS virtual tables
+        hidden_tables = [r[0] for r in (
+            await self.ds.execute(self.name, """
+                select name from sqlite_master
+                where rootpage = 0
+                and sql like '%VIRTUAL TABLE%USING FTS%'
+            """)
+        ).rows]
+        has_spatialite = await self.ds.execute_against_connection_in_thread(
+            self.name, detect_spatialite
+        )
+        if has_spatialite:
+            # Also hide Spatialite internal tables
+            hidden_tables += [
+                "ElementaryGeometries",
+                "SpatialIndex",
+                "geometry_columns",
+                "spatial_ref_sys",
+                "spatialite_history",
+                "sql_statements_log",
+                "sqlite_sequence",
+                "views_geometry_columns",
+                "virts_geometry_columns",
+            ] + [
+                r[0]
+                for r in (
+                    await self.ds.execute(self.name, """
+                        select name from sqlite_master
+                        where name like "idx_%"
+                        and type = "table"
+                    """)
+                ).rows
+            ]
+        return hidden_tables
+
+    async def view_names(self):
+        results = await self.ds.execute(self.name, "select name from sqlite_master where type='view'")
+        return [r[0] for r in results.rows]
+
     def __repr__(self):
         tags = []
         if self.is_mutable:
@@ -195,7 +261,8 @@ class Datasette:
             if file is MEMORY:
                 path = None
                 is_memory = True
-            db = ConnectedDatabase(path, is_mutable=path not in self.immutables, is_memory=is_memory)
+            is_mutable = path not in self.immutables
+            db = ConnectedDatabase(self, path, is_mutable=is_mutable, is_memory=is_memory)
             if db.name in self.databases:
                 raise Exception("Multiple files with same stem: {}".format(db.name))
             self.databases[db.name] = db
@@ -813,4 +880,11 @@ class Datasette:
             template = self.jinja_env.select_template(templates)
             return response.html(template.render(info), status=status)
 
+        # First time server starts up, calculate table counts for immutable databases
+        @app.listener("before_server_start")
+        async def setup_db(app, loop):
+            for dbname, database in self.databases.items():
+                if not database.is_mutable:
+                    await database.table_counts(limit=60*60*1000)
+
         return app
diff --git a/datasette/templates/index.html b/datasette/templates/index.html
index cb52740a..1133b84d 100644
--- a/datasette/templates/index.html
+++ b/datasette/templates/index.html
@@ -10,7 +10,7 @@
 {% block description_source_license %}{% include "_description_source_license.html" %}{% endblock %}
 
 {% for database in databases %}
-

{{ database.name }}

+

{{ database.name }}

         {{ "{:,}".format(database.table_rows_sum) }} rows in {{ database.tables_count }} table{% if database.tables_count != 1 %}s{% endif %}{% if database.tables_count and database.hidden_tables_count %}, {% endif %}
         {% if database.hidden_tables_count %}
diff --git a/datasette/views/index.py b/datasette/views/index.py
index 4eb116f3..6f42d152 100644
--- a/datasette/views/index.py
+++ b/datasette/views/index.py
@@ -1,8 +1,14 @@
+import hashlib
 import json
 
 from sanic import response
 
-from datasette.utils import CustomJSONEncoder
+from datasette.utils import (
+    CustomJSONEncoder,
+    InterruptedError,
+    detect_primary_keys,
+    detect_fts,
+)
 from datasette.version import __version__
 
 from .base import HASH_LENGTH, RenderMixin
@@ -16,26 +22,51 @@ class IndexView(RenderMixin):
 
     async def get(self, request, as_format):
         databases = []
-        for key, info in sorted(self.ds.inspect().items()):
-            tables = [t for t in info["tables"].values() if not t["hidden"]]
-            hidden_tables = [t for t in info["tables"].values() if t["hidden"]]
-            database = {
-                "name": key,
-                "hash": info["hash"],
-                "path": self.database_url(key),
+        for name, db in self.ds.databases.items():
+            table_counts = await db.table_counts(5)
+            views = await db.view_names()
+            tables = {}
+            hidden_table_names = set(await db.hidden_table_names())
+            for table in table_counts:
+                table_columns = await self.ds.table_columns(name, table)
+                tables[table] = {
+                    "name": table,
+                    "columns": table_columns,
+                    "primary_keys": await self.ds.execute_against_connection_in_thread(
+                        name, lambda conn: detect_primary_keys(conn, table)
+                    ),
+                    "count": table_counts[table],
+                    "hidden": table in hidden_table_names,
+                    "fts_table": await self.ds.execute_against_connection_in_thread(
+                        name, lambda conn: detect_fts(conn, table)
+                    ),
+                }
+            # Also mark as hidden any tables which start with the name of a hidden table
+            # e.g. "searchable_fts" implies "searchable_fts_content" should be hidden
+            for t in tables.keys():
+                for hidden_table in hidden_table_names:
+                    if t == hidden_table or t.startswith(hidden_table):
+                        tables[t]["hidden"] = True
+                        continue
+            hidden_tables = [t for t in tables.values() if t["hidden"]]
+
+            databases.append({
+                "name": name,
+                "hash": db.hash,
+                "color": db.hash[:6] if db.hash else hashlib.md5(name.encode("utf8")).hexdigest()[:6],
+                "path": self.database_url(name),
                 "tables_truncated": sorted(
-                    tables, key=lambda t: t["count"], reverse=True
+                    tables.values(), key=lambda t: t["count"] or 0, reverse=True
                 )[
                     :5
                 ],
                 "tables_count": len(tables),
                 "tables_more": len(tables) > 5,
-                "table_rows_sum": sum(t["count"] for t in tables),
+                "table_rows_sum": sum((t["count"] or 0) for t in tables.values()),
                 "hidden_table_rows_sum": sum(t["count"] for t in hidden_tables),
                 "hidden_tables_count": len(hidden_tables),
-                "views_count": len(info["views"]),
-            }
-            databases.append(database)
+                "views_count": len(views),
+            })
         if as_format:
             headers = {}
             if self.ds.cors:
@@ -45,7 +76,6 @@ class IndexView(RenderMixin):
                 content_type="application/json",
                 headers=headers,
             )
-
         else:
             return self.render(
                 ["index.html"],
diff --git a/tests/test_api.py b/tests/test_api.py
index 5043b7c2..26819507 100644
--- a/tests/test_api.py
+++ b/tests/test_api.py
@@ -18,12 +18,17 @@ import urllib
 
 
 def test_homepage(app_client):
-    response = app_client.get('/.json')
+    response = app_client.get("/.json")
     assert response.status == 200
-    assert response.json.keys() == {'fixtures': 0}.keys()
-    d = response.json['fixtures']
-    assert d['name'] == 'fixtures'
-    assert d['tables_count'] == 20
+    assert response.json.keys() == {"fixtures": 0}.keys()
+    d = response.json["fixtures"]
+    assert d["name"] == "fixtures"
+    assert d["tables_count"] == 25
+    assert len(d["tables_truncated"]) == 5
+    assert d["tables_more"] is True
+    assert d["hidden_table_rows_sum"] == 5
+    assert d["hidden_tables_count"] == 4
+    assert d["views_count"] == 4
 
 
 def test_database_page(app_client):
@@ -351,7 +356,8 @@ def test_no_files_uses_memory_database(app_client_no_files):
     assert response.status == 200
     assert {
         ":memory:": {
-            "hash": "000",
+            "hash": None,
+            "color": "f7935d",
             "hidden_table_rows_sum": 0,
             "hidden_tables_count": 0,
             "name": ":memory:",