From 5a1519796037105bc20bcf2f91a76e022926c204 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sat, 24 Oct 2020 16:09:18 -0700 Subject: [PATCH] /db/table/-/blob/pk/column.blob download URL, refs #1036 --- datasette/app.py | 6 ++- datasette/views/base.py | 6 +-- datasette/views/index.py | 3 -- datasette/views/special.py | 18 -------- datasette/views/table.py | 95 ++++++++++++++++++++++++++++++-------- docs/internals.rst | 10 +++- docs/pages.rst | 11 +++++ tests/test_html.py | 40 ++++++++++++++++ tests/test_permissions.py | 62 ++++++++++++++++--------- 9 files changed, 184 insertions(+), 67 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 37b3f3db..da62934e 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -38,7 +38,7 @@ from .views.special import ( PermissionsDebugView, MessagesDebugView, ) -from .views.table import RowView, TableView +from .views.table import RowView, TableView, BlobView from .renderer import json_renderer from .database import Database, QueryInterrupted @@ -923,6 +923,10 @@ class Datasette: + renderer_regex + r")?$", ) + add_route( + BlobView.as_view(self), + r"/(?P<db_name>[^/]+)/(?P<table>[^/]+?)/\-/blob/(?P<pk_path>[^/]+?)/(?P<column>[^/]+)\.blob$", + ) self._register_custom_units() async def setup_db(): diff --git a/datasette/views/base.py b/datasette/views/base.py index 06968e03..f9bbe45d 100644 --- a/datasette/views/base.py +++ b/datasette/views/base.py @@ -51,6 +51,9 @@ class DatasetteError(Exception): class BaseView: ds = None + def __init__(self, datasette): + self.ds = datasette + async def head(self, *args, **kwargs): response = await self.get(*args, **kwargs) response.body = b"" @@ -151,9 +154,6 @@ class DataView(BaseView): name = "" re_named_parameter = re.compile(":([a-zA-Z0-9_]+)") - def __init__(self, datasette): - self.ds = datasette - def options(self, request, *args, **kwargs): r = Response.text("ok") if self.ds.cors: diff --git a/datasette/views/index.py b/datasette/views/index.py index 92f60855..b6b8cbe5 100644 --- 
a/datasette/views/index.py +++ b/datasette/views/index.py @@ -18,9 +18,6 @@ COUNT_DB_SIZE_LIMIT = 100 * 1024 * 1024 class IndexView(BaseView): name = "index" - def __init__(self, datasette): - self.ds = datasette - async def get(self, request, as_format): await self.check_permission(request, "view-instance") databases = [] diff --git a/datasette/views/special.py b/datasette/views/special.py index 28af1e99..a9fc59b7 100644 --- a/datasette/views/special.py +++ b/datasette/views/special.py @@ -44,9 +44,6 @@ class JsonDataView(BaseView): class PatternPortfolioView(BaseView): name = "patterns" - def __init__(self, datasette): - self.ds = datasette - async def get(self, request): await self.check_permission(request, "view-instance") return await self.render(["patterns.html"], request=request) @@ -55,9 +52,6 @@ class PatternPortfolioView(BaseView): class AuthTokenView(BaseView): name = "auth_token" - def __init__(self, datasette): - self.ds = datasette - async def get(self, request): token = request.args.get("token") or "" if not self.ds._root_token: @@ -76,9 +70,6 @@ class AuthTokenView(BaseView): class LogoutView(BaseView): name = "logout" - def __init__(self, datasette): - self.ds = datasette - async def get(self, request): if not request.actor: return Response.redirect(self.ds.urls.instance()) @@ -98,9 +89,6 @@ class LogoutView(BaseView): class PermissionsDebugView(BaseView): name = "permissions_debug" - def __init__(self, datasette): - self.ds = datasette - async def get(self, request): await self.check_permission(request, "view-instance") if not await self.ds.permission_allowed(request.actor, "permissions-debug"): @@ -115,9 +103,6 @@ class PermissionsDebugView(BaseView): class AllowDebugView(BaseView): name = "allow_debug" - def __init__(self, datasette): - self.ds = datasette - async def get(self, request): errors = [] actor_input = request.args.get("actor") or '{"id": "root"}' @@ -152,9 +137,6 @@ class AllowDebugView(BaseView): class MessagesDebugView(BaseView): 
name = "messages_debug" - def __init__(self, datasette): - self.ds = datasette - async def get(self, request): await self.check_permission(request, "view-instance") return await self.render(["messages_debug.html"], request) diff --git a/datasette/views/table.py b/datasette/views/table.py index ea11a51d..b8c9ba55 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -23,9 +23,9 @@ from datasette.utils import ( urlsafe_components, value_as_boolean, ) -from datasette.utils.asgi import NotFound +from datasette.utils.asgi import NotFound, Response from datasette.filters import Filters -from .base import DataView, DatasetteError, ureg +from .base import BaseView, DataView, DatasetteError, ureg from .database import QueryView LINK_WITH_LABEL = ( @@ -903,28 +903,38 @@ class TableView(RowTableShared): ) +async def _sql_params_pks(db, table, pk_values): + pks = await db.primary_keys(table) + use_rowid = not pks + select = "*" + if use_rowid: + select = "rowid, *" + pks = ["rowid"] + wheres = ['"{}"=:p{}'.format(pk, i) for i, pk in enumerate(pks)] + sql = "select {} from {} where {}".format( + select, escape_sqlite(table), " AND ".join(wheres) + ) + params = {} + for i, pk_value in enumerate(pk_values): + params["p{}".format(i)] = pk_value + return sql, params, pks + + class RowView(RowTableShared): name = "row" async def data(self, request, database, hash, table, pk_path, default_labels=False): - pk_values = urlsafe_components(pk_path) - await self.check_permission(request, "view-instance") - await self.check_permission(request, "view-database", database) - await self.check_permission(request, "view-table", (database, table)) - db = self.ds.databases[database] - pks = await db.primary_keys(table) - use_rowid = not pks - select = "*" - if use_rowid: - select = "rowid, *" - pks = ["rowid"] - wheres = ['"{}"=:p{}'.format(pk, i) for i, pk in enumerate(pks)] - sql = "select {} from {} where {}".format( - select, escape_sqlite(table), " AND ".join(wheres) + await 
self.check_permissions( + request, + [ + ("view-table", (database, table)), + ("view-database", database), + "view-instance", + ], ) - params = {} - for i, pk_value in enumerate(pk_values): - params["p{}".format(i)] = pk_value + pk_values = urlsafe_components(pk_path) + db = self.ds.databases[database] + sql, params, pks = await _sql_params_pks(db, table, pk_values) results = await db.execute(sql, params, truncate=True) columns = [r[0] for r in results.description] rows = list(results.rows) @@ -1024,3 +1034,50 @@ class RowView(RowTableShared): ) foreign_key_tables.append({**fk, **{"count": count}}) return foreign_key_tables + + +class BlobView(BaseView): + async def get(self, request, db_name, table, pk_path, column): + await self.check_permissions( + request, + [ + ("view-table", (db_name, table)), + ("view-database", db_name), + "view-instance", + ], + ) + try: + db = self.ds.get_database(db_name) + except KeyError: + raise NotFound("Database {} does not exist".format(db_name)) + if not await db.table_exists(table): + raise NotFound("Table {} does not exist".format(table)) + # Ensure the column exists and is of type BLOB + column_types = {c.name: c.type for c in await db.table_column_details(table)} + if column not in column_types: + raise NotFound("Table {} does not have column {}".format(table, column)) + if column_types[column].upper() not in ("BLOB", ""): + raise NotFound( + "Table {} does not have column {} of type BLOB".format(table, column) + ) + # Ensure the row exists for the pk_path + pk_values = urlsafe_components(pk_path) + sql, params, _ = await _sql_params_pks(db, table, pk_values) + results = await db.execute(sql, params, truncate=True) + rows = list(results.rows) + if not rows: + raise NotFound("Record not found: {}".format(pk_values)) + + # Serve back the binary data + filename_bits = [to_css_class(table), pk_path, to_css_class(column)] + filename = "-".join(filename_bits) + ".blob" + headers = { + "X-Content-Type-Options": "nosniff", + 
"Content-Disposition": 'attachment; filename="{}"'.format(filename), + } + return Response( + body=rows[0][column], + status=200, + headers=headers, + content_type="application/binary", + ) diff --git a/docs/internals.rst b/docs/internals.rst index 439fe5b3..dbb9478c 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -385,7 +385,15 @@ The ``datasette.urls`` object contains methods for building URLs to pages within For example, ``datasette.urls.path("-/logout")`` will return the path to the logout page, which will be ``"/-/logout"`` by default or ``/prefix-path/-/logout`` if ``base_url`` is set to ``/prefix-path/`` ``datasette.urls.logout()`` - Returns the URL to the logout page, usually ``"/-/logout"``. + Returns the URL to the logout page, usually ``"/-/logout"``. + +``datasette.urls.static(path)`` + Returns the URL of one of Datasette's default static assets, for example ``"/-/static/app.css"``. + +``datasette.urls.static_plugins(plugin_name, path)`` + Returns the URL of one of the static assets belonging to a plugin. + + ``datasette.urls.static_plugins("datasette_cluster_map", "datasette-cluster-map.js")`` would return ``"/-/static-plugins/datasette_cluster_map/datasette-cluster-map.js"``. ``datasette.urls.static(path)`` Returns the URL of one of Datasette's default static assets, for example ``"/-/static/app.css"``. diff --git a/docs/pages.rst b/docs/pages.rst index db970ead..3ad58565 100644 --- a/docs/pages.rst +++ b/docs/pages.rst @@ -77,3 +77,14 @@ Note that this URL includes the encoded primary key of the record. Here's that same page as JSON: `../people/uk.org.publicwhip%2Fperson%2F10001.json `_ + +.. _BlobView: + +Blob +==== + +SQLite databases can contain binary data, stored in a ``BLOB`` column. 
Datasette makes the content of these columns available to download directly, at URLs that look like the following:: + + /database-name/table-name/-/blob/row-identifier/column-name.blob + +Binary content is also made available as a base64 encoded string in the ``.json`` representation of the row. diff --git a/tests/test_html.py b/tests/test_html.py index 8708967e..29c19844 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -1244,6 +1244,46 @@ def test_binary_data_display(app_client): ] +def test_blob_download(app_client): + response = app_client.get("/fixtures/binary_data/-/blob/1/data.blob") + assert response.status == 200 + assert response.body == b"\x15\x1c\x02\xc7\xad\x05\xfe" + assert response.headers["x-content-type-options"] == "nosniff" + assert ( + response.headers["content-disposition"] + == 'attachment; filename="binary_data-1-data.blob"' + ) + assert response.headers["content-type"] == "application/binary" + + +@pytest.mark.parametrize( + "path,expected_message", + [ + ("/baddb/binary_data/-/blob/1/data.blob", "Database baddb does not exist"), + ( + "/fixtures/binary_data_bad/-/blob/1/data.blob", + "Table binary_data_bad does not exist", + ), + ( + "/fixtures/binary_data/-/blob/1/bad.blob", + "Table binary_data does not have column bad", + ), + ( + "/fixtures/facetable/-/blob/1/state.blob", + "Table facetable does not have column state of type BLOB", + ), + ( + "/fixtures/binary_data/-/blob/101/data.blob", + "Record not found: ['101']", + ), + ], +) +def test_blob_download_not_found_messages(app_client, path, expected_message): + response = app_client.get(path) + assert response.status == 404 + assert expected_message in response.text + + def test_metadata_json_html(app_client): response = app_client.get("/-/metadata") assert response.status == 200 diff --git a/tests/test_permissions.py b/tests/test_permissions.py index 3ef2394a..3c11985c 100644 --- a/tests/test_permissions.py +++ b/tests/test_permissions.py @@ -399,31 +399,49 @@ def 
cascade_app_client(): @pytest.mark.parametrize( - "path,expected_status,permissions", + "path,permissions,expected_status", [ - ("/", 403, []), - ("/", 200, ["instance"]), + ("/", [], 403), + ("/", ["instance"], 200), # Can view table even if not allowed database or instance - ("/fixtures/facet_cities", 403, []), - ("/fixtures/facet_cities", 403, ["database"]), - ("/fixtures/facet_cities", 403, ["instance"]), - ("/fixtures/facet_cities", 200, ["table"]), - ("/fixtures/facet_cities", 200, ["table", "database"]), - ("/fixtures/facet_cities", 200, ["table", "database", "instance"]), + ("/fixtures/binary_data", [], 403), + ("/fixtures/binary_data", ["database"], 403), + ("/fixtures/binary_data", ["instance"], 403), + ("/fixtures/binary_data", ["table"], 200), + ("/fixtures/binary_data", ["table", "database"], 200), + ("/fixtures/binary_data", ["table", "database", "instance"], 200), + # ... same for row + ("/fixtures/binary_data/1", [], 403), + ("/fixtures/binary_data/1", ["database"], 403), + ("/fixtures/binary_data/1", ["instance"], 403), + ("/fixtures/binary_data/1", ["table"], 200), + ("/fixtures/binary_data/1", ["table", "database"], 200), + ("/fixtures/binary_data/1", ["table", "database", "instance"], 200), + # ... 
and for binary blob + ("/fixtures/binary_data/-/blob/1/data.blob", [], 403), + ("/fixtures/binary_data/-/blob/1/data.blob", ["database"], 403), + ("/fixtures/binary_data/-/blob/1/data.blob", ["instance"], 403), + ("/fixtures/binary_data/-/blob/1/data.blob", ["table"], 200), + ("/fixtures/binary_data/-/blob/1/data.blob", ["table", "database"], 200), + ( + "/fixtures/binary_data/-/blob/1/data.blob", + ["table", "database", "instance"], + 200, + ), # Can view query even if not allowed database or instance - ("/fixtures/magic_parameters", 403, []), - ("/fixtures/magic_parameters", 403, ["database"]), - ("/fixtures/magic_parameters", 403, ["instance"]), - ("/fixtures/magic_parameters", 200, ["query"]), - ("/fixtures/magic_parameters", 200, ["query", "database"]), - ("/fixtures/magic_parameters", 200, ["query", "database", "instance"]), + ("/fixtures/magic_parameters", [], 403), + ("/fixtures/magic_parameters", ["database"], 403), + ("/fixtures/magic_parameters", ["instance"], 403), + ("/fixtures/magic_parameters", ["query"], 200), + ("/fixtures/magic_parameters", ["query", "database"], 200), + ("/fixtures/magic_parameters", ["query", "database", "instance"], 200), # Can view database even if not allowed instance - ("/fixtures", 403, []), - ("/fixtures", 403, ["instance"]), - ("/fixtures", 200, ["database"]), + ("/fixtures", [], 403), + ("/fixtures", ["instance"], 403), + ("/fixtures", ["database"], 200), ], ) -def test_permissions_cascade(cascade_app_client, path, expected_status, permissions): +def test_permissions_cascade(cascade_app_client, path, permissions, expected_status): "Test that e.g. 
having view-table but NOT view-database lets you view table page, etc" allow = {"id": "*"} deny = {} @@ -435,9 +453,9 @@ def test_permissions_cascade(cascade_app_client, path, expected_status, permissi updated_metadata["databases"]["fixtures"]["allow"] = ( allow if "database" in permissions else deny ) - updated_metadata["databases"]["fixtures"]["tables"]["facet_cities"]["allow"] = ( - allow if "table" in permissions else deny - ) + updated_metadata["databases"]["fixtures"]["tables"]["binary_data"] = { + "allow": (allow if "table" in permissions else deny) + } updated_metadata["databases"]["fixtures"]["queries"]["magic_parameters"][ "allow" ] = (allow if "query" in permissions else deny)