diff --git a/datasette/app.py b/datasette/app.py
index 37b199a4..c292542f 100644
--- a/datasette/app.py
+++ b/datasette/app.py
@@ -69,6 +69,9 @@ CONFIG_OPTIONS = (
ConfigOption("facet_suggest_time_limit_ms", 50, """
Time limit for calculating a suggested facet
""".strip()),
+ ConfigOption("hash_urls", False, """
+ Include DB file contents hash in URLs, for far-future caching
+ """.strip()),
ConfigOption("allow_facet", True, """
Allow users to specify columns to facet using ?_facet= parameter
""".strip()),
@@ -81,9 +84,12 @@ CONFIG_OPTIONS = (
ConfigOption("allow_sql", True, """
Allow arbitrary SQL queries via ?sql= parameter
""".strip()),
- ConfigOption("default_cache_ttl", 365 * 24 * 60 * 60, """
+ ConfigOption("default_cache_ttl", 5, """
Default HTTP cache TTL (used in Cache-Control: max-age= header)
""".strip()),
+ ConfigOption("default_cache_ttl_hashed", 365 * 24 * 60 * 60, """
+ Default HTTP cache TTL for hashed URL pages
+ """.strip()),
ConfigOption("cache_size_kb", 0, """
SQLite cache size in KB (0 == use SQLite default)
""".strip()),
diff --git a/datasette/templates/database.html b/datasette/templates/database.html
index f827e584..0e80c8b6 100644
--- a/datasette/templates/database.html
+++ b/datasette/templates/database.html
@@ -12,12 +12,12 @@
{% block content %}
-{{ metadata.title or database }}
+{{ metadata.title or database }}
{% block description_source_license %}{% include "_description_source_license.html" %}{% endblock %}
{% if config.allow_sql %}
-
{% if query.sql and config.allow_sql %}
- ✎ View and edit SQL
+ ✎ View and edit SQL
{% endif %}
This data as JSON{% if display_rows %}, CSV (advanced){% endif %}
diff --git a/datasette/utils.py b/datasette/utils.py
index 30fc4231..8bcaefc2 100644
--- a/datasette/utils.py
+++ b/datasette/utils.py
@@ -208,8 +208,14 @@ def path_with_added_args(request, args, path=None):
def path_with_removed_args(request, args, path=None):
+ query_string = request.query_string
+ if path is None:
+ path = request.path
+ else:
+ if "?" in path:
+ bits = path.split("?", 1)
+ path, query_string = bits
# args can be a dict or a set
- path = path or request.path
current = []
if isinstance(args, set):
def should_remove(key, value):
@@ -218,7 +224,7 @@ def path_with_removed_args(request, args, path=None):
# Must match key AND value
def should_remove(key, value):
return args.get(key) == value
- for key, value in urllib.parse.parse_qsl(request.query_string):
+ for key, value in urllib.parse.parse_qsl(query_string):
if not should_remove(key, value):
current.append((key, value))
query_string = urllib.parse.urlencode(current)
diff --git a/datasette/views/base.py b/datasette/views/base.py
index 4db1b654..33b7524e 100644
--- a/datasette/views/base.py
+++ b/datasette/views/base.py
@@ -75,6 +75,17 @@ class RenderMixin(HTTPMethodView):
else:
yield {"url": url}
+ def database_url(self, database):
+ if not self.ds.config("hash_urls"):
+ return "/{}".format(database)
+ else:
+ return "/{}-{}".format(
+ database, self.ds.inspect()[database]["hash"][:HASH_LENGTH]
+ )
+
+ def database_color(self, database):
+ return 'ff0000'
+
def render(self, templates, **context):
template = self.ds.jinja_env.select_template(templates)
select_templates = [
@@ -105,6 +116,8 @@ class RenderMixin(HTTPMethodView):
"extra_js_urls", template, context
),
"format_bytes": format_bytes,
+ "database_url": self.database_url,
+ "database_color": self.database_color,
}
}
)
@@ -131,16 +144,18 @@ class BaseView(RenderMixin):
r.headers["Access-Control-Allow-Origin"] = "*"
return r
- def redirect(self, request, path, forward_querystring=True):
+ def redirect(self, request, path, forward_querystring=True, remove_args=None):
if request.query_string and "?" not in path and forward_querystring:
path = "{}?{}".format(path, request.query_string)
+ if remove_args:
+ path = path_with_removed_args(request, remove_args, path=path)
r = response.redirect(path)
r.headers["Link"] = "<{}>; rel=preload".format(path)
if self.ds.cors:
r.headers["Access-Control-Allow-Origin"] = "*"
return r
- def resolve_db_name(self, db_name, **kwargs):
+ def resolve_db_name(self, request, db_name, **kwargs):
databases = self.ds.inspect()
hash = None
name = None
@@ -161,7 +176,9 @@ class BaseView(RenderMixin):
raise NotFound("Database not found: {}".format(name))
expected = info["hash"][:HASH_LENGTH]
- if expected != hash:
+ correct_hash_provided = (expected == hash)
+
+ if not correct_hash_provided:
if "table_and_format" in kwargs:
table, _format = resolve_table_and_format(
table_and_format=urllib.parse.unquote_plus(
@@ -188,9 +205,11 @@ class BaseView(RenderMixin):
should_redirect += kwargs["as_format"]
if "as_db" in kwargs:
should_redirect += kwargs["as_db"]
- return name, expected, should_redirect
- return name, expected, None
+ if self.ds.config("hash_urls") or "_hash" in request.args:
+ return name, expected, correct_hash_provided, should_redirect
+
+ return name, expected, correct_hash_provided, None
def absolute_url(self, request, path):
url = urllib.parse.urljoin(request.url, path)
@@ -202,11 +221,13 @@ class BaseView(RenderMixin):
assert NotImplemented
async def get(self, request, db_name, **kwargs):
- database, hash, should_redirect = self.resolve_db_name(db_name, **kwargs)
+ database, hash, correct_hash_provided, should_redirect = self.resolve_db_name(
+ request, db_name, **kwargs
+ )
if should_redirect:
- return self.redirect(request, should_redirect)
+ return self.redirect(request, should_redirect, remove_args={"_hash"})
- return await self.view_get(request, database, hash, **kwargs)
+ return await self.view_get(request, database, hash, correct_hash_provided, **kwargs)
async def as_csv(self, request, database, hash, **kwargs):
stream = request.args.get("_stream")
@@ -301,7 +322,7 @@ class BaseView(RenderMixin):
content_type=content_type
)
- async def view_get(self, request, database, hash, **kwargs):
+ async def view_get(self, request, database, hash, correct_hash_provided, **kwargs):
# If ?_format= is provided, use that as the format
_format = request.args.get("_format", None)
if not _format:
@@ -418,7 +439,6 @@ class BaseView(RenderMixin):
"ok": False,
"error": error,
"database": database,
- "database_hash": hash,
}
elif shape == "array":
data = data["rows"]
@@ -489,10 +509,13 @@ class BaseView(RenderMixin):
r = self.render(templates, **context)
r.status = status_code
# Set far-future cache expiry
- if self.ds.cache_headers:
+ if self.ds.cache_headers and r.status == 200:
ttl = request.args.get("_ttl", None)
if ttl is None or not ttl.isdigit():
- ttl = self.ds.config("default_cache_ttl")
+ if correct_hash_provided:
+ ttl = self.ds.config("default_cache_ttl_hashed")
+ else:
+ ttl = self.ds.config("default_cache_ttl")
else:
ttl = int(ttl)
if ttl == 0:
@@ -572,7 +595,6 @@ class BaseView(RenderMixin):
display_rows.append(display_row)
return {
"display_rows": display_rows,
- "database_hash": hash,
"custom_sql": True,
"named_parameter_values": named_parameter_values,
"editable": editable,
diff --git a/datasette/views/database.py b/datasette/views/database.py
index 9c44a800..0cbb432b 100644
--- a/datasette/views/database.py
+++ b/datasette/views/database.py
@@ -30,7 +30,6 @@ class DatabaseView(BaseView):
"views": info["views"],
"queries": self.ds.get_canned_queries(database),
}, {
- "database_hash": hash,
"show_hidden": request.args.get("_show_hidden"),
"editable": True,
"metadata": metadata,
@@ -41,7 +40,7 @@ class DatabaseView(BaseView):
class DatabaseDownload(BaseView):
- async def view_get(self, request, database, hash, **kwargs):
+ async def view_get(self, request, database, hash, correct_hash_present, **kwargs):
if not self.ds.config("allow_download"):
raise DatasetteError("Database download is forbidden", status=403)
filepath = self.ds.inspect()[database]["file"]
diff --git a/datasette/views/index.py b/datasette/views/index.py
index 32c04585..70f7e943 100644
--- a/datasette/views/index.py
+++ b/datasette/views/index.py
@@ -21,7 +21,7 @@ class IndexView(RenderMixin):
database = {
"name": key,
"hash": info["hash"],
- "path": "{}-{}".format(key, info["hash"][:HASH_LENGTH]),
+ "path": self.database_url(key),
"tables_truncated": sorted(
tables, key=lambda t: t["count"], reverse=True
)[
diff --git a/datasette/views/table.py b/datasette/views/table.py
index cb744708..14f3be6f 100644
--- a/datasette/views/table.py
+++ b/datasette/views/table.py
@@ -750,7 +750,6 @@ class TableView(RowTableShared):
)
self.ds.update_with_inherited_metadata(metadata)
return {
- "database_hash": hash,
"supports_search": bool(fts_table),
"search": search or "",
"use_rowid": use_rowid,
@@ -851,7 +850,6 @@ class RowView(RowTableShared):
for column in display_columns:
column["sortable"] = False
return {
- "database_hash": hash,
"foreign_key_tables": await self.foreign_key_tables(
database, table, pk_values
),
diff --git a/docs/config.rst b/docs/config.rst
index b934ef2a..dac4e044 100644
--- a/docs/config.rst
+++ b/docs/config.rst
@@ -115,11 +115,21 @@ Enable/disable the ability for users to run custom SQL directly against a databa
default_cache_ttl
-----------------
-Default HTTP caching max-age header in seconds, used for ``Cache-Control: max-age=X``. Can be over-ridden on a per-request basis using the ``?_ttl=`` querystring parameter. Set this to ``0`` to disable HTTP caching entirely. Defaults to 365 days (31536000 seconds).
+Default HTTP caching max-age header in seconds, used for ``Cache-Control: max-age=X``. Can be over-ridden on a per-request basis using the ``?_ttl=`` querystring parameter. Set this to ``0`` to disable HTTP caching entirely. Defaults to 5 seconds.
::
- datasette mydatabase.db --config default_cache_ttl:10
+ datasette mydatabase.db --config default_cache_ttl:60
+
+default_cache_ttl_hashed
+------------------------
+
+Default HTTP caching max-age for responses served using the :ref:`hashed-urls mechanism <config_hash_urls>`. Defaults to 365 days (31536000 seconds).
+
+::
+
+ datasette mydatabase.db --config default_cache_ttl_hashed:10000
+
cache_size_kb
-------------
@@ -179,3 +189,19 @@ HTTP but is served to the outside world via a proxy that enables HTTPS.
::
datasette mydatabase.db --config force_https_urls:1
+
+.. _config_hash_urls:
+
+hash_urls
+---------
+
+When enabled, this setting causes Datasette to append a content hash of the
+database file to the URL path for every table and query within that database.
+
+When combined with far-future expire headers this ensures that queries can be
+cached forever, safe in the knowledge that any modifications to the database
+itself will result in new, uncached URL paths.
+
+::
+
+ datasette mydatabase.db --config hash_urls:1
diff --git a/tests/fixtures.py b/tests/fixtures.py
index efd85fab..81432e30 100644
--- a/tests/fixtures.py
+++ b/tests/fixtures.py
@@ -73,6 +73,13 @@ def app_client_no_files():
yield client
+@pytest.fixture(scope="session")
+def app_client_with_hash():
+ yield from make_app_client(config={
+ 'hash_urls': True
+ })
+
+
@pytest.fixture(scope='session')
def app_client_shorter_time_limit():
yield from make_app_client(20)
diff --git a/tests/test_api.py b/tests/test_api.py
index a6ba3f37..b92b9ffb 100644
--- a/tests/test_api.py
+++ b/tests/test_api.py
@@ -1,6 +1,7 @@
from .fixtures import ( # noqa
app_client,
app_client_no_files,
+ app_client_with_hash,
app_client_shorter_time_limit,
app_client_larger_cache_size,
app_client_returned_rows_matches_page_size,
@@ -378,7 +379,7 @@ def test_no_files_uses_memory_database(app_client_no_files):
"hidden_table_rows_sum": 0,
"hidden_tables_count": 0,
"name": ":memory:",
- "path": ":memory:-000",
+ "path": "/:memory:",
"table_rows_sum": 0,
"tables_count": 0,
"tables_more": False,
@@ -388,7 +389,7 @@ def test_no_files_uses_memory_database(app_client_no_files):
} == response.json
# Try that SQL query
response = app_client_no_files.get(
- "/:memory:-0.json?sql=select+sqlite_version()&_shape=array"
+ "/:memory:.json?sql=select+sqlite_version()&_shape=array"
)
assert 1 == len(response.json)
assert ["sqlite_version()"] == list(response.json[0].keys())
@@ -501,12 +502,12 @@ def test_table_not_exists_json(app_client):
} == app_client.get('/fixtures/blah.json').json
-def test_jsono_redirects_to_shape_objects(app_client):
- response_1 = app_client.get(
+def test_jsono_redirects_to_shape_objects(app_client_with_hash):
+ response_1 = app_client_with_hash.get(
'/fixtures/simple_primary_key.jsono',
allow_redirects=False
)
- response = app_client.get(
+ response = app_client_with_hash.get(
response_1.headers['Location'],
allow_redirects=False
)
@@ -1049,13 +1050,15 @@ def test_config_json(app_client):
"allow_facet": True,
"suggest_facets": True,
"allow_sql": True,
- "default_cache_ttl": 365 * 24 * 60 * 60,
+ "default_cache_ttl": 5,
+ "default_cache_ttl_hashed": 365 * 24 * 60 * 60,
"num_sql_threads": 3,
"cache_size_kb": 0,
"allow_csv_stream": True,
"max_csv_mb": 100,
"truncate_cells_html": 2048,
"force_https_urls": False,
+ "hash_urls": False,
} == response.json
@@ -1300,8 +1303,8 @@ def test_expand_label(app_client):
@pytest.mark.parametrize('path,expected_cache_control', [
- ("/fixtures/facetable.json", "max-age=31536000"),
- ("/fixtures/facetable.json?_ttl=invalid", "max-age=31536000"),
+ ("/fixtures/facetable.json", "max-age=5"),
+ ("/fixtures/facetable.json?_ttl=invalid", "max-age=5"),
("/fixtures/facetable.json?_ttl=10", "max-age=10"),
("/fixtures/facetable.json?_ttl=0", "no-cache"),
])
@@ -1310,6 +1313,19 @@ def test_ttl_parameter(app_client, path, expected_cache_control):
assert expected_cache_control == response.headers['Cache-Control']
+@pytest.mark.parametrize("path,expected_redirect", [
+ ("/fixtures/facetable.json?_hash=1", "/fixtures-HASH/facetable.json"),
+ ("/fixtures/facetable.json?city_id=1&_hash=1", "/fixtures-HASH/facetable.json?city_id=1"),
+])
+def test_hash_parameter(app_client, path, expected_redirect):
+ # First get the current hash for the fixtures database
+ current_hash = app_client.get("/-/inspect.json").json["fixtures"]["hash"][:7]
+ response = app_client.get(path, allow_redirects=False)
+ assert response.status == 302
+ location = response.headers["Location"]
+ assert expected_redirect.replace("HASH", current_hash) == location
+
+
test_json_columns_default_expected = [{
"intval": 1,
"strval": "s",
diff --git a/tests/test_html.py b/tests/test_html.py
index 36335201..18e6bb04 100644
--- a/tests/test_html.py
+++ b/tests/test_html.py
@@ -2,6 +2,7 @@ from bs4 import BeautifulSoup as Soup
from .fixtures import ( # noqa
app_client,
app_client_shorter_time_limit,
+ app_client_with_hash,
make_app_client,
)
import pytest
@@ -15,10 +16,10 @@ def test_homepage(app_client):
assert 'fixtures' in response.text
-def test_database_page(app_client):
- response = app_client.get('/fixtures', allow_redirects=False)
+def test_database_page_redirects_with_url_hash(app_client_with_hash):
+ response = app_client_with_hash.get('/fixtures', allow_redirects=False)
assert response.status == 302
- response = app_client.get('/fixtures')
+ response = app_client_with_hash.get('/fixtures')
assert 'fixtures' in response.text
@@ -41,19 +42,19 @@ def test_sql_time_limit(app_client_shorter_time_limit):
assert expected_html_fragment in response.text
-def test_row(app_client):
- response = app_client.get(
+def test_row_redirects_with_url_hash(app_client_with_hash):
+ response = app_client_with_hash.get(
'/fixtures/simple_primary_key/1',
allow_redirects=False
)
assert response.status == 302
assert response.headers['Location'].endswith('/1')
- response = app_client.get('/fixtures/simple_primary_key/1')
+ response = app_client_with_hash.get('/fixtures/simple_primary_key/1')
assert response.status == 200
-def test_row_strange_table_name(app_client):
- response = app_client.get(
+def test_row_strange_table_name_with_url_hash(app_client_with_hash):
+ response = app_client_with_hash.get(
'/fixtures/table%2Fwith%2Fslashes.csv/3',
allow_redirects=False
)
@@ -61,7 +62,7 @@ def test_row_strange_table_name(app_client):
assert response.headers['Location'].endswith(
'/table%2Fwith%2Fslashes.csv/3'
)
- response = app_client.get('/fixtures/table%2Fwith%2Fslashes.csv/3')
+ response = app_client_with_hash.get('/fixtures/table%2Fwith%2Fslashes.csv/3')
assert response.status == 200
@@ -105,10 +106,7 @@ def test_add_filter_redirects(app_client):
'_filter_op': 'startswith',
'_filter_value': 'x'
})
- # First we need to resolve the correct path before testing more redirects
- path_base = app_client.get(
- '/fixtures/simple_primary_key', allow_redirects=False
- ).headers['Location']
+ path_base = '/fixtures/simple_primary_key'
path = path_base + '?' + filter_args
response = app_client.get(path, allow_redirects=False)
assert response.status == 302
@@ -146,9 +144,7 @@ def test_existing_filter_redirects(app_client):
'_filter_op_4': 'contains',
'_filter_value_4': 'world',
}
- path_base = app_client.get(
- '/fixtures/simple_primary_key', allow_redirects=False
- ).headers['Location']
+ path_base = '/fixtures/simple_primary_key'
path = path_base + '?' + urllib.parse.urlencode(filter_args)
response = app_client.get(path, allow_redirects=False)
assert response.status == 302
@@ -174,9 +170,7 @@ def test_existing_filter_redirects(app_client):
def test_empty_search_parameter_gets_removed(app_client):
- path_base = app_client.get(
- '/fixtures/simple_primary_key', allow_redirects=False
- ).headers['Location']
+ path_base = '/fixtures/simple_primary_key'
path = path_base + '?' + urllib.parse.urlencode({
'_search': '',
'_filter_column': 'name',
@@ -191,9 +185,7 @@ def test_empty_search_parameter_gets_removed(app_client):
def test_sort_by_desc_redirects(app_client):
- path_base = app_client.get(
- '/fixtures/sortable', allow_redirects=False
- ).headers['Location']
+ path_base = '/fixtures/sortable'
path = path_base + '?' + urllib.parse.urlencode({
'_sort': 'sortable',
'_sort_by_desc': '1',
diff --git a/tests/test_utils.py b/tests/test_utils.py
index b406d70b..1f0079c9 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -59,6 +59,13 @@ def test_path_with_removed_args(path, args, expected):
)
actual = utils.path_with_removed_args(request, args)
assert expected == actual
+ # Run the test again but this time use the path= argument
+ request = Request(
+ "/".encode('utf8'),
+ {}, '1.1', 'GET', None
+ )
+ actual = utils.path_with_removed_args(request, args, path=path)
+ assert expected == actual
@pytest.mark.parametrize('path,args,expected', [