Closes #1657

Refs #1439
pull/1665/head
Simon Willison 2022-03-15 11:01:57 -07:00 committed by GitHub
parent c10cd48baf
commit a35393b29c
No key found in the database for this signature
GPG key ID: 4AEE18F83AFDEB23
14 changed files with 125 additions and 141 deletions

View file

@@ -1211,11 +1211,14 @@ class DatasetteRouter:
         return await self.handle_404(request, send)
 
     async def handle_404(self, request, send, exception=None):
-        # If path contains % encoding, redirect to dash encoding
+        # If path contains % encoding, redirect to tilde encoding
         if "%" in request.path:
-            # Try the same path but with "%" replaced by "-"
-            # and "-" replaced with "-2D"
-            new_path = request.path.replace("-", "-2D").replace("%", "-")
+            # Try the same path but with "%" replaced by "~"
+            # and "~" replaced with "~7E"
+            # and "." replaced with "~2E"
+            new_path = (
+                request.path.replace("~", "~7E").replace("%", "~").replace(".", "~2E")
+            )
             if request.query_string:
                 new_path += "?{}".format(request.query_string)
             await asgi_send_redirect(send, new_path)

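For context, the rewrite rule this hunk implements can be exercised on its own. A minimal standalone sketch (the helper name is hypothetical, not part of the commit):

    # Sketch of the percent-to-tilde redirect logic in handle_404 above.
    # Escape literal "~" first so the transformation stays reversible,
    # then swap "%" for "~" and escape "." as "~2E".
    def percent_to_tilde_path(path: str, query_string: str = "") -> str:
        new_path = path.replace("~", "~7E").replace("%", "~").replace(".", "~2E")
        if query_string:
            new_path += "?{}".format(query_string)
        return new_path

    assert (
        percent_to_tilde_path("/fixtures/table%2Fwith%2Fslashes.csv")
        == "/fixtures/table~2Fwith~2Fslashes~2Ecsv"
    )

This matches the redirects asserted in test_redirect_percent_encoding_to_tilde_encoding further down.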
View file

@@ -1,4 +1,4 @@
-from .utils import dash_encode, path_with_format, HASH_LENGTH, PrefixedUrlString
+from .utils import tilde_encode, path_with_format, HASH_LENGTH, PrefixedUrlString
 import urllib
@@ -31,20 +31,20 @@ class Urls:
         db = self.ds.databases[database]
         if self.ds.setting("hash_urls") and db.hash:
             path = self.path(
-                f"{dash_encode(database)}-{db.hash[:HASH_LENGTH]}", format=format
+                f"{tilde_encode(database)}-{db.hash[:HASH_LENGTH]}", format=format
             )
         else:
-            path = self.path(dash_encode(database), format=format)
+            path = self.path(tilde_encode(database), format=format)
         return path
 
     def table(self, database, table, format=None):
-        path = f"{self.database(database)}/{dash_encode(table)}"
+        path = f"{self.database(database)}/{tilde_encode(table)}"
         if format is not None:
             path = path_with_format(path=path, format=format)
         return PrefixedUrlString(path)
 
     def query(self, database, query, format=None):
-        path = f"{self.database(database)}/{dash_encode(query)}"
+        path = f"{self.database(database)}/{tilde_encode(query)}"
         if format is not None:
             path = path_with_format(path=path, format=format)
         return PrefixedUrlString(path)

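A quick sketch of what the Urls builder now produces, assuming ds is a datasette.app.Datasette instance with a fixtures database attached and hash_urls disabled:

    # Table names pass through tilde_encode, so "/" and "." in a name
    # can no longer collide with URL structure or format extensions.
    ds.urls.table("fixtures", "facetable")
    # -> /fixtures/facetable
    ds.urls.table("fixtures", "table/with/slashes.csv")
    # -> /fixtures/table~2Fwith~2Fslashes~2Ecsv
    ds.urls.table("fixtures", "facetable", format="json")
    # -> /fixtures/facetable.json

Because an encoded name can never itself end in ".json", format URLs can always be built by simply appending the extension.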
View file

@@ -15,6 +15,7 @@ import tempfile
 import typing
 import time
 import types
+import secrets
 import shutil
 import urllib
 import yaml
@@ -112,12 +113,12 @@ async def await_me_maybe(value: typing.Any) -> typing.Any:
 def urlsafe_components(token):
-    """Splits token on commas and dash-decodes each component"""
-    return [dash_decode(b) for b in token.split(",")]
+    """Splits token on commas and tilde-decodes each component"""
+    return [tilde_decode(b) for b in token.split(",")]
 
 
 def path_from_row_pks(row, pks, use_rowid, quote=True):
-    """Generate an optionally dash-quoted unique identifier
+    """Generate an optionally tilde-encoded unique identifier
     for a row from its primary keys."""
     if use_rowid:
         bits = [row["rowid"]]
@@ -126,7 +127,7 @@ def path_from_row_pks(row, pks, use_rowid, quote=True):
             row[pk]["value"] if isinstance(row[pk], dict) else row[pk] for pk in pks
         ]
     if quote:
-        bits = [dash_encode(str(bit)) for bit in bits]
+        bits = [tilde_encode(str(bit)) for bit in bits]
     else:
         bits = [str(bit) for bit in bits]
@@ -1142,34 +1143,38 @@ def add_cors_headers(headers):
     headers["Access-Control-Expose-Headers"] = "Link"
 
 
-_DASH_ENCODING_SAFE = frozenset(
+_TILDE_ENCODING_SAFE = frozenset(
     b"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
     b"abcdefghijklmnopqrstuvwxyz"
-    b"0123456789_"
+    b"0123456789_-"
     # This is the same as Python percent-encoding but I removed
-    # '.' and '-' and '~'
+    # '.' and '~'
 )
 
 
-class DashEncoder(dict):
+class TildeEncoder(dict):
     # Keeps a cache internally, via __missing__
     def __missing__(self, b):
         # Handle a cache miss, store encoded string in cache and return.
-        res = chr(b) if b in _DASH_ENCODING_SAFE else "-{:02X}".format(b)
+        res = chr(b) if b in _TILDE_ENCODING_SAFE else "~{:02X}".format(b)
         self[b] = res
         return res
 
 
-_dash_encoder = DashEncoder().__getitem__
+_tilde_encoder = TildeEncoder().__getitem__
 
 
 @documented
-def dash_encode(s: str) -> str:
-    "Returns dash-encoded string - for example ``/foo/bar`` -> ``-2Ffoo-2Fbar``"
-    return "".join(_dash_encoder(char) for char in s.encode("utf-8"))
+def tilde_encode(s: str) -> str:
+    "Returns tilde-encoded string - for example ``/foo/bar`` -> ``~2Ffoo~2Fbar``"
+    return "".join(_tilde_encoder(char) for char in s.encode("utf-8"))
 
 
 @documented
-def dash_decode(s: str) -> str:
-    "Decodes a dash-encoded string, so ``-2Ffoo-2Fbar`` -> ``/foo/bar``"
-    return urllib.parse.unquote(s.replace("-", "%"))
+def tilde_decode(s: str) -> str:
+    "Decodes a tilde-encoded string, so ``~2Ffoo~2Fbar`` -> ``/foo/bar``"
+    # Avoid accidentally decoding a %2f style sequence
+    temp = secrets.token_hex(16)
+    s = s.replace("%", temp)
+    decoded = urllib.parse.unquote(s.replace("~", "%"))
+    return decoded.replace(temp, "%")

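The indirection through secrets.token_hex in tilde_decode is what keeps a literal "%" in the input from being percent-decoded: the "%" is hidden behind a throwaway token before unquote() runs, then restored afterwards. A standalone round-trip check (illustrative sketch, not part of the commit):

    import secrets
    import urllib.parse

    def tilde_decode_sketch(s: str) -> str:
        # Hide any literal "%" behind a one-off random token so that
        # urllib.parse.unquote cannot misread it as an escape sequence.
        temp = secrets.token_hex(16)
        s = s.replace("%", temp)
        decoded = urllib.parse.unquote(s.replace("~", "%"))
        return decoded.replace(temp, "%")

    assert tilde_decode_sketch("~2Ffoo~2Fbar") == "/foo/bar"
    # A raw percent sequence survives instead of being decoded:
    assert tilde_decode_sketch("fo%2Fo") == "fo%2Fo"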
View file

@@ -10,6 +10,7 @@ import pint
 from datasette import __version__
 from datasette.database import QueryInterrupted
+from datasette.utils.asgi import Request
 from datasette.utils import (
     add_cors_headers,
     await_me_maybe,
@@ -17,8 +18,8 @@ from datasette.utils import (
     InvalidSql,
     LimitedWriter,
     call_with_supported_arguments,
-    dash_decode,
-    dash_encode,
+    tilde_decode,
+    tilde_encode,
     path_from_row_pks,
     path_with_added_args,
     path_with_removed_args,
@@ -205,14 +206,14 @@ class DataView(BaseView):
     async def resolve_db_name(self, request, db_name, **kwargs):
         hash = None
         name = None
-        decoded_name = dash_decode(db_name)
+        decoded_name = tilde_decode(db_name)
         if decoded_name not in self.ds.databases and "-" in db_name:
             # No matching DB found, maybe it's a name-hash?
             name_bit, hash_bit = db_name.rsplit("-", 1)
-            if dash_decode(name_bit) not in self.ds.databases:
+            if tilde_decode(name_bit) not in self.ds.databases:
                 raise NotFound(f"Database not found: {name}")
             else:
-                name = dash_decode(name_bit)
+                name = tilde_decode(name_bit)
                 hash = hash_bit
         else:
             name = decoded_name
@@ -235,7 +236,7 @@ class DataView(BaseView):
                 return await db.table_exists(t)
 
             table, _format = await resolve_table_and_format(
-                table_and_format=dash_decode(kwargs["table_and_format"]),
+                table_and_format=tilde_decode(kwargs["table_and_format"]),
                 table_exists=async_table_exists,
                 allowed_formats=self.ds.renderers.keys(),
             )
@@ -243,11 +244,11 @@ class DataView(BaseView):
             if _format:
                 kwargs["as_format"] = f".{_format}"
         elif kwargs.get("table"):
-            kwargs["table"] = dash_decode(kwargs["table"])
+            kwargs["table"] = tilde_decode(kwargs["table"])
 
         should_redirect = self.ds.urls.path(f"{name}-{expected}")
         if kwargs.get("table"):
-            should_redirect += "/" + dash_encode(kwargs["table"])
+            should_redirect += "/" + tilde_encode(kwargs["table"])
         if kwargs.get("pk_path"):
             should_redirect += "/" + kwargs["pk_path"]
         if kwargs.get("as_format"):
@@ -291,6 +292,7 @@ class DataView(BaseView):
                 if not request.args.get(key)
             ]
             if extra_parameters:
+                # Replace request object with a new one with modified scope
                 if not request.query_string:
                     new_query_string = "&".join(extra_parameters)
                 else:
@@ -300,7 +302,8 @@ class DataView(BaseView):
                 new_scope = dict(
                     request.scope, query_string=new_query_string.encode("latin-1")
                 )
-                request.scope = new_scope
+                receive = request.receive
+                request = Request(new_scope, receive)
         if stream:
             # Some quick soundness checks
             if not self.ds.setting("allow_csv_stream"):
@@ -467,7 +470,7 @@ class DataView(BaseView):
                 return await db.table_exists(t)
 
             table, _ext_format = await resolve_table_and_format(
-                table_and_format=dash_decode(args["table_and_format"]),
+                table_and_format=tilde_decode(args["table_and_format"]),
                 table_exists=async_table_exists,
                 allowed_formats=self.ds.renderers.keys(),
             )
@@ -475,7 +478,7 @@ class DataView(BaseView):
             args["table"] = table
             del args["table_and_format"]
         elif "table" in args:
-            args["table"] = dash_decode(args["table"])
+            args["table"] = tilde_decode(args["table"])
         return _format, args
 
     async def view_get(self, request, database, hash, correct_hash_provided, **kwargs):

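The resolve_db_name change above decodes first and only then falls back to name-hash splitting. A simplified sketch of that resolution order, with the database registry stubbed as a plain set and NotFound replaced by KeyError (both simplifications are mine, not the commit's):

    from datasette.utils import tilde_decode

    def resolve_db_name_sketch(databases, db_name):
        decoded_name = tilde_decode(db_name)
        if decoded_name not in databases and "-" in db_name:
            # Maybe a "name-hash" URL produced by hash_urls mode
            name_bit, hash_bit = db_name.rsplit("-", 1)
            if tilde_decode(name_bit) not in databases:
                raise KeyError(f"Database not found: {db_name}")
            return tilde_decode(name_bit), hash_bit
        return decoded_name, None

    assert resolve_db_name_sketch({"fixtures"}, "fixtures-aa7318") == ("fixtures", "aa7318")

Note that tilde encoding keeps "-" safe, so a decoded name containing a hyphen (like foo-bar) now matches directly before the rsplit fallback fires, which is what test_common_prefix_database_names exercises below.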
View file

@@ -12,7 +12,8 @@ from datasette.utils import (
     MultiParams,
     append_querystring,
     compound_keys_after_sql,
-    dash_encode,
+    tilde_decode,
+    tilde_encode,
     escape_sqlite,
     filters_should_redirect,
     is_url,
@@ -143,7 +144,7 @@ class RowTableShared(DataView):
                     '<a href="{base_url}{database}/{table}/{flat_pks_quoted}">{flat_pks}</a>'.format(
                         base_url=base_url,
                         database=database,
-                        table=dash_encode(table),
+                        table=tilde_encode(table),
                         flat_pks=str(markupsafe.escape(pk_path)),
                         flat_pks_quoted=path_from_row_pks(row, pks, not pks),
                     )
@@ -200,8 +201,8 @@ class RowTableShared(DataView):
                         link_template.format(
                             database=database,
                             base_url=base_url,
-                            table=dash_encode(other_table),
-                            link_id=dash_encode(str(value)),
+                            table=tilde_encode(other_table),
+                            link_id=tilde_encode(str(value)),
                             id=str(markupsafe.escape(value)),
                             label=str(markupsafe.escape(label)) or "-",
                         )
@@ -346,6 +347,8 @@ class TableView(RowTableShared):
                 write=bool(canned_query.get("write")),
             )
 
+        table = tilde_decode(table)
+
         db = self.ds.databases[database]
         is_view = bool(await db.get_view_definition(table))
         table_exists = bool(await db.table_exists(table))
@@ -766,7 +769,7 @@ class TableView(RowTableShared):
                 if prefix is None:
                     prefix = "$null"
                 else:
-                    prefix = dash_encode(str(prefix))
+                    prefix = tilde_encode(str(prefix))
                 next_value = f"{prefix},{next_value}"
             added_args = {"_next": next_value}
             if sort:
@@ -938,6 +941,7 @@ class RowView(RowTableShared):
     name = "row"
 
     async def data(self, request, database, hash, table, pk_path, default_labels=False):
+        table = tilde_decode(table)
         await self.check_permissions(
             request,
             [

View file

@@ -59,21 +59,3 @@ truncation error message.
 You can increase or remove this limit using the :ref:`setting_max_csv_mb` config
 setting. You can also disable the CSV export feature entirely using
 :ref:`setting_allow_csv_stream`.
-
-A note on URLs
---------------
-
-The default URL for the CSV representation of a table is that table with
-``.csv`` appended to it:
-
-* https://latest.datasette.io/fixtures/facetable - HTML interface
-* https://latest.datasette.io/fixtures/facetable.csv - CSV export
-* https://latest.datasette.io/fixtures/facetable.json - JSON API
-
-This pattern doesn't work for tables with names that already end in ``.csv`` or
-``.json``. For those tables, you can instead use the ``_format=`` query string
-parameter:
-
-* https://latest.datasette.io/fixtures/table%2Fwith%2Fslashes.csv - HTML interface
-* https://latest.datasette.io/fixtures/table%2Fwith%2Fslashes.csv?_format=csv - CSV export
-* https://latest.datasette.io/fixtures/table%2Fwith%2Fslashes.csv?_format=json - JSON API

View file

@@ -545,7 +545,7 @@ These functions can be accessed via the ``{{ urls }}`` object in Datasette templ
     <a href="{{ urls.table("fixtures", "facetable") }}">facetable table</a>
     <a href="{{ urls.query("fixtures", "pragma_cache_size") }}">pragma_cache_size query</a>
 
-Use the ``format="json"`` (or ``"csv"`` or other formats supported by plugins) arguments to get back URLs to the JSON representation. This is usually the path with ``.json`` added on the end, but it may use ``?_format=json`` in cases where the path already includes ``.json``, for example a URL to a table named ``table.json``.
+Use the ``format="json"`` (or ``"csv"`` or other formats supported by plugins) arguments to get back URLs to the JSON representation. This is the path with ``.json`` added on the end.
 
 These methods each return a ``datasette.utils.PrefixedUrlString`` object, which is a subclass of the Python ``str`` type. This allows the logic that considers the ``base_url`` setting to detect if that prefix has already been applied to the path.
@@ -876,31 +876,31 @@ Utility function for calling ``await`` on a return value if it is awaitable, oth
 .. autofunction:: datasette.utils.await_me_maybe
 
-.. _internals_dash_encoding:
+.. _internals_tilde_encoding:
 
-Dash encoding
--------------
+Tilde encoding
+--------------
 
-Datasette uses a custom encoding scheme in some places, called **dash encoding**. This is primarily used for table names and row primary keys, to avoid any confusion between ``/`` characters in those values and the Datasette URLs that reference them.
+Datasette uses a custom encoding scheme in some places, called **tilde encoding**. This is primarily used for table names and row primary keys, to avoid any confusion between ``/`` characters in those values and the Datasette URLs that reference them.
 
-Dash encoding uses the same algorithm as `URL percent-encoding <https://developer.mozilla.org/en-US/docs/Glossary/percent-encoding>`__, but with the ``-`` hyphen character used in place of ``%``.
+Tilde encoding uses the same algorithm as `URL percent-encoding <https://developer.mozilla.org/en-US/docs/Glossary/percent-encoding>`__, but with the ``~`` tilde character used in place of ``%``.
 
-Any character other than ``ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz 0123456789_`` will be replaced by the numeric equivalent preceded by a hyphen. For example:
+Any character other than ``ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz 0123456789_-`` will be replaced by the numeric equivalent preceded by a tilde. For example:
 
-- ``/`` becomes ``-2F``
-- ``.`` becomes ``-2E``
-- ``%`` becomes ``-25``
-- ``-`` becomes ``-2D``
-- Space character becomes ``-20``
-- ``polls/2022.primary`` becomes ``polls-2F2022-2Eprimary``
+- ``/`` becomes ``~2F``
+- ``.`` becomes ``~2E``
+- ``%`` becomes ``~25``
+- ``~`` becomes ``~7E``
+- Space character becomes ``~20``
+- ``polls/2022.primary`` becomes ``polls~2F2022~2Eprimary``
 
-.. _internals_utils_dash_encode:
+.. _internals_utils_tilde_encode:
 
-.. autofunction:: datasette.utils.dash_encode
+.. autofunction:: datasette.utils.tilde_encode
 
-.. _internals_utils_dash_decode:
+.. _internals_utils_tilde_decode:
 
-.. autofunction:: datasette.utils.dash_decode
+.. autofunction:: datasette.utils.tilde_decode
 
 .. _internals_tracer:

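For reference, a doctest-style illustration of the two documented functions (the outputs follow directly from the encoding rules listed above):

    >>> from datasette.utils import tilde_encode, tilde_decode
    >>> tilde_encode("polls/2022.primary")
    'polls~2F2022~2Eprimary'
    >>> tilde_encode("100%")
    '100~25'
    >>> tilde_decode("polls~2F2022~2Eprimary")
    'polls/2022.primary'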
View file

@@ -679,18 +679,9 @@ def test_row(app_client):
     assert [{"id": "1", "content": "hello"}] == response.json["rows"]
 
 
-def test_row_format_in_querystring(app_client):
-    # regression test for https://github.com/simonw/datasette/issues/563
-    response = app_client.get(
-        "/fixtures/simple_primary_key/1?_format=json&_shape=objects"
-    )
-    assert response.status == 200
-    assert [{"id": "1", "content": "hello"}] == response.json["rows"]
-
-
 def test_row_strange_table_name(app_client):
     response = app_client.get(
-        "/fixtures/table%2Fwith%2Fslashes.csv/3.json?_shape=objects"
+        "/fixtures/table~2Fwith~2Fslashes~2Ecsv/3.json?_shape=objects"
     )
     assert response.status == 200
     assert [{"pk": "3", "content": "hey"}] == response.json["rows"]
@@ -942,7 +933,7 @@ def test_cors(app_client_with_cors, path, status_code):
 )
 def test_database_with_space_in_name(app_client_two_attached_databases, path):
     response = app_client_two_attached_databases.get(
-        "/extra-20database" + path, follow_redirects=True
+        "/extra~20database" + path, follow_redirects=True
     )
     assert response.status == 200
@@ -953,7 +944,7 @@ def test_common_prefix_database_names(app_client_conflicting_database_names):
         d["name"]
         for d in app_client_conflicting_database_names.get("/-/databases.json").json
     ]
-    for db_name, path in (("foo", "/foo.json"), ("foo-bar", "/foo-2Dbar.json")):
+    for db_name, path in (("foo", "/foo.json"), ("foo-bar", "/foo-bar.json")):
        data = app_client_conflicting_database_names.get(path).json
        assert db_name == data["database"]
@@ -996,7 +987,7 @@ async def test_hidden_sqlite_stat1_table():
 @pytest.mark.asyncio
 @pytest.mark.parametrize("db_name", ("foo", r"fo%o", "f~/c.d"))
-async def test_dash_encoded_database_names(db_name):
+async def test_tilde_encoded_database_names(db_name):
     ds = Datasette()
     ds.add_memory_database(db_name)
     response = await ds.client.get("/.json")

View file

@@ -9,7 +9,7 @@ from datasette.app import SETTINGS
 from datasette.plugins import DEFAULT_PLUGINS
 from datasette.cli import cli, serve
 from datasette.version import __version__
-from datasette.utils import dash_encode
+from datasette.utils import tilde_encode
 from datasette.utils.sqlite import sqlite3
 from click.testing import CliRunner
 import io
@@ -295,12 +295,12 @@ def test_weird_database_names(ensure_eventloop, tmpdir, filename):
     assert result1.exit_code == 0, result1.output
     filename_no_stem = filename.rsplit(".", 1)[0]
     expected_link = '<a href="/{}">{}</a>'.format(
-        dash_encode(filename_no_stem), filename_no_stem
+        tilde_encode(filename_no_stem), filename_no_stem
     )
     assert expected_link in result1.output
     # Now try hitting that database page
     result2 = runner.invoke(
-        cli, [db_path, "--get", "/{}".format(dash_encode(filename_no_stem))]
+        cli, [db_path, "--get", "/{}".format(tilde_encode(filename_no_stem))]
     )
     assert result2.exit_code == 0, result2.output

View file

@@ -29,7 +29,7 @@ def test_homepage(app_client_two_attached_databases):
     )
     # Should be two attached databases
     assert [
-        {"href": r"/extra-20database", "text": "extra database"},
+        {"href": "/extra~20database", "text": "extra database"},
         {"href": "/fixtures", "text": "fixtures"},
     ] == [{"href": a["href"], "text": a.text.strip()} for a in soup.select("h2 a")]
     # Database should show count text and attached tables
@@ -44,8 +44,8 @@ def test_homepage(app_client_two_attached_databases):
         {"href": a["href"], "text": a.text.strip()} for a in links_p.findAll("a")
     ]
     assert [
-        {"href": r"/extra-20database/searchable", "text": "searchable"},
-        {"href": r"/extra-20database/searchable_view", "text": "searchable_view"},
+        {"href": r"/extra~20database/searchable", "text": "searchable"},
+        {"href": r"/extra~20database/searchable_view", "text": "searchable_view"},
     ] == table_links
@@ -139,15 +139,15 @@ def test_database_page(app_client):
     queries_ul = soup.find("h2", text="Queries").find_next_sibling("ul")
     assert queries_ul is not None
     assert [
-        (
-            "/fixtures/-F0-9D-90-9C-F0-9D-90-A2-F0-9D-90-AD-F0-9D-90-A2-F0-9D-90-9E-F0-9D-90-AC",
-            "𝐜𝐢𝐭𝐢𝐞𝐬",
-        ),
         ("/fixtures/from_async_hook", "from_async_hook"),
         ("/fixtures/from_hook", "from_hook"),
         ("/fixtures/magic_parameters", "magic_parameters"),
         ("/fixtures/neighborhood_search#fragment-goes-here", "Search neighborhoods"),
         ("/fixtures/pragma_cache_size", "pragma_cache_size"),
+        (
+            "/fixtures/~F0~9D~90~9C~F0~9D~90~A2~F0~9D~90~AD~F0~9D~90~A2~F0~9D~90~9E~F0~9D~90~AC",
+            "𝐜𝐢𝐭𝐢𝐞𝐬",
+        ),
     ] == sorted(
         [(a["href"], a.text) for a in queries_ul.find_all("a")], key=lambda p: p[0]
     )
@@ -193,11 +193,11 @@ def test_row_redirects_with_url_hash(app_client_with_hash):
 
 
 def test_row_strange_table_name_with_url_hash(app_client_with_hash):
-    response = app_client_with_hash.get("/fixtures/table-2Fwith-2Fslashes-2Ecsv/3")
+    response = app_client_with_hash.get("/fixtures/table~2Fwith~2Fslashes~2Ecsv/3")
     assert response.status == 302
-    assert response.headers["Location"].endswith("/table-2Fwith-2Fslashes-2Ecsv/3")
+    assert response.headers["Location"].endswith("/table~2Fwith~2Fslashes~2Ecsv/3")
     response = app_client_with_hash.get(
-        "/fixtures/table-2Fwith-2Fslashes-2Ecsv/3", follow_redirects=True
+        "/fixtures/table~2Fwith~2Fslashes~2Ecsv/3", follow_redirects=True
     )
     assert response.status == 200
@@ -229,7 +229,7 @@ def test_row_page_does_not_truncate():
         ["query", "db-fixtures", "query-neighborhood_search"],
     ),
     (
-        "/fixtures/table%2Fwith%2Fslashes.csv",
+        "/fixtures/table~2Fwith~2Fslashes~2Ecsv",
         ["table", "db-fixtures", "table-tablewithslashescsv-fa7563"],
     ),
     (
@@ -255,7 +255,7 @@ def test_css_classes_on_body(app_client, path, expected_classes):
         "table-fixtures-simple_primary_key.html, *table.html",
     ),
     (
-        "/fixtures/table%2Fwith%2Fslashes.csv",
+        "/fixtures/table~2Fwith~2Fslashes~2Ecsv",
         "table-fixtures-tablewithslashescsv-fa7563.html, *table.html",
     ),
     (
@@ -359,7 +359,7 @@ def test_row_links_from_other_tables(app_client, path, expected_text, expected_l
         ],
     ),
     (
-        "/fixtures/compound_primary_key/a-2Fb,-2Ec-2Dd",
+        "/fixtures/compound_primary_key/a~2Fb,~2Ec-d",
         [
             [
                 '<td class="col-pk1 type-str">a/b</td>',
@@ -816,7 +816,8 @@ def test_base_url_affects_metadata_extra_css_urls(app_client_base_url_prefix):
         ),
         ("/fixtures/pragma_cache_size", None),
         (
-            "/fixtures/𝐜𝐢𝐭𝐢𝐞𝐬",
+            # /fixtures/𝐜𝐢𝐭𝐢𝐞𝐬
+            "/fixtures/~F0~9D~90~9C~F0~9D~90~A2~F0~9D~90~AD~F0~9D~90~A2~F0~9D~90~9E~F0~9D~90~AC",
             "/fixtures?sql=select+id%2C+name+from+facet_cities+order+by+id+limit+1%3B",
         ),
         ("/fixtures/magic_parameters", None),
@@ -824,6 +825,7 @@ def test_base_url_affects_metadata_extra_css_urls(app_client_base_url_prefix):
 )
 def test_edit_sql_link_on_canned_queries(app_client, path, expected):
     response = app_client.get(path)
+    assert response.status == 200
     expected_link = f'<a href="{expected}" class="canned-query-edit-sql">Edit SQL</a>'
     if expected:
         assert expected_link in response.text
@@ -898,8 +900,8 @@ def test_trace_correctly_escaped(app_client):
         # Table page
         ("/fixtures/facetable", "http://localhost/fixtures/facetable.json"),
         (
-            "/fixtures/table%2Fwith%2Fslashes.csv",
-            "http://localhost/fixtures/table%2Fwith%2Fslashes.csv?_format=json",
+            "/fixtures/table~2Fwith~2Fslashes~2Ecsv",
+            "http://localhost/fixtures/table~2Fwith~2Fslashes~2Ecsv.json",
         ),
         # Row page
         (
@@ -930,6 +932,7 @@ def test_trace_correctly_escaped(app_client):
 )
 def test_alternate_url_json(app_client, path, expected):
     response = app_client.get(path)
+    assert response.status == 200
     link = response.headers["link"]
     assert link == '{}; rel="alternate"; type="application/json+datasette"'.format(
         expected
@@ -959,13 +962,17 @@ def test_no_alternate_url_json(app_client, path):
     (
         (
            "/fivethirtyeight/twitter-ratio%2Fsenators",
-            "/fivethirtyeight/twitter-2Dratio-2Fsenators",
+            "/fivethirtyeight/twitter-ratio~2Fsenators",
+        ),
+        (
+            "/fixtures/table%2Fwith%2Fslashes.csv",
+            "/fixtures/table~2Fwith~2Fslashes~2Ecsv",
         ),
         # query string should be preserved
-        ("/foo/bar%2Fbaz?id=5", "/foo/bar-2Fbaz?id=5"),
+        ("/foo/bar%2Fbaz?id=5", "/foo/bar~2Fbaz?id=5"),
     ),
 )
-def test_redirect_percent_encoding_to_dash_encoding(app_client, path, expected):
+def test_redirect_percent_encoding_to_tilde_encoding(app_client, path, expected):
     response = app_client.get(path)
     assert response.status == 302
     assert response.headers["location"] == expected

View file

@@ -121,7 +121,7 @@ def test_database(ds, base_url, format, expected):
         ("/", "name", None, "/_memory/name"),
         ("/prefix/", "name", None, "/prefix/_memory/name"),
         ("/", "name", "json", "/_memory/name.json"),
-        ("/", "name.json", "json", "/_memory/name-2Ejson.json"),
+        ("/", "name.json", "json", "/_memory/name~2Ejson.json"),
     ],
 )
 def test_table_and_query(ds, base_url, name, format, expected):

View file

@@ -138,13 +138,13 @@ def test_table_shape_object_compound_primary_key(app_client):
     response = app_client.get("/fixtures/compound_primary_key.json?_shape=object")
     assert response.json == {
         "a,b": {"pk1": "a", "pk2": "b", "content": "c"},
-        "a-2Fb,-2Ec-2Dd": {"pk1": "a/b", "pk2": ".c-d", "content": "c"},
+        "a~2Fb,~2Ec-d": {"pk1": "a/b", "pk2": ".c-d", "content": "c"},
     }
 
 
 def test_table_with_slashes_in_name(app_client):
     response = app_client.get(
-        "/fixtures/table%2Fwith%2Fslashes.csv?_shape=objects&_format=json"
+        "/fixtures/table~2Fwith~2Fslashes~2Ecsv.json?_shape=objects"
     )
     assert response.status == 200
     data = response.json
@@ -1032,7 +1032,10 @@ def test_infinity_returned_as_invalid_json_if_requested(app_client):
 def test_custom_query_with_unicode_characters(app_client):
-    response = app_client.get("/fixtures/𝐜𝐢𝐭𝐢𝐞𝐬.json?_shape=array")
+    # /fixtures/𝐜𝐢𝐭𝐢𝐞𝐬.json
+    response = app_client.get(
+        "/fixtures/~F0~9D~90~9C~F0~9D~90~A2~F0~9D~90~AD~F0~9D~90~A2~F0~9D~90~9E~F0~9D~90~AC.json?_shape=array"
+    )
     assert [{"id": 1, "name": "San Francisco"}] == response.json

View file

@@ -565,7 +565,7 @@ def test_table_html_compound_primary_key(app_client):
             '<td class="col-content type-str">c</td>',
         ],
         [
-            '<td class="col-Link type-pk"><a href="/fixtures/compound_primary_key/a-2Fb,-2Ec-2Dd">a/b,.c-d</a></td>',
+            '<td class="col-Link type-pk"><a href="/fixtures/compound_primary_key/a~2Fb,~2Ec-d">a/b,.c-d</a></td>',
             '<td class="col-pk1 type-str">a/b</td>',
             '<td class="col-pk2 type-str">.c-d</td>',
             '<td class="col-content type-str">c</td>',

View file

@@ -19,8 +19,8 @@ from unittest.mock import patch
         ("foo", ["foo"]),
         ("foo,bar", ["foo", "bar"]),
         ("123,433,112", ["123", "433", "112"]),
-        ("123%2C433,112", ["123,433", "112"]),
-        ("123%2F433%2F112", ["123/433/112"]),
+        ("123~2C433,112", ["123,433", "112"]),
+        ("123~2F433~2F112", ["123/433/112"]),
     ],
 )
 def test_urlsafe_components(path, expected):
@@ -93,7 +93,7 @@ def test_path_with_replaced_args(path, args, expected):
     "row,pks,expected_path",
     [
         ({"A": "foo", "B": "bar"}, ["A", "B"], "foo,bar"),
-        ({"A": "f,o", "B": "bar"}, ["A", "B"], "f-2Co,bar"),
+        ({"A": "f,o", "B": "bar"}, ["A", "B"], "f~2Co,bar"),
         ({"A": 123}, ["A"], "123"),
         (
             utils.CustomRow(
@@ -393,9 +393,7 @@ def test_table_columns():
         ("/foo?sql=select+1", "json", {}, "/foo.json?sql=select+1"),
         ("/foo/bar", "json", {}, "/foo/bar.json"),
         ("/foo/bar", "csv", {}, "/foo/bar.csv"),
-        ("/foo/bar.csv", "json", {}, "/foo/bar.csv?_format=json"),
         ("/foo/bar", "csv", {"_dl": 1}, "/foo/bar.csv?_dl=1"),
-        ("/foo/b.csv", "json", {"_dl": 1}, "/foo/b.csv?_dl=1&_format=json"),
         (
             "/sf-trees/Street_Tree_List?_search=cherry&_size=1000",
             "csv",
@@ -410,18 +408,6 @@ def test_path_with_format(path, format, extra_qs, expected):
     assert expected == actual
 
 
-def test_path_with_format_replace_format():
-    request = Request.fake("/foo/bar.csv")
-    assert (
-        utils.path_with_format(request=request, format="blob")
-        == "/foo/bar.csv?_format=blob"
-    )
-    assert (
-        utils.path_with_format(request=request, format="blob", replace_format="csv")
-        == "/foo/bar.blob"
-    )
-
-
 @pytest.mark.parametrize(
     "bytes,expected",
     [
@@ -652,15 +638,15 @@ async def test_derive_named_parameters(sql, expected):
     "original,expected",
     (
         ("abc", "abc"),
-        ("/foo/bar", "-2Ffoo-2Fbar"),
-        ("/-/bar", "-2F-2D-2Fbar"),
-        ("-/db-/table.csv", "-2D-2Fdb-2D-2Ftable-2Ecsv"),
-        (r"%~-/", "-25-7E-2D-2F"),
-        ("-25-7E-2D-2F", "-2D25-2D7E-2D2D-2D2F"),
+        ("/foo/bar", "~2Ffoo~2Fbar"),
+        ("/-/bar", "~2F-~2Fbar"),
+        ("-/db-/table.csv", "-~2Fdb-~2Ftable~2Ecsv"),
+        (r"%~-/", "~25~7E-~2F"),
+        ("~25~7E~2D~2F", "~7E25~7E7E~7E2D~7E2F"),
     ),
 )
-def test_dash_encoding(original, expected):
-    actual = utils.dash_encode(original)
+def test_tilde_encoding(original, expected):
+    actual = utils.tilde_encode(original)
     assert actual == expected
     # And test round-trip
-    assert original == utils.dash_decode(actual)
+    assert original == utils.tilde_decode(actual)