diff --git a/datasette/utils/__init__.py b/datasette/utils/__init__.py
index 9109f823..4745254e 100644
--- a/datasette/utils/__init__.py
+++ b/datasette/utils/__init__.py
@@ -1113,12 +1113,20 @@ _TILDE_ENCODING_SAFE = frozenset(
     # '.' and '~'
 )
 
+# Integer value of " "; TildeEncoder maps it to "+" instead of "~20"
+_space = ord(" ")
+
 
 class TildeEncoder(dict):
     # Keeps a cache internally, via __missing__
     def __missing__(self, b):
         # Handle a cache miss, store encoded string in cache and return.
-        res = chr(b) if b in _TILDE_ENCODING_SAFE else "~{:02X}".format(b)
+        if b in _TILDE_ENCODING_SAFE:
+            res = chr(b)
+        elif b == _space:
+            res = "+"
+        else:
+            res = "~{:02X}".format(b)
         self[b] = res
         return res
 
@@ -1138,7 +1146,7 @@ def tilde_decode(s: str) -> str:
     # Avoid accidentally decoding a %2f style sequence
     temp = secrets.token_hex(16)
     s = s.replace("%", temp)
-    decoded = urllib.parse.unquote(s.replace("~", "%"))
+    decoded = urllib.parse.unquote_plus(s.replace("~", "%"))
     return decoded.replace(temp, "%")
 
 
diff --git a/docs/internals.rst b/docs/internals.rst
index 854b96f8..76e27e5f 100644
--- a/docs/internals.rst
+++ b/docs/internals.rst
@@ -980,15 +980,17 @@ Datasette uses a custom encoding scheme in some places, called **tilde encoding*
 
 Tilde encoding uses the same algorithm as `URL percent-encoding `__, but with the ``~`` tilde character used in place of ``%``.
 
-Any character other than ``ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz 0123456789_-`` will be replaced by the numeric equivalent preceded by a tilde. For example:
+Any character other than ``ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_-`` will be replaced by the numeric equivalent preceded by a tilde. For example:
 
 - ``/`` becomes ``~2F``
 - ``.`` becomes ``~2E``
 - ``%`` becomes ``~25``
 - ``~`` becomes ``~7E``
-- Space character becomes ``~20``
+- Space becomes ``+``
 - ``polls/2022.primary`` becomes ``polls~2F2022~2Eprimary``
 
+Note that the space character is a special case: it will be replaced with a ``+`` symbol.
+
 .. _internals_utils_tilde_encode:
 
 .. autofunction:: datasette.utils.tilde_encode
diff --git a/tests/test_html.py b/tests/test_html.py
index 6e4c22b1..42f1a3ee 100644
--- a/tests/test_html.py
+++ b/tests/test_html.py
@@ -28,7 +28,7 @@ def test_homepage(app_client_two_attached_databases):
     )
     # Should be two attached databases
     assert [
-        {"href": "/extra~20database", "text": "extra database"},
+        {"href": "/extra+database", "text": "extra database"},
         {"href": "/fixtures", "text": "fixtures"},
     ] == [{"href": a["href"], "text": a.text.strip()} for a in soup.select("h2 a")]
     # Database should show count text and attached tables
@@ -43,8 +43,8 @@ def test_homepage(app_client_two_attached_databases):
         {"href": a["href"], "text": a.text.strip()} for a in links_p.findAll("a")
     ]
     assert [
-        {"href": r"/extra~20database/searchable", "text": "searchable"},
-        {"href": r"/extra~20database/searchable_view", "text": "searchable_view"},
+        {"href": r"/extra+database/searchable", "text": "searchable"},
+        {"href": r"/extra+database/searchable_view", "text": "searchable_view"},
     ] == table_links
 
 
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 7b41a87f..df788767 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -618,6 +618,7 @@ async def test_derive_named_parameters(sql, expected):
         ("-/db-/table.csv", "-~2Fdb-~2Ftable~2Ecsv"),
         (r"%~-/", "~25~7E-~2F"),
         ("~25~7E~2D~2F", "~7E25~7E7E~7E2D~7E2F"),
+        ("with space", "with+space"),
     ),
 )
 def test_tilde_encoding(original, expected):