Tilde encoding now encodes space as plus, closes #1701

Refs #1657
pull/1703/head
Simon Willison 2022-04-06 08:55:01 -07:00
rodzic df88d03298
commit 90d1be9952
4 zmienionych plików z 18 dodań i 7 usunięć

Wyświetl plik

@ -1113,12 +1113,20 @@ _TILDE_ENCODING_SAFE = frozenset(
# '.' and '~' # '.' and '~'
) )
_space = ord(" ")
class TildeEncoder(dict): class TildeEncoder(dict):
# Keeps a cache internally, via __missing__ # Keeps a cache internally, via __missing__
def __missing__(self, b): def __missing__(self, b):
print("b is ", b)
# Handle a cache miss, store encoded string in cache and return. # Handle a cache miss, store encoded string in cache and return.
res = chr(b) if b in _TILDE_ENCODING_SAFE else "~{:02X}".format(b) if b in _TILDE_ENCODING_SAFE:
res = chr(b)
elif b == _space:
res = "+"
else:
res = "~{:02X}".format(b)
self[b] = res self[b] = res
return res return res
@ -1138,7 +1146,7 @@ def tilde_decode(s: str) -> str:
# Avoid accidentally decoding a %2f style sequence # Avoid accidentally decoding a %2f style sequence
temp = secrets.token_hex(16) temp = secrets.token_hex(16)
s = s.replace("%", temp) s = s.replace("%", temp)
decoded = urllib.parse.unquote(s.replace("~", "%")) decoded = urllib.parse.unquote_plus(s.replace("~", "%"))
return decoded.replace(temp, "%") return decoded.replace(temp, "%")

Wyświetl plik

@ -980,15 +980,17 @@ Datasette uses a custom encoding scheme in some places, called **tilde encoding*
Tilde encoding uses the same algorithm as `URL percent-encoding <https://developer.mozilla.org/en-US/docs/Glossary/percent-encoding>`__, but with the ``~`` tilde character used in place of ``%``. Tilde encoding uses the same algorithm as `URL percent-encoding <https://developer.mozilla.org/en-US/docs/Glossary/percent-encoding>`__, but with the ``~`` tilde character used in place of ``%``.
Any character other than ``ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz 0123456789_-`` will be replaced by the numeric equivalent preceded by a tilde. For example: Any character other than ``ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_-`` will be replaced by the numeric equivalent preceded by a tilde. For example:
- ``/`` becomes ``~2F`` - ``/`` becomes ``~2F``
- ``.`` becomes ``~2E`` - ``.`` becomes ``~2E``
- ``%`` becomes ``~25`` - ``%`` becomes ``~25``
- ``~`` becomes ``~7E`` - ``~`` becomes ``~7E``
- Space character becomes ``~20`` - Space becomes ``+``
- ``polls/2022.primary`` becomes ``polls~2F2022~2Eprimary`` - ``polls/2022.primary`` becomes ``polls~2F2022~2Eprimary``
Note that the space character is a special case: it will be replaced with a ``+`` symbol.
.. _internals_utils_tilde_encode: .. _internals_utils_tilde_encode:
.. autofunction:: datasette.utils.tilde_encode .. autofunction:: datasette.utils.tilde_encode

Wyświetl plik

@ -28,7 +28,7 @@ def test_homepage(app_client_two_attached_databases):
) )
# Should be two attached databases # Should be two attached databases
assert [ assert [
{"href": "/extra~20database", "text": "extra database"}, {"href": "/extra+database", "text": "extra database"},
{"href": "/fixtures", "text": "fixtures"}, {"href": "/fixtures", "text": "fixtures"},
] == [{"href": a["href"], "text": a.text.strip()} for a in soup.select("h2 a")] ] == [{"href": a["href"], "text": a.text.strip()} for a in soup.select("h2 a")]
# Database should show count text and attached tables # Database should show count text and attached tables
@ -43,8 +43,8 @@ def test_homepage(app_client_two_attached_databases):
{"href": a["href"], "text": a.text.strip()} for a in links_p.findAll("a") {"href": a["href"], "text": a.text.strip()} for a in links_p.findAll("a")
] ]
assert [ assert [
{"href": r"/extra~20database/searchable", "text": "searchable"}, {"href": r"/extra+database/searchable", "text": "searchable"},
{"href": r"/extra~20database/searchable_view", "text": "searchable_view"}, {"href": r"/extra+database/searchable_view", "text": "searchable_view"},
] == table_links ] == table_links

Wyświetl plik

@ -618,6 +618,7 @@ async def test_derive_named_parameters(sql, expected):
("-/db-/table.csv", "-~2Fdb-~2Ftable~2Ecsv"), ("-/db-/table.csv", "-~2Fdb-~2Ftable~2Ecsv"),
(r"%~-/", "~25~7E-~2F"), (r"%~-/", "~25~7E-~2F"),
("~25~7E~2D~2F", "~7E25~7E7E~7E2D~7E2F"), ("~25~7E~2D~2F", "~7E25~7E7E~7E2D~7E2F"),
("with space", "with+space"),
), ),
) )
def test_tilde_encoding(original, expected): def test_tilde_encoding(original, expected):