Tilde encoding now encodes space as plus, closes #1701

Refs #1657
pull/1703/head
Simon Willison 2022-04-06 08:55:01 -07:00
rodzic df88d03298
commit 90d1be9952
4 zmienionych plików z 18 dodań i 7 usunięć

Wyświetl plik

@ -1113,12 +1113,20 @@ _TILDE_ENCODING_SAFE = frozenset(
# '.' and '~'
)
_space = ord(" ")
class TildeEncoder(dict):
    """Self-populating lookup table from an integer code to its tilde-encoded text.

    The dict doubles as a cache: the first lookup of a code goes through
    ``__missing__``, which computes the encoded form and stores it, so later
    lookups of the same code are plain dict hits.
    """

    def __missing__(self, b):
        """Compute, cache and return the encoded string for code ``b``.

        ``b`` is an integer (presumably a single byte value; confirm against
        the caller). Codes listed in ``_TILDE_ENCODING_SAFE`` map to their own
        character, the space code maps to ``"+"``, and every other code maps
        to ``"~"`` followed by its two-digit uppercase hexadecimal value.
        """
        if b in _TILDE_ENCODING_SAFE:
            res = chr(b)
        elif b == _space:
            # Space is special-cased to "+" rather than "~20".
            res = "+"
        else:
            res = "~{:02X}".format(b)
        # Store the result so this code never triggers __missing__ again.
        self[b] = res
        return res
@ -1138,7 +1146,7 @@ def tilde_decode(s: str) -> str:
# Avoid accidentally decoding a %2f style sequence
temp = secrets.token_hex(16)
s = s.replace("%", temp)
decoded = urllib.parse.unquote(s.replace("~", "%"))
decoded = urllib.parse.unquote_plus(s.replace("~", "%"))
return decoded.replace(temp, "%")

Wyświetl plik

@ -980,15 +980,17 @@ Datasette uses a custom encoding scheme in some places, called **tilde encoding*
Tilde encoding uses the same algorithm as `URL percent-encoding <https://developer.mozilla.org/en-US/docs/Glossary/percent-encoding>`__, but with the ``~`` tilde character used in place of ``%``.
Any character other than ``ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz 0123456789_-`` will be replaced by the numeric equivalent preceded by a tilde. For example:
Any character other than ``ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_-`` will be replaced by a tilde followed by its two-digit hexadecimal value. For example:
- ``/`` becomes ``~2F``
- ``.`` becomes ``~2E``
- ``%`` becomes ``~25``
- ``~`` becomes ``~7E``
- Space character becomes ``~20``
- Space becomes ``+``
- ``polls/2022.primary`` becomes ``polls~2F2022~2Eprimary``
Note that the space character is a special case: it will be replaced with a ``+`` symbol.
.. _internals_utils_tilde_encode:
.. autofunction:: datasette.utils.tilde_encode

Wyświetl plik

@ -28,7 +28,7 @@ def test_homepage(app_client_two_attached_databases):
)
# Should be two attached databases
assert [
{"href": "/extra~20database", "text": "extra database"},
{"href": "/extra+database", "text": "extra database"},
{"href": "/fixtures", "text": "fixtures"},
] == [{"href": a["href"], "text": a.text.strip()} for a in soup.select("h2 a")]
# Database should show count text and attached tables
@ -43,8 +43,8 @@ def test_homepage(app_client_two_attached_databases):
{"href": a["href"], "text": a.text.strip()} for a in links_p.findAll("a")
]
assert [
{"href": r"/extra~20database/searchable", "text": "searchable"},
{"href": r"/extra~20database/searchable_view", "text": "searchable_view"},
{"href": r"/extra+database/searchable", "text": "searchable"},
{"href": r"/extra+database/searchable_view", "text": "searchable_view"},
] == table_links

Wyświetl plik

@ -618,6 +618,7 @@ async def test_derive_named_parameters(sql, expected):
("-/db-/table.csv", "-~2Fdb-~2Ftable~2Ecsv"),
(r"%~-/", "~25~7E-~2F"),
("~25~7E~2D~2F", "~7E25~7E7E~7E2D~7E2F"),
("with space", "with+space"),
),
)
def test_tilde_encoding(original, expected):