dash encoding is now like percent encoding but with dashes

Refs https://github.com/simonw/datasette/issues/1439#issuecomment-1059851259
pull/1648/head
Simon Willison 2022-03-06 10:53:25 -08:00
rodzic f568c76cfb
commit d2e3fe3fac
3 zmienionych plików z 38 dodań i 12 usunięć

Wyświetl plik

@ -1141,13 +1141,34 @@ def add_cors_headers(headers):
headers["Access-Control-Expose-Headers"] = "Link"
# Byte values that dash encoding leaves unchanged: ASCII letters, digits
# and underscore. Iterating over a bytes literal yields integers, so this
# is a frozenset of ints that DashEncoder tests each byte value against.
_DASH_ENCODING_SAFE = frozenset(
    b"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    b"abcdefghijklmnopqrstuvwxyz"
    b"0123456789_"
    # This is the same safe set as Python percent-encoding, but with
    # '.' and '-' and '~' removed
)
class DashEncoder(dict):
    # Maps a byte value to its dash-encoded text. Entries are created
    # lazily: a lookup for a byte value that is not stored yet goes
    # through __missing__, which computes and stores the result.
    def __missing__(self, b):
        # Safe bytes map to their own character; every other byte becomes
        # a hyphen followed by two uppercase hexadecimal digits.
        if b in _DASH_ENCODING_SAFE:
            encoded = chr(b)
        else:
            encoded = "-{:02X}".format(b)
        # Store the result so later lookups for this byte skip __missing__.
        self[b] = encoded
        return encoded
# Bound ``__getitem__`` of one shared DashEncoder instance, so that
# ``_dash_encoder(byte_value)`` returns the encoded text for that byte and
# every caller reuses the same stored results.
_dash_encoder = DashEncoder().__getitem__
@documented
def dash_encode(s: str) -> str:
    "Returns dash-encoded string - for example ``/foo/bar`` -> ``-2Ffoo-2Fbar``"
    # Encode to UTF-8 first so each character is escaped byte by byte;
    # iterating over bytes yields the integer values _dash_encoder expects.
    return "".join(_dash_encoder(byte) for byte in s.encode("utf-8"))
@documented
def dash_decode(s: str) -> str:
    "Decodes a dash-encoded string, so ``-2Ffoo-2Fbar`` -> ``/foo/bar``"
    # "%" is not an escape character in dash encoding, so protect any
    # literal "%" as "%25" before turning each "-" into "%". Without this,
    # an input such as "%2F" would be wrongly decoded to "/".
    return urllib.parse.unquote(s.replace("%", "%25").replace("-", "%"))

Wyświetl plik

@ -883,13 +883,16 @@ Dash encoding
Datasette uses a custom encoding scheme in some places, called **dash encoding**. This is primarily used for table names and row primary keys, to avoid any confusion between ``/`` characters in those values and the Datasette URLs that reference them.
Dash encoding applies the following rules, in order:
Dash encoding uses the same algorithm as `URL percent-encoding <https://developer.mozilla.org/en-US/docs/Glossary/percent-encoding>`__, but with the ``-`` hyphen character used in place of ``%``.
- All single ``-`` characters are replaced by ``--``
- ``.`` characters are replaced by ``-.``
- ``/`` characters are replaced by ``-/``
Any character other than ``ABCDEFGHIJKLMNOPQRSTUVWXYZ``, ``abcdefghijklmnopqrstuvwxyz``, ``0123456789`` and ``_`` is encoded as UTF-8, and each resulting byte is replaced by a hyphen followed by its two-digit uppercase hexadecimal value. For example:
These rules are applied in reverse order to decode a dash encoded string.
- ``/`` becomes ``-2F``
- ``.`` becomes ``-2E``
- ``%`` becomes ``-25``
- ``-`` becomes ``-2D``
- Space character becomes ``-20``
- ``polls/2022.primary`` becomes ``polls-2F2022-2Eprimary``
.. _internals_utils_dash_encode:

Wyświetl plik

@ -652,9 +652,11 @@ async def test_derive_named_parameters(sql, expected):
"original,expected",
(
("abc", "abc"),
("/foo/bar", "-/foo-/bar"),
("/-/bar", "-/---/bar"),
("-/db-/table---.csv-.csv", "---/db---/table-------.csv---.csv"),
("/foo/bar", "-2Ffoo-2Fbar"),
("/-/bar", "-2F-2D-2Fbar"),
("-/db-/table.csv", "-2D-2Fdb-2D-2Ftable-2Ecsv"),
(r"%~-/", "-25-7E-2D-2F"),
("-25-7E-2D-2F", "-2D25-2D7E-2D2D-2D2F"),
),
)
def test_dash_encoding(original, expected):