From 9fdb47ca952b93b7b60adddb965ea6642b1ff523 Mon Sep 17 00:00:00 2001
From: Simon Willison
Date: Sun, 5 May 2019 18:36:04 -0400
Subject: [PATCH] New encode/decode_path_component functions

ASGI cannot differentiate between / and %2F in a URL, so we need an
alternative scheme for encoding the names of tables that contain special
characters such as /

For background, see
https://github.com/django/asgiref/issues/51#issuecomment-450603464

Some examples:

    "table/and/slashes" => "tableU+002FandU+002Fslashes"
    "~table" => "U+007Etable"
    "+bobcats!" => "U+002Bbobcats!"
    "U+007Etable" => "UU+002B007Etable"
---
Review note: the decode pattern was tightened from [\da-h] to [0-9a-f] so
that a non-hexadecimal sequence such as "U+00GH" is left untouched instead
of raising ValueError; docstrings and a regression test were added.

 datasette/utils.py  | 26 ++++++++++++++++++++++++++
 tests/test_utils.py | 21 +++++++++++++++++++++
 2 files changed, 47 insertions(+)

diff --git a/datasette/utils.py b/datasette/utils.py
index ae471b10..aef5d0ca 100644
--- a/datasette/utils.py
+++ b/datasette/utils.py
@@ -261,6 +261,32 @@ def escape_sqlite(s):
         return "[{}]".format(s)
 
 
+# Matches the "U+XXXX" escapes produced by encode_path_component(). Exactly
+# four hexadecimal digits are required, so int(..., 16) below cannot fail.
+_decode_path_component_re = re.compile(r"U\+([0-9a-f]{4})", re.IGNORECASE)
+# Matches every character that encode_path_component() has to escape.
+_encode_path_component_re = re.compile(
+    "[{}]".format(
+        "".join(
+            re.escape(c)
+            for c in (";", "/", "?", ":", "@", "&", "=", "+", "$", ",", "~")
+        )
+    )
+)
+
+
+def decode_path_component(table_name):
+    """Turn each U+XXXX escape in table_name back into its character"""
+    return _decode_path_component_re.sub(lambda m: chr(int(m.group(1), 16)), table_name)
+
+
+def encode_path_component(table_name):
+    """Replace URL-reserved characters in table_name with U+XXXX escapes"""
+    return _encode_path_component_re.sub(
+        lambda m: "U+{0:0{1}x}".format(ord(m.group(0)), 4).upper(), table_name
+    )
+
+
 def make_dockerfile(
     files,
     metadata_file,
diff --git a/tests/test_utils.py b/tests/test_utils.py
index a5f603e6..73aee12a 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -381,3 +381,24 @@ def test_path_with_format(path, format, extra_qs, expected):
 )
 def test_format_bytes(bytes, expected):
     assert expected == utils.format_bytes(bytes)
+
+
+@pytest.mark.parametrize(
+    "name,expected",
+    [
+        ("table", "table"),
+        ("table/and/slashes", "tableU+002FandU+002Fslashes"),
+        ("~table", "U+007Etable"),
+        ("+bobcats!", "U+002Bbobcats!"),
+        ("U+007Etable", "UU+002B007Etable"),
+    ],
+)
+def test_encode_decode_path_component(name, expected):
+    encoded = utils.encode_path_component(name)
+    assert encoded == expected
+    assert name == utils.decode_path_component(encoded)
+
+
+def test_decode_path_component_ignores_non_hex():
+    # "G" and "H" are not hexadecimal digits, so this is not an escape
+    assert "U+00GH" == utils.decode_path_component("U+00GH")