From 12395ba6ed073487f4defd74be139229092203ba Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 20 Sep 2023 15:10:55 -0700 Subject: [PATCH] Stop using parallel SQL queries for tables Refs: - #2189 --- datasette/views/table.py | 16 ++++++---------- docs/internals.rst | 1 + 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/datasette/views/table.py b/datasette/views/table.py index 50ba2b78..4f4baeed 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -74,11 +74,10 @@ class Row: return json.dumps(d, default=repr, indent=2) -async def _gather_parallel(*args): - return await asyncio.gather(*args) - - -async def _gather_sequential(*args): +async def run_sequential(*args): + # This used to be swappable for asyncio.gather() to run things in + # parallel, but this lead to hard-to-debug locking issues with + # in-memory databases: https://github.com/simonw/datasette/issues/2189 results = [] for fn in args: results.append(await fn) @@ -1183,9 +1182,6 @@ async def table_view_data( ) rows = rows[:page_size] - # For performance profiling purposes, ?_noparallel=1 turns off asyncio.gather - gather = _gather_sequential if request.args.get("_noparallel") else _gather_parallel - # Resolve extras extras = _get_extras(request) if any(k for k in request.args.keys() if k == "_facet" or k.startswith("_facet_")): @@ -1249,7 +1245,7 @@ async def table_view_data( if not nofacet: # Run them in parallel facet_awaitables = [facet.facet_results() for facet in facet_instances] - facet_awaitable_results = await gather(*facet_awaitables) + facet_awaitable_results = await run_sequential(*facet_awaitables) for ( instance_facet_results, instance_facets_timed_out, @@ -1282,7 +1278,7 @@ async def table_view_data( ): # Run them in parallel facet_suggest_awaitables = [facet.suggest() for facet in facet_instances] - for suggest_result in await gather(*facet_suggest_awaitables): + for suggest_result in await run_sequential(*facet_suggest_awaitables): suggested_facets.extend(suggest_result) return suggested_facets diff --git a/docs/internals.rst b/docs/internals.rst index 13f1d4a1..41ae1f10 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -1317,6 +1317,7 @@ This example uses the :ref:`register_routes() ` plugin h (r"/parallel-queries$", parallel_queries), ] +Note that running parallel SQL queries in this way has `been known to cause problems in the past `__, so treat this example with caution. Adding ``?_trace=1`` will show that the trace covers both of those child tasks.