tracer.trace_child_tasks() for asyncio.gather tracing

Also added documentation for datasette.tracer module.

Closes #1576
pull/1631/head
Simon Willison 2022-02-04 21:19:49 -08:00
rodzic ac239d34ab
commit da53e0360d
4 zmienionych plików z 111 dodań i 7 usunięć

Wyświetl plik

@ -1,5 +1,6 @@
import asyncio
from contextlib import contextmanager
from contextvars import ContextVar
from markupsafe import escape
import time
import json
@ -9,20 +10,25 @@ tracers = {}
TRACE_RESERVED_KEYS = {"type", "start", "end", "duration_ms", "traceback"}
# asyncio.current_task was introduced in Python 3.7; on older versions fall
# back to the pre-3.7 spelling asyncio.Task.current_task.
current_task = getattr(asyncio, "current_task", None)
if current_task is None:
    current_task = getattr(asyncio.Task, "current_task", None)
trace_task_id = ContextVar("trace_task_id", default=None)


def get_task_id():
    """Return the ID used to group traces for the current logical task.

    If :func:`trace_child_tasks` has stored a parent task ID in the
    ``trace_task_id`` context variable, return that, so traces recorded in
    child tasks (e.g. under ``asyncio.gather()``) attach to the parent's
    trace. Otherwise return the ``id()`` of the currently running asyncio
    task, or ``None`` if no event loop is available.
    """
    current = trace_task_id.get(None)
    if current is not None:
        return current
    try:
        loop = asyncio.get_event_loop()
    except RuntimeError:
        return None
    # Note: diff residue previously left a duplicate, unreachable return
    # here that used the legacy current_task() shim; asyncio.current_task
    # is the modern (Python 3.7+) spelling.
    return id(asyncio.current_task(loop=loop))
@contextmanager
def trace_child_tasks():
    """Context manager that propagates the current trace task ID to child tasks.

    Wrap code that spawns additional tasks (for example ``asyncio.gather()``)
    so that traces recorded in those child tasks are grouped with the trace
    of the task that spawned them.
    """
    token = trace_task_id.set(get_task_id())
    try:
        yield
    finally:
        # Reset even if the wrapped block raises, so the context variable
        # does not leak the parent task ID into unrelated code.
        trace_task_id.reset(token)
@contextmanager

Wyświetl plik

@ -864,3 +864,74 @@ parse_metadata(content)
This function accepts a string containing either JSON or YAML, expected to be of the format described in :ref:`metadata`. It returns a nested Python dictionary representing the parsed data from that string.
If the metadata cannot be parsed as either JSON or YAML the function will raise a ``utils.BadMetadataError`` exception.
.. _internals_tracer:
datasette.tracer
================
Running Datasette with ``--setting trace_debug 1`` enables trace debug output, which can then be viewed by adding ``?_trace=1`` to the query string for any page.
You can see an example of this at the bottom of `latest.datasette.io/fixtures/facetable?_trace=1 <https://latest.datasette.io/fixtures/facetable?_trace=1>`__. The JSON output shows full details of every SQL query that was executed to generate the page.
The `datasette-pretty-traces <https://datasette.io/plugins/datasette-pretty-traces>`__ plugin can be installed to provide a more readable display of this information. You can see `a demo of that here <https://latest-with-plugins.datasette.io/github/commits?_trace=1>`__.
You can add your own custom traces to the JSON output using the ``trace()`` context manager. This takes a string that identifies the type of trace being recorded, and records any keyword arguments as additional JSON keys on the resulting trace object.
The start and end time, duration and a traceback of where the trace was executed will be automatically attached to the JSON object.
This example uses ``trace()`` to record the start, end and duration of every HTTP GET request made by the ``fetch_url()`` function it defines:
.. code-block:: python
from datasette.tracer import trace
import httpx
async def fetch_url(url):
with trace("fetch-url", url=url):
async with httpx.AsyncClient() as client:
return await client.get(url)
.. _internals_tracer_trace_child_tasks:
Tracing child tasks
-------------------
If your code uses a mechanism such as ``asyncio.gather()`` to execute code in additional tasks you may find that some of the traces are missing from the display.
You can use the ``trace_child_tasks()`` context manager to ensure these child tasks are correctly handled.
.. code-block:: python
from datasette import tracer
with tracer.trace_child_tasks():
results = await asyncio.gather(
# ... async tasks here
)
This example uses the :ref:`register_routes() <plugin_register_routes>` plugin hook to add a page at ``/parallel-queries`` which executes two SQL queries in parallel using ``asyncio.gather()`` and returns their results.
.. code-block:: python
from datasette import hookimpl
from datasette import tracer
@hookimpl
def register_routes():
async def parallel_queries(datasette):
db = datasette.get_database()
with tracer.trace_child_tasks():
one, two = await asyncio.gather(
db.execute("select 1"),
db.execute("select 2"),
)
return Response.json({"one": one.single_value(), "two": two.single_value()})
return [
(r"/parallel-queries$", parallel_queries),
]
Adding ``?_trace=1`` will show that the trace covers both of those child tasks.

Wyświetl plik

@ -1,5 +1,7 @@
import asyncio
from datasette import hookimpl
from datasette.facets import Facet
from datasette import tracer
from datasette.utils import path_with_added_args
from datasette.utils.asgi import asgi_send_json, Response
import base64
@ -270,6 +272,15 @@ def register_routes():
def asgi_scope(scope):
    # Debug endpoint: return the raw ASGI scope as JSON. default=repr handles
    # scope values that are not JSON-serializable.
    return Response.json(scope, default=repr)
async def parallel_queries(datasette):
    # Demonstrates trace_child_tasks(): run two slow SQL queries
    # concurrently and return both results as JSON.
    db = datasette.get_database()
    with tracer.trace_child_tasks():
        results = await asyncio.gather(
            db.execute("select coalesce(sleep(0.1), 1)"),
            db.execute("select coalesce(sleep(0.1), 2)"),
        )
    return Response.json(
        {"one": results[0].single_value(), "two": results[1].single_value()}
    )
return [
(r"/one/$", one),
(r"/two/(?P<name>.*)$", two),
@ -281,6 +292,7 @@ def register_routes():
(r"/add-message/$", add_message),
(r"/render-message/$", render_message),
(r"/asgi-scope$", asgi_scope),
(r"/parallel-queries$", parallel_queries),
]

Wyświetl plik

@ -51,3 +51,18 @@ def test_trace(trace_debug):
execute_manys = [trace for trace in traces if trace.get("executemany")]
assert execute_manys
assert all(isinstance(trace["count"], int) for trace in execute_manys)
def test_trace_parallel_queries():
    """The two SQL queries behind /parallel-queries must overlap in the trace."""
    with make_app_client(settings={"trace_debug": True}) as client:
        response = client.get("/parallel-queries?_trace=1")
        assert response.status == 200
        data = response.json
        assert (data["one"], data["two"]) == (1, 2)
        sql_traces = [t for t in data["_trace"]["traces"] if "sql" in t]
        first, second = sql_traces
        # The second query must begin before the first one finishes,
        # proving the two ran concurrently rather than serially.
        assert second["start"] < first["end"]