tracer.trace_child_tasks() for asyncio.gather tracing

Also added documentation for datasette.tracer module.

Closes #1576
pull/1631/head
Simon Willison 2022-02-04 21:19:49 -08:00
rodzic ac239d34ab
commit da53e0360d
4 zmienionych plików z 111 dodań i 7 usunięć

Wyświetl plik

@ -1,5 +1,6 @@
import asyncio
from contextlib import contextmanager
from contextvars import ContextVar
from markupsafe import escape
import time
import json
@ -9,20 +10,25 @@ tracers = {}
TRACE_RESERVED_KEYS = {"type", "start", "end", "duration_ms", "traceback"}
# asyncio.current_task was introduced in Python 3.7; on older versions fall
# back to the pre-3.7 spelling asyncio.Task.current_task.
current_task = getattr(asyncio, "current_task", None)
if current_task is None:
    current_task = getattr(asyncio.Task, "current_task", None)
trace_task_id = ContextVar("trace_task_id", default=None)


def get_task_id():
    """Return the ID used to group traces for the current logical task.

    If :func:`trace_child_tasks` has stored a parent task ID in the
    ``trace_task_id`` context variable, return that, so traces recorded in
    child tasks (e.g. under ``asyncio.gather()``) attach to the parent's
    trace. Otherwise return the ``id()`` of the currently running asyncio
    task, or ``None`` if no event loop is available.
    """
    current = trace_task_id.get(None)
    if current is not None:
        return current
    try:
        loop = asyncio.get_event_loop()
    except RuntimeError:
        return None
    # Note: diff residue previously left a duplicate, unreachable return
    # here that used the legacy current_task() shim; asyncio.current_task
    # is the modern (Python 3.7+) spelling.
    return id(asyncio.current_task(loop=loop))
@contextmanager
def trace_child_tasks():
    """Context manager that propagates the current trace task ID to child tasks.

    Wrap code that spawns additional tasks (for example ``asyncio.gather()``)
    so that traces recorded in those child tasks are grouped with the trace
    of the task that spawned them.
    """
    token = trace_task_id.set(get_task_id())
    try:
        yield
    finally:
        # Reset even if the wrapped block raises, so the context variable
        # does not leak the parent task ID into unrelated code.
        trace_task_id.reset(token)
@contextmanager

Wyświetl plik

@ -864,3 +864,74 @@ parse_metadata(content)
This function accepts a string containing either JSON or YAML, expected to be of the format described in :ref:`metadata`. It returns a nested Python dictionary representing the parsed data from that string.
If the metadata cannot be parsed as either JSON or YAML the function will raise a ``utils.BadMetadataError`` exception.
.. _internals_tracer:
datasette.tracer
================
Running Datasette with ``--setting trace_debug 1`` enables trace debug output, which can then be viewed by adding ``?_trace=1`` to the query string for any page.
You can see an example of this at the bottom of `latest.datasette.io/fixtures/facetable?_trace=1 <https://latest.datasette.io/fixtures/facetable?_trace=1>`__. The JSON output shows full details of every SQL query that was executed to generate the page.
The `datasette-pretty-traces <https://datasette.io/plugins/datasette-pretty-traces>`__ plugin can be installed to provide a more readable display of this information. You can see `a demo of that here <https://latest-with-plugins.datasette.io/github/commits?_trace=1>`__.
You can add your own custom traces to the JSON output using the ``trace()`` context manager. This takes a string that identifies the type of trace being recorded, and records any keyword arguments as additional JSON keys on the resulting trace object.
The start and end time, duration and a traceback of where the trace was executed will be automatically attached to the JSON object.
This example uses ``trace()`` to record the start, end and duration of every HTTP GET request made by the ``fetch_url()`` function it defines:
.. code-block:: python
from datasette.tracer import trace
import httpx
async def fetch_url(url):
with trace("fetch-url", url=url):
async with httpx.AsyncClient() as client:
return await client.get(url)
.. _internals_tracer_trace_child_tasks:
Tracing child tasks
-------------------
If your code uses a mechanism such as ``asyncio.gather()`` to execute code in additional tasks you may find that some of the traces are missing from the display.
You can use the ``trace_child_tasks()`` context manager to ensure these child tasks are correctly handled.
.. code-block:: python
from datasette import tracer
with tracer.trace_child_tasks():
results = await asyncio.gather(
# ... async tasks here
)
This example uses the :ref:`register_routes() <plugin_register_routes>` plugin hook to add a page at ``/parallel-queries`` which executes two SQL queries in parallel using ``asyncio.gather()`` and returns their results.
.. code-block:: python
from datasette import hookimpl
from datasette import tracer
@hookimpl
def register_routes():
async def parallel_queries(datasette):
db = datasette.get_database()
with tracer.trace_child_tasks():
one, two = await asyncio.gather(
db.execute("select 1"),
db.execute("select 2"),
)
return Response.json({"one": one.single_value(), "two": two.single_value()})
return [
(r"/parallel-queries$", parallel_queries),
]
Adding ``?_trace=1`` will show that the trace covers both of those child tasks.

Wyświetl plik

@ -1,5 +1,7 @@
import asyncio
from datasette import hookimpl
from datasette.facets import Facet
from datasette import tracer
from datasette.utils import path_with_added_args
from datasette.utils.asgi import asgi_send_json, Response
import base64
@ -270,6 +272,15 @@ def register_routes():
def asgi_scope(scope):
    # Debug endpoint: return the raw ASGI scope as JSON. default=repr handles
    # scope values that are not JSON-serializable.
    return Response.json(scope, default=repr)
async def parallel_queries(datasette):
    # Demonstrates trace_child_tasks(): run two slow SQL queries
    # concurrently and return both results as JSON.
    db = datasette.get_database()
    with tracer.trace_child_tasks():
        results = await asyncio.gather(
            db.execute("select coalesce(sleep(0.1), 1)"),
            db.execute("select coalesce(sleep(0.1), 2)"),
        )
    return Response.json(
        {"one": results[0].single_value(), "two": results[1].single_value()}
    )
return [
(r"/one/$", one),
(r"/two/(?P<name>.*)$", two),
@ -281,6 +292,7 @@ def register_routes():
(r"/add-message/$", add_message),
(r"/render-message/$", render_message),
(r"/asgi-scope$", asgi_scope),
(r"/parallel-queries$", parallel_queries),
]

Wyświetl plik

@ -51,3 +51,18 @@ def test_trace(trace_debug):
execute_manys = [trace for trace in traces if trace.get("executemany")]
assert execute_manys
assert all(isinstance(trace["count"], int) for trace in execute_manys)
def test_trace_parallel_queries():
    """The two SQL queries behind /parallel-queries must overlap in the trace."""
    with make_app_client(settings={"trace_debug": True}) as client:
        response = client.get("/parallel-queries?_trace=1")
        assert response.status == 200
        data = response.json
        assert (data["one"], data["two"]) == (1, 2)
        sql_traces = [t for t in data["_trace"]["traces"] if "sql" in t]
        first, second = sql_traces
        # The second query must begin before the first one finishes,
        # proving the two ran concurrently rather than serially.
        assert second["start"] < first["end"]