kopia lustrzana https://github.com/simonw/datasette
tracer.trace_child_tasks() for asyncio.gather tracing
Also added documentation for datasette.tracer module. Closes #1576
rodzic
ac239d34ab
commit
da53e0360d
|
@ -1,5 +1,6 @@
|
|||
import asyncio
|
||||
from contextlib import contextmanager
|
||||
from contextvars import ContextVar
|
||||
from markupsafe import escape
|
||||
import time
|
||||
import json
|
||||
|
@ -9,20 +10,25 @@ tracers = {}
|
|||
|
||||
TRACE_RESERVED_KEYS = {"type", "start", "end", "duration_ms", "traceback"}
|
||||
|
||||
|
||||
# Compatibility shim: asyncio.current_task() was introduced in Python 3.7;
# older versions exposed it as asyncio.Task.current_task.
for obj in (asyncio, asyncio.Task):
    current_task = getattr(obj, "current_task", None)
    if current_task is not None:
        break

# Holds the id of an "owning" task so that traces recorded inside child
# tasks (e.g. ones spawned by asyncio.gather) can be grouped under the
# parent request's trace.
trace_task_id = ContextVar("trace_task_id", default=None)


def get_task_id():
    """Return the id used to group traces for the current logical task.

    Prefers an explicitly propagated parent id (set by trace_child_tasks());
    otherwise falls back to the id() of the currently running asyncio task.
    Returns None if no event loop is available.
    """
    current = trace_task_id.get(None)
    if current is not None:
        return current
    try:
        loop = asyncio.get_event_loop()
    except RuntimeError:
        # No event loop in this thread - tracing cannot identify a task
        return None
    # NOTE(review): this bypasses the current_task compatibility shim above,
    # leaving it as a dead feature-probe - confirm pre-3.7 support has been
    # dropped before removing the shim entirely.
    return id(asyncio.current_task(loop=loop))


@contextmanager
def trace_child_tasks():
    """Context manager: propagate this task's trace id to child tasks.

    Wrap code that spawns additional tasks (asyncio.gather() etc.) so that
    traces recorded in those tasks are attributed to the current request.
    """
    token = trace_task_id.set(get_task_id())
    try:
        yield
    finally:
        # Reset even if the wrapped block raises, so the context variable
        # never leaks a stale parent id into unrelated code on this task.
        trace_task_id.reset(token)
|
||||
|
||||
|
||||
@contextmanager
|
||||
|
|
|
@ -864,3 +864,74 @@ parse_metadata(content)
|
|||
This function accepts a string containing either JSON or YAML, expected to be of the format described in :ref:`metadata`. It returns a nested Python dictionary representing the parsed data from that string.
|
||||
|
||||
If the metadata cannot be parsed as either JSON or YAML the function will raise a ``utils.BadMetadataError`` exception.
|
||||
|
||||
.. _internals_tracer:
|
||||
|
||||
datasette.tracer
|
||||
================
|
||||
|
||||
Running Datasette with ``--setting trace_debug 1`` enables trace debug output, which can then be viewed by adding ``?_trace=1`` to the query string for any page.
|
||||
|
||||
You can see an example of this at the bottom of `latest.datasette.io/fixtures/facetable?_trace=1 <https://latest.datasette.io/fixtures/facetable?_trace=1>`__. The JSON output shows full details of every SQL query that was executed to generate the page.
|
||||
|
||||
The `datasette-pretty-traces <https://datasette.io/plugins/datasette-pretty-traces>`__ plugin can be installed to provide a more readable display of this information. You can see `a demo of that here <https://latest-with-plugins.datasette.io/github/commits?_trace=1>`__.
|
||||
|
||||
You can add your own custom traces to the JSON output using the ``trace()`` context manager. This takes a string that identifies the type of trace being recorded, and records any keyword arguments as additional JSON keys on the resulting trace object.
|
||||
|
||||
The start and end time, duration and a traceback of where the trace was executed will be automatically attached to the JSON object.
|
||||
|
||||
This example uses trace to record the start, end and duration of any HTTP GET requests made using the function:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
from datasette.tracer import trace
|
||||
import httpx
|
||||
|
||||
async def fetch_url(url):
|
||||
with trace("fetch-url", url=url):
|
||||
async with httpx.AsyncClient() as client:
|
||||
return await client.get(url)
|
||||
|
||||
.. _internals_tracer_trace_child_tasks:
|
||||
|
||||
Tracing child tasks
|
||||
-------------------
|
||||
|
||||
If your code uses a mechanism such as ``asyncio.gather()`` to execute code in additional tasks you may find that some of the traces are missing from the display.
|
||||
|
||||
You can use the ``trace_child_tasks()`` context manager to ensure these child tasks are correctly handled.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
from datasette import tracer
|
||||
|
||||
with tracer.trace_child_tasks():
|
||||
results = await asyncio.gather(
|
||||
# ... async tasks here
|
||||
)
|
||||
|
||||
This example uses the :ref:`register_routes() <plugin_register_routes>` plugin hook to add a page at ``/parallel-queries`` which executes two SQL queries in parallel using ``asyncio.gather()`` and returns their results.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
from datasette import hookimpl
|
||||
from datasette import tracer
|
||||
|
||||
@hookimpl
|
||||
def register_routes():
|
||||
|
||||
async def parallel_queries(datasette):
|
||||
db = datasette.get_database()
|
||||
with tracer.trace_child_tasks():
|
||||
one, two = await asyncio.gather(
|
||||
db.execute("select 1"),
|
||||
db.execute("select 2"),
|
||||
)
|
||||
return Response.json({"one": one.single_value(), "two": two.single_value()})
|
||||
|
||||
return [
|
||||
(r"/parallel-queries$", parallel_queries),
|
||||
]
|
||||
|
||||
|
||||
Adding ``?_trace=1`` will show that the trace covers both of those child tasks.
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
import asyncio
|
||||
from datasette import hookimpl
|
||||
from datasette.facets import Facet
|
||||
from datasette import tracer
|
||||
from datasette.utils import path_with_added_args
|
||||
from datasette.utils.asgi import asgi_send_json, Response
|
||||
import base64
|
||||
|
@ -270,6 +272,15 @@ def register_routes():
|
|||
def asgi_scope(scope):
    # Debug route: render the raw ASGI scope as JSON. repr() handles
    # values JSON cannot encode (byte strings, transport objects, etc.).
    return Response.json(scope, default=repr)
|
||||
|
||||
async def parallel_queries(datasette):
    """Run two deliberately slow SQL queries concurrently, return both as JSON."""
    db = datasette.get_database()
    # trace_child_tasks() propagates the trace context into the tasks
    # spawned by asyncio.gather(), so both queries show up in ?_trace=1.
    with tracer.trace_child_tasks():
        results = await asyncio.gather(
            db.execute("select coalesce(sleep(0.1), 1)"),
            db.execute("select coalesce(sleep(0.1), 2)"),
        )
    one, two = results
    return Response.json({"one": one.single_value(), "two": two.single_value()})
|
||||
|
||||
return [
|
||||
(r"/one/$", one),
|
||||
(r"/two/(?P<name>.*)$", two),
|
||||
|
@ -281,6 +292,7 @@ def register_routes():
|
|||
(r"/add-message/$", add_message),
|
||||
(r"/render-message/$", render_message),
|
||||
(r"/asgi-scope$", asgi_scope),
|
||||
(r"/parallel-queries$", parallel_queries),
|
||||
]
|
||||
|
||||
|
||||
|
|
|
@ -51,3 +51,18 @@ def test_trace(trace_debug):
|
|||
execute_manys = [trace for trace in traces if trace.get("executemany")]
|
||||
assert execute_manys
|
||||
assert all(isinstance(trace["count"], int) for trace in execute_manys)
|
||||
|
||||
|
||||
def test_trace_parallel_queries():
    # The /parallel-queries fixture route runs two queries via asyncio.gather()
    # inside tracer.trace_child_tasks(); both must appear in the trace output.
    with make_app_client(settings={"trace_debug": True}) as client:
        response = client.get("/parallel-queries?_trace=1")
        assert response.status == 200

        data = response.json
        assert data["one"] == 1
        assert data["two"] == 2
        sql_traces = [t for t in data["_trace"]["traces"] if "sql" in t]
        first, second = sql_traces
        # Queries ran in parallel: the second started before the first ended
        assert second["start"] < first["end"]
|
||||
|
|
Ładowanie…
Reference in New Issue