From 01535a4732a1d9c95ebd3a2473cce690c02ebac2 Mon Sep 17 00:00:00 2001 From: Teknium Date: Sat, 25 Apr 2026 18:38:56 -0700 Subject: [PATCH] fix(api_server): cap stop-run wait at 5s so interrupt can't hang handler task.cancel() can't preempt the run_in_executor thread running run_conversation(), so we rely on agent.interrupt() to wake the loop. Without a timeout, a slow/unresponsive interrupt blocks the HTTP response indefinitely. Wrap the await in wait_for(shield(task), 5.0) and log a warning on timeout. Also tidy one extra space in the module docstring's /stop entry. --- gateway/platforms/api_server.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/gateway/platforms/api_server.py b/gateway/platforms/api_server.py index 659197f69..b7a6a0969 100644 --- a/gateway/platforms/api_server.py +++ b/gateway/platforms/api_server.py @@ -9,7 +9,7 @@ Exposes an HTTP server with endpoints: - GET /v1/models — lists hermes-agent as an available model - POST /v1/runs — start a run, returns run_id immediately (202) - GET /v1/runs/{run_id}/events — SSE stream of structured lifecycle events -- POST /v1/runs/{run_id}/stop — interrupt a running agent +- POST /v1/runs/{run_id}/stop — interrupt a running agent - GET /health — health check - GET /health/detailed — rich status for cross-container dashboard probing @@ -2569,8 +2569,18 @@ class APIServerAdapter(BasePlatformAdapter): if task is not None and not task.done(): task.cancel() + # Bounded wait: run_conversation() executes in the default + # executor thread which task.cancel() cannot preempt — we rely on + # agent.interrupt() above to break the loop. Cap the wait so a + # slow/unresponsive interrupt can't hang this handler. try: - await task + await asyncio.wait_for(asyncio.shield(task), timeout=5.0) + except asyncio.TimeoutError: + logger.warning( + "[api_server] stop for run %s timed out after 5s; " + "agent may still be finishing the current step", + run_id, + ) except (asyncio.CancelledError, Exception): pass