test(guardrail): assert halt message reaches stream_delta_callback
Some checks failed
Lint (ruff + ty) / ruff + ty diff (push) Has been cancelled
Lint (ruff + ty) / ruff enforcement (blocking) (push) Has been cancelled
Lint (ruff + ty) / Windows footguns (blocking) (push) Has been cancelled
Nix / nix (macos-latest) (push) Has been cancelled
Nix / nix (ubuntu-latest) (push) Has been cancelled
Tests / test (1) (push) Has been cancelled
Tests / test (2) (push) Has been cancelled
Tests / test (3) (push) Has been cancelled
Tests / test (4) (push) Has been cancelled
Tests / test (5) (push) Has been cancelled
Tests / test (6) (push) Has been cancelled
Tests / save-durations (push) Has been cancelled
Tests / e2e (push) Has been cancelled
Build Skills Index / build-index (push) Has been cancelled
Build Skills Index / deploy-with-index (push) Has been cancelled
Docker Build and Publish / build-amd64 (push) Has been cancelled
Docker Build and Publish / build-arm64 (push) Has been cancelled
Docker Build and Publish / merge (push) Has been cancelled
Docker Build and Publish / move-latest (push) Has been cancelled
Some checks failed
Lint (ruff + ty) / ruff + ty diff (push) Has been cancelled
Lint (ruff + ty) / ruff enforcement (blocking) (push) Has been cancelled
Lint (ruff + ty) / Windows footguns (blocking) (push) Has been cancelled
Nix / nix (macos-latest) (push) Has been cancelled
Nix / nix (ubuntu-latest) (push) Has been cancelled
Tests / test (1) (push) Has been cancelled
Tests / test (2) (push) Has been cancelled
Tests / test (3) (push) Has been cancelled
Tests / test (4) (push) Has been cancelled
Tests / test (5) (push) Has been cancelled
Tests / test (6) (push) Has been cancelled
Tests / save-durations (push) Has been cancelled
Tests / e2e (push) Has been cancelled
Build Skills Index / build-index (push) Has been cancelled
Build Skills Index / deploy-with-index (push) Has been cancelled
Docker Build and Publish / build-amd64 (push) Has been cancelled
Docker Build and Publish / build-arm64 (push) Has been cancelled
Docker Build and Publish / merge (push) Has been cancelled
Docker Build and Publish / move-latest (push) Has been cancelled
Regression guard for #30770 — verifies the guardrail-halt branch in agent/conversation_loop.py pushes the synthesized halt message through stream_delta_callback before breaking out of the loop. Without the emit, chat-completions SSE writers drain an empty queue and clients (Open WebUI, etc.) see a finish chunk with zero content delta — indistinguishable from a crash. Verified: the test fails when the production fix is reverted.
This commit is contained in:
@@ -304,3 +304,52 @@ def test_config_enabled_hard_stop_run_conversation_returns_controlled_guardrail_
|
|||||||
call_ids = [tc["id"] for tc in assistant_msg["tool_calls"]]
|
call_ids = [tc["id"] for tc in assistant_msg["tool_calls"]]
|
||||||
following_results = [m for m in result["messages"] if m.get("role") == "tool" and m.get("tool_call_id") in call_ids]
|
following_results = [m for m in result["messages"] if m.get("role") == "tool" and m.get("tool_call_id") in call_ids]
|
||||||
assert len(following_results) == len(call_ids)
|
assert len(following_results) == len(call_ids)
|
||||||
|
|
||||||
|
|
||||||
|
def test_guardrail_halt_emits_final_response_through_stream_delta_callback():
    """Guardrail halt must stream its synthesized message (regression #30770).

    When the guardrail stops the conversation loop, the halt text returned
    as ``final_response`` also has to be delivered via
    ``stream_delta_callback``. Otherwise SSE/TUI consumers only observe the
    stream closing: the chat-completions SSE writer drains an empty queue
    and emits a finish chunk with no content delta, which clients such as
    Open WebUI cannot tell apart from a crash.
    """
    agent = _make_agent("web_search", max_iterations=10, config=_hard_stop_config())

    # Every mocked turn requests the very same tool call (only the call id
    # differs), so the arguments payload is serialized once up front.
    repeated_args_json = json.dumps({"query": "same"})
    scripted_turns = []
    for i in range(1, 10):
        tool_call = _mock_tool_call("web_search", repeated_args_json, f"c{i}")
        scripted_turns.append(
            _mock_response(
                content="",
                finish_reason="tool_calls",
                tool_calls=[tool_call],
            )
        )
    agent.client.chat.completions.create.side_effect = scripted_turns

    streamed: list = []

    def _record_delta(d):
        # Capture everything the agent pushes to the streaming callback.
        streamed.append(d)

    agent.stream_delta_callback = _record_delta
    # The mocked client hands back SimpleNamespace responses, which cannot
    # be iterated as streaming chunks. Forcing the non-streaming path lets
    # the run reach the guardrail-halt branch without the real streaming
    # machinery getting involved.
    agent._disable_streaming = True

    failing_tool_result = json.dumps({"error": "boom"})
    with (
        patch("run_agent.handle_function_call", return_value=failing_tool_result),
        patch.object(agent, "_persist_session"),
        patch.object(agent, "_save_trajectory"),
        patch.object(agent, "_cleanup_task_resources"),
    ):
        outcome = agent.run_conversation("search repeatedly")

    assert outcome["turn_exit_reason"] == "guardrail_halt"
    halt_message = outcome["final_response"]
    assert "stopped retrying" in halt_message

    # The bug was an empty queue on the SSE side: no content delta arrived
    # before the finish chunk. So the halt message has to show up among the
    # string deltas the callback received, at least once.
    string_deltas = [item for item in streamed if isinstance(item, str)]
    assert halt_message in string_deltas, (
        f"halt message was never streamed; callback only saw {streamed!r}"
    )
|
||||||
|
|||||||
Reference in New Issue
Block a user