fix(acp): also mark raised-exception tool results as failed
Extends #26573 to also catch the case the original PR deliberately left out: when a tool raises an exception, the agent's tool executor wraps it in a canonical 'Error executing tool '<name>': ...' string prefix (see agent/tool_executor.py around the try/except). That prefix is unique to the wrapper and cannot legitimately appear in well-behaved tool output, so it is a safe signal that the tool blew up. Without this, the canonical 'tool raised' case still rendered as a green 'completed' row in Zed despite being a runtime failure — exactly the class of bug #26573 set out to fix. Adds a positive test (raised-exception prefix -> failed) and a negative test (bare 'Error:' word in legit tool output stays completed) so a future contributor doesn't accidentally widen the rule to false-positive on compiler/linter diagnostics.
This commit is contained in:
@@ -209,6 +209,15 @@ def _tool_result_failed(result: Optional[str], tool_name: str | None = None) ->
|
|||||||
"error" because tests failed or a command printed diagnostics; Zed should
|
"error" because tests failed or a command printed diagnostics; Zed should
|
||||||
only receive ACP failed status for structured tool-level failures.
|
only receive ACP failed status for structured tool-level failures.
|
||||||
"""
|
"""
|
||||||
|
# Raised exceptions from the agent's tool executor get wrapped in a
|
||||||
|
# canonical "Error executing tool '<name>': ..." prefix (see
|
||||||
|
# agent/tool_executor.py around the try/except). That prefix is uniquely
|
||||||
|
# produced by the wrapper itself — it cannot legitimately appear in
|
||||||
|
# well-behaved tool output. Catch it so a tool that blew up shows as
|
||||||
|
# failed in Zed instead of misleadingly green.
|
||||||
|
if isinstance(result, str) and result.startswith("Error executing tool '"):
|
||||||
|
return True
|
||||||
|
|
||||||
data = _json_loads_maybe(result)
|
data = _json_loads_maybe(result)
|
||||||
if not isinstance(data, dict):
|
if not isinstance(data, dict):
|
||||||
return False
|
return False
|
||||||
|
|||||||
@@ -365,6 +365,31 @@ class TestBuildToolComplete:
|
|||||||
result = build_tool_complete("tc-ok", "terminal", "tests failed: 1 assertion error")
|
result = build_tool_complete("tc-ok", "terminal", "tests failed: 1 assertion error")
|
||||||
assert result.status == "completed"
|
assert result.status == "completed"
|
||||||
|
|
||||||
|
def test_build_tool_complete_marks_raised_exception_prefix_as_failed(self):
|
||||||
|
"""The agent's tool executor wraps raised exceptions in a canonical
|
||||||
|
"Error executing tool '<name>': ..." prefix. That prefix is unique to
|
||||||
|
the wrapper and means the tool blew up, so it must surface as failed
|
||||||
|
in Zed regardless of whether the body parses as JSON.
|
||||||
|
"""
|
||||||
|
result = build_tool_complete(
|
||||||
|
"tc-fail-exc",
|
||||||
|
"patch",
|
||||||
|
"Error executing tool 'patch': KeyError: 'foo'",
|
||||||
|
)
|
||||||
|
assert result.status == "failed"
|
||||||
|
|
||||||
|
def test_build_tool_complete_does_not_match_error_word_alone(self):
|
||||||
|
"""Bare 'Error: ...' messages (without the unique 'Error executing
|
||||||
|
tool '<name>':' prefix) must still be reported as completed — they
|
||||||
|
legitimately appear in compiler/linter/test output.
|
||||||
|
"""
|
||||||
|
result = build_tool_complete(
|
||||||
|
"tc-ok-error-word",
|
||||||
|
"terminal",
|
||||||
|
"Error: pytest collected 0 items",
|
||||||
|
)
|
||||||
|
assert result.status == "completed"
|
||||||
|
|
||||||
def test_build_tool_complete_marks_structured_polished_tool_error_as_failed(self):
|
def test_build_tool_complete_marks_structured_polished_tool_error_as_failed(self):
|
||||||
result = build_tool_complete("tc-fail", "read_file", '{"error": "File not found"}')
|
result = build_tool_complete("tc-fail", "read_file", '{"error": "File not found"}')
|
||||||
assert result.status == "failed"
|
assert result.status == "failed"
|
||||||
|
|||||||
Reference in New Issue
Block a user