Merge branch 'main' of github.com:NousResearch/hermes-agent into feat/ink-refactor
This commit is contained in:
@@ -42,9 +42,10 @@ class TestToolProgressCallback:
|
||||
def test_emits_tool_call_start(self, mock_conn, event_loop_fixture):
|
||||
"""Tool progress should emit a ToolCallStart update."""
|
||||
tool_call_ids = {}
|
||||
tool_call_meta = {}
|
||||
loop = event_loop_fixture
|
||||
|
||||
cb = make_tool_progress_cb(mock_conn, "session-1", loop, tool_call_ids)
|
||||
cb = make_tool_progress_cb(mock_conn, "session-1", loop, tool_call_ids, tool_call_meta)
|
||||
|
||||
# Run callback in the event loop context
|
||||
with patch("acp_adapter.events.asyncio.run_coroutine_threadsafe") as mock_rcts:
|
||||
@@ -66,9 +67,10 @@ class TestToolProgressCallback:
|
||||
def test_handles_string_args(self, mock_conn, event_loop_fixture):
|
||||
"""If args is a JSON string, it should be parsed."""
|
||||
tool_call_ids = {}
|
||||
tool_call_meta = {}
|
||||
loop = event_loop_fixture
|
||||
|
||||
cb = make_tool_progress_cb(mock_conn, "session-1", loop, tool_call_ids)
|
||||
cb = make_tool_progress_cb(mock_conn, "session-1", loop, tool_call_ids, tool_call_meta)
|
||||
|
||||
with patch("acp_adapter.events.asyncio.run_coroutine_threadsafe") as mock_rcts:
|
||||
future = MagicMock(spec=Future)
|
||||
@@ -82,9 +84,10 @@ class TestToolProgressCallback:
|
||||
def test_handles_non_dict_args(self, mock_conn, event_loop_fixture):
|
||||
"""If args is not a dict, it should be wrapped."""
|
||||
tool_call_ids = {}
|
||||
tool_call_meta = {}
|
||||
loop = event_loop_fixture
|
||||
|
||||
cb = make_tool_progress_cb(mock_conn, "session-1", loop, tool_call_ids)
|
||||
cb = make_tool_progress_cb(mock_conn, "session-1", loop, tool_call_ids, tool_call_meta)
|
||||
|
||||
with patch("acp_adapter.events.asyncio.run_coroutine_threadsafe") as mock_rcts:
|
||||
future = MagicMock(spec=Future)
|
||||
@@ -98,10 +101,11 @@ class TestToolProgressCallback:
|
||||
def test_duplicate_same_name_tool_calls_use_fifo_ids(self, mock_conn, event_loop_fixture):
|
||||
"""Multiple same-name tool calls should be tracked independently in order."""
|
||||
tool_call_ids = {}
|
||||
tool_call_meta = {}
|
||||
loop = event_loop_fixture
|
||||
|
||||
progress_cb = make_tool_progress_cb(mock_conn, "session-1", loop, tool_call_ids)
|
||||
step_cb = make_step_cb(mock_conn, "session-1", loop, tool_call_ids)
|
||||
progress_cb = make_tool_progress_cb(mock_conn, "session-1", loop, tool_call_ids, tool_call_meta)
|
||||
step_cb = make_step_cb(mock_conn, "session-1", loop, tool_call_ids, tool_call_meta)
|
||||
|
||||
with patch("acp_adapter.events.asyncio.run_coroutine_threadsafe") as mock_rcts:
|
||||
future = MagicMock(spec=Future)
|
||||
@@ -163,7 +167,7 @@ class TestStepCallback:
|
||||
tool_call_ids = {"terminal": "tc-abc123"}
|
||||
loop = event_loop_fixture
|
||||
|
||||
cb = make_step_cb(mock_conn, "session-1", loop, tool_call_ids)
|
||||
cb = make_step_cb(mock_conn, "session-1", loop, tool_call_ids, {})
|
||||
|
||||
with patch("acp_adapter.events.asyncio.run_coroutine_threadsafe") as mock_rcts:
|
||||
future = MagicMock(spec=Future)
|
||||
@@ -181,7 +185,7 @@ class TestStepCallback:
|
||||
tool_call_ids = {}
|
||||
loop = event_loop_fixture
|
||||
|
||||
cb = make_step_cb(mock_conn, "session-1", loop, tool_call_ids)
|
||||
cb = make_step_cb(mock_conn, "session-1", loop, tool_call_ids, {})
|
||||
|
||||
with patch("acp_adapter.events.asyncio.run_coroutine_threadsafe") as mock_rcts:
|
||||
cb(1, [{"name": "unknown_tool", "result": "ok"}])
|
||||
@@ -193,7 +197,7 @@ class TestStepCallback:
|
||||
tool_call_ids = {"read_file": "tc-def456"}
|
||||
loop = event_loop_fixture
|
||||
|
||||
cb = make_step_cb(mock_conn, "session-1", loop, tool_call_ids)
|
||||
cb = make_step_cb(mock_conn, "session-1", loop, tool_call_ids, {})
|
||||
|
||||
with patch("acp_adapter.events.asyncio.run_coroutine_threadsafe") as mock_rcts:
|
||||
future = MagicMock(spec=Future)
|
||||
@@ -212,7 +216,7 @@ class TestStepCallback:
|
||||
tool_call_ids = {"terminal": deque(["tc-xyz789"])}
|
||||
loop = event_loop_fixture
|
||||
|
||||
cb = make_step_cb(mock_conn, "session-1", loop, tool_call_ids)
|
||||
cb = make_step_cb(mock_conn, "session-1", loop, tool_call_ids, {})
|
||||
|
||||
with patch("acp_adapter.events.asyncio.run_coroutine_threadsafe") as mock_rcts, \
|
||||
patch("acp_adapter.events.build_tool_complete") as mock_btc:
|
||||
@@ -224,7 +228,7 @@ class TestStepCallback:
|
||||
cb(1, [{"name": "terminal", "result": '{"output": "hello"}'}])
|
||||
|
||||
mock_btc.assert_called_once_with(
|
||||
"tc-xyz789", "terminal", result='{"output": "hello"}'
|
||||
"tc-xyz789", "terminal", result='{"output": "hello"}', function_args=None, snapshot=None
|
||||
)
|
||||
|
||||
def test_none_result_passed_through(self, mock_conn, event_loop_fixture):
|
||||
@@ -234,7 +238,7 @@ class TestStepCallback:
|
||||
tool_call_ids = {"web_search": deque(["tc-aaa"])}
|
||||
loop = event_loop_fixture
|
||||
|
||||
cb = make_step_cb(mock_conn, "session-1", loop, tool_call_ids)
|
||||
cb = make_step_cb(mock_conn, "session-1", loop, tool_call_ids, {})
|
||||
|
||||
with patch("acp_adapter.events.asyncio.run_coroutine_threadsafe") as mock_rcts, \
|
||||
patch("acp_adapter.events.build_tool_complete") as mock_btc:
|
||||
@@ -244,7 +248,50 @@ class TestStepCallback:
|
||||
|
||||
cb(1, [{"name": "web_search", "result": None}])
|
||||
|
||||
mock_btc.assert_called_once_with("tc-aaa", "web_search", result=None)
|
||||
mock_btc.assert_called_once_with("tc-aaa", "web_search", result=None, function_args=None, snapshot=None)
|
||||
|
||||
def test_step_callback_passes_arguments_and_snapshot(self, mock_conn, event_loop_fixture):
|
||||
from collections import deque
|
||||
|
||||
tool_call_ids = {"write_file": deque(["tc-write"])}
|
||||
tool_call_meta = {"tc-write": {"args": {"path": "fallback.txt"}, "snapshot": "snap"}}
|
||||
loop = event_loop_fixture
|
||||
|
||||
cb = make_step_cb(mock_conn, "session-1", loop, tool_call_ids, tool_call_meta)
|
||||
|
||||
with patch("acp_adapter.events.asyncio.run_coroutine_threadsafe") as mock_rcts, \
|
||||
patch("acp_adapter.events.build_tool_complete") as mock_btc:
|
||||
future = MagicMock(spec=Future)
|
||||
future.result.return_value = None
|
||||
mock_rcts.return_value = future
|
||||
|
||||
cb(1, [{"name": "write_file", "result": '{"bytes_written": 23}', "arguments": {"path": "diff-test.txt"}}])
|
||||
|
||||
mock_btc.assert_called_once_with(
|
||||
"tc-write",
|
||||
"write_file",
|
||||
result='{"bytes_written": 23}',
|
||||
function_args={"path": "diff-test.txt"},
|
||||
snapshot="snap",
|
||||
)
|
||||
|
||||
def test_tool_progress_captures_snapshot_metadata(self, mock_conn, event_loop_fixture):
|
||||
tool_call_ids = {}
|
||||
tool_call_meta = {}
|
||||
loop = event_loop_fixture
|
||||
|
||||
with patch("acp_adapter.events.make_tool_call_id", return_value="tc-meta"), \
|
||||
patch("acp_adapter.events._send_update") as mock_send, \
|
||||
patch("agent.display.capture_local_edit_snapshot", return_value="snapshot"):
|
||||
cb = make_tool_progress_cb(mock_conn, "session-1", loop, tool_call_ids, tool_call_meta)
|
||||
cb("tool.started", "write_file", None, {"path": "diff-test.txt", "content": "hello"})
|
||||
|
||||
assert list(tool_call_ids["write_file"]) == ["tc-meta"]
|
||||
assert tool_call_meta["tc-meta"] == {
|
||||
"args": {"path": "diff-test.txt", "content": "hello"},
|
||||
"snapshot": "snapshot",
|
||||
}
|
||||
mock_send.assert_called_once()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@@ -29,6 +29,7 @@ from acp.schema import (
|
||||
|
||||
from acp_adapter.server import HermesACPAgent
|
||||
from acp_adapter.session import SessionManager
|
||||
from acp_adapter.tools import build_tool_start
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -181,6 +182,25 @@ class TestMcpRegistrationE2E:
|
||||
assert complete_event.raw_output is not None
|
||||
assert "hello" in str(complete_event.raw_output)
|
||||
|
||||
def test_patch_mode_tool_start_emits_diff_blocks_for_v4a_patch(self):
|
||||
update = build_tool_start(
|
||||
"tc-1",
|
||||
"patch",
|
||||
{
|
||||
"mode": "patch",
|
||||
"patch": "*** Begin Patch\n*** Update File: src/app.py\n@@\n-old line\n+new line\n*** Add File: src/new.py\n+hello\n*** End Patch",
|
||||
},
|
||||
)
|
||||
|
||||
assert len(update.content) == 2
|
||||
assert update.content[0].type == "diff"
|
||||
assert update.content[0].path == "src/app.py"
|
||||
assert update.content[0].old_text == "old line"
|
||||
assert update.content[0].new_text == "new line"
|
||||
assert update.content[1].type == "diff"
|
||||
assert update.content[1].path == "src/new.py"
|
||||
assert update.content[1].new_text == "hello"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_prompt_tool_results_paired_by_call_id(self, acp_agent, mock_manager):
|
||||
"""The ToolCallUpdate's toolCallId must match the ToolCallStart's."""
|
||||
|
||||
@@ -20,7 +20,9 @@ from acp.schema import (
|
||||
NewSessionResponse,
|
||||
PromptResponse,
|
||||
ResumeSessionResponse,
|
||||
SessionModelState,
|
||||
SetSessionConfigOptionResponse,
|
||||
SetSessionModelResponse,
|
||||
SetSessionModeResponse,
|
||||
SessionInfo,
|
||||
TextContentBlock,
|
||||
@@ -127,6 +129,25 @@ class TestSessionOps:
|
||||
assert state is not None
|
||||
assert state.cwd == "/home/user/project"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_new_session_returns_model_state(self):
|
||||
manager = SessionManager(
|
||||
agent_factory=lambda: SimpleNamespace(model="gpt-5.4", provider="openai-codex")
|
||||
)
|
||||
acp_agent = HermesACPAgent(session_manager=manager)
|
||||
|
||||
with patch(
|
||||
"hermes_cli.models.curated_models_for_provider",
|
||||
return_value=[("gpt-5.4", "recommended"), ("gpt-5.4-mini", "")],
|
||||
):
|
||||
resp = await acp_agent.new_session(cwd="/tmp")
|
||||
|
||||
assert isinstance(resp.models, SessionModelState)
|
||||
assert resp.models.current_model_id == "openai-codex:gpt-5.4"
|
||||
assert resp.models.available_models[0].model_id == "openai-codex:gpt-5.4"
|
||||
assert resp.models.available_models[0].description is not None
|
||||
assert "Provider:" in resp.models.available_models[0].description
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_available_commands_include_help(self, agent):
|
||||
help_cmd = next(
|
||||
@@ -204,6 +225,33 @@ class TestListAndFork:
|
||||
assert fork_resp.session_id
|
||||
assert fork_resp.session_id != new_resp.session_id
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_list_sessions_includes_title_and_updated_at(self, agent):
|
||||
with patch.object(
|
||||
agent.session_manager,
|
||||
"list_sessions",
|
||||
return_value=[
|
||||
{
|
||||
"session_id": "session-1",
|
||||
"cwd": "/tmp/project",
|
||||
"title": "Fix Zed session history",
|
||||
"updated_at": 123.0,
|
||||
}
|
||||
],
|
||||
):
|
||||
resp = await agent.list_sessions(cwd="/tmp/project")
|
||||
|
||||
assert isinstance(resp.sessions[0], SessionInfo)
|
||||
assert resp.sessions[0].title == "Fix Zed session history"
|
||||
assert resp.sessions[0].updated_at == "123.0"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_list_sessions_passes_cwd_filter(self, agent):
|
||||
with patch.object(agent.session_manager, "list_sessions", return_value=[]) as mock_list:
|
||||
await agent.list_sessions(cwd="/mnt/e/Projects/AI/browser-link-3")
|
||||
|
||||
mock_list.assert_called_once_with(cwd="/mnt/e/Projects/AI/browser-link-3")
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# session configuration / model routing
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -257,6 +305,53 @@ class TestSessionConfiguration:
|
||||
assert result == {}
|
||||
assert state.model == "gpt-5.4"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_set_session_model_accepts_provider_prefixed_choice(self, tmp_path, monkeypatch):
|
||||
runtime_calls = []
|
||||
|
||||
def fake_resolve_runtime_provider(requested=None, **kwargs):
|
||||
runtime_calls.append(requested)
|
||||
provider = requested or "openrouter"
|
||||
return {
|
||||
"provider": provider,
|
||||
"api_mode": "anthropic_messages" if provider == "anthropic" else "chat_completions",
|
||||
"base_url": f"https://{provider}.example/v1",
|
||||
"api_key": f"{provider}-key",
|
||||
"command": None,
|
||||
"args": [],
|
||||
}
|
||||
|
||||
def fake_agent(**kwargs):
|
||||
return SimpleNamespace(
|
||||
model=kwargs.get("model"),
|
||||
provider=kwargs.get("provider"),
|
||||
base_url=kwargs.get("base_url"),
|
||||
api_mode=kwargs.get("api_mode"),
|
||||
)
|
||||
|
||||
monkeypatch.setattr("hermes_cli.config.load_config", lambda: {
|
||||
"model": {"provider": "openrouter", "default": "openrouter/gpt-5"}
|
||||
})
|
||||
monkeypatch.setattr(
|
||||
"hermes_cli.runtime_provider.resolve_runtime_provider",
|
||||
fake_resolve_runtime_provider,
|
||||
)
|
||||
manager = SessionManager(db=SessionDB(tmp_path / "state.db"))
|
||||
|
||||
with patch("run_agent.AIAgent", side_effect=fake_agent):
|
||||
acp_agent = HermesACPAgent(session_manager=manager)
|
||||
state = manager.create_session(cwd="/tmp")
|
||||
result = await acp_agent.set_session_model(
|
||||
model_id="anthropic:claude-sonnet-4-6",
|
||||
session_id=state.session_id,
|
||||
)
|
||||
|
||||
assert isinstance(result, SetSessionModelResponse)
|
||||
assert state.model == "claude-sonnet-4-6"
|
||||
assert state.agent.provider == "anthropic"
|
||||
assert state.agent.base_url == "https://anthropic.example/v1"
|
||||
assert runtime_calls[-1] == "anthropic"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# prompt
|
||||
@@ -354,6 +449,31 @@ class TestPrompt:
|
||||
update = last_call[1].get("update") or last_call[0][1]
|
||||
assert update.session_update == "agent_message_chunk"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_prompt_auto_titles_session(self, agent):
|
||||
new_resp = await agent.new_session(cwd=".")
|
||||
state = agent.session_manager.get_session(new_resp.session_id)
|
||||
state.agent.run_conversation = MagicMock(return_value={
|
||||
"final_response": "Here is the fix.",
|
||||
"messages": [
|
||||
{"role": "user", "content": "fix the broken ACP history"},
|
||||
{"role": "assistant", "content": "Here is the fix."},
|
||||
],
|
||||
})
|
||||
|
||||
mock_conn = MagicMock(spec=acp.Client)
|
||||
mock_conn.session_update = AsyncMock()
|
||||
agent._conn = mock_conn
|
||||
|
||||
with patch("agent.title_generator.maybe_auto_title") as mock_title:
|
||||
prompt = [TextContentBlock(type="text", text="fix the broken ACP history")]
|
||||
await agent.prompt(prompt=prompt, session_id=new_resp.session_id)
|
||||
|
||||
mock_title.assert_called_once()
|
||||
assert mock_title.call_args.args[1] == new_resp.session_id
|
||||
assert mock_title.call_args.args[2] == "fix the broken ACP history"
|
||||
assert mock_title.call_args.args[3] == "Here is the fix."
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_prompt_populates_usage_from_top_level_run_conversation_fields(self, agent):
|
||||
"""ACP should map top-level token fields into PromptResponse.usage."""
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
import contextlib
|
||||
import io
|
||||
import json
|
||||
import time
|
||||
from types import SimpleNamespace
|
||||
import pytest
|
||||
from unittest.mock import MagicMock, patch
|
||||
@@ -100,15 +101,23 @@ class TestListAndCleanup:
|
||||
def test_list_sessions_returns_created(self, manager):
|
||||
s1 = manager.create_session(cwd="/a")
|
||||
s2 = manager.create_session(cwd="/b")
|
||||
s1.history.append({"role": "user", "content": "hello from a"})
|
||||
s2.history.append({"role": "user", "content": "hello from b"})
|
||||
listing = manager.list_sessions()
|
||||
ids = {s["session_id"] for s in listing}
|
||||
assert s1.session_id in ids
|
||||
assert s2.session_id in ids
|
||||
assert len(listing) == 2
|
||||
|
||||
def test_list_sessions_hides_empty_threads(self, manager):
|
||||
manager.create_session(cwd="/empty")
|
||||
assert manager.list_sessions() == []
|
||||
|
||||
def test_cleanup_clears_all(self, manager):
|
||||
manager.create_session()
|
||||
manager.create_session()
|
||||
s1 = manager.create_session()
|
||||
s2 = manager.create_session()
|
||||
s1.history.append({"role": "user", "content": "one"})
|
||||
s2.history.append({"role": "user", "content": "two"})
|
||||
assert len(manager.list_sessions()) == 2
|
||||
manager.cleanup()
|
||||
assert manager.list_sessions() == []
|
||||
@@ -194,6 +203,8 @@ class TestPersistence:
|
||||
def test_list_sessions_includes_db_only(self, manager):
|
||||
"""Sessions only in DB (not in memory) appear in list_sessions."""
|
||||
state = manager.create_session(cwd="/db-only")
|
||||
state.history.append({"role": "user", "content": "database only thread"})
|
||||
manager.save_session(state.session_id)
|
||||
sid = state.session_id
|
||||
|
||||
# Drop from memory.
|
||||
@@ -204,6 +215,53 @@ class TestPersistence:
|
||||
ids = {s["session_id"] for s in listing}
|
||||
assert sid in ids
|
||||
|
||||
def test_list_sessions_filters_by_cwd(self, manager):
|
||||
keep = manager.create_session(cwd="/keep")
|
||||
drop = manager.create_session(cwd="/drop")
|
||||
keep.history.append({"role": "user", "content": "keep me"})
|
||||
drop.history.append({"role": "user", "content": "drop me"})
|
||||
|
||||
listing = manager.list_sessions(cwd="/keep")
|
||||
ids = {s["session_id"] for s in listing}
|
||||
assert keep.session_id in ids
|
||||
assert drop.session_id not in ids
|
||||
|
||||
def test_list_sessions_matches_windows_and_wsl_paths(self, manager):
|
||||
state = manager.create_session(cwd="/mnt/e/Projects/AI/browser-link-3")
|
||||
state.history.append({"role": "user", "content": "same project from WSL"})
|
||||
|
||||
listing = manager.list_sessions(cwd=r"E:\Projects\AI\browser-link-3")
|
||||
ids = {s["session_id"] for s in listing}
|
||||
assert state.session_id in ids
|
||||
|
||||
def test_list_sessions_prefers_title_then_preview(self, manager):
|
||||
state = manager.create_session(cwd="/named")
|
||||
state.history.append({"role": "user", "content": "Investigate broken ACP history in Zed"})
|
||||
manager.save_session(state.session_id)
|
||||
db = manager._get_db()
|
||||
db.set_session_title(state.session_id, "Fix Zed ACP history")
|
||||
|
||||
listing = manager.list_sessions(cwd="/named")
|
||||
assert listing[0]["title"] == "Fix Zed ACP history"
|
||||
|
||||
db.set_session_title(state.session_id, "")
|
||||
listing = manager.list_sessions(cwd="/named")
|
||||
assert listing[0]["title"].startswith("Investigate broken ACP history")
|
||||
|
||||
def test_list_sessions_sorted_by_most_recent_activity(self, manager):
|
||||
older = manager.create_session(cwd="/ordered")
|
||||
older.history.append({"role": "user", "content": "older"})
|
||||
manager.save_session(older.session_id)
|
||||
time.sleep(0.02)
|
||||
newer = manager.create_session(cwd="/ordered")
|
||||
newer.history.append({"role": "user", "content": "newer"})
|
||||
manager.save_session(newer.session_id)
|
||||
|
||||
listing = manager.list_sessions(cwd="/ordered")
|
||||
assert [item["session_id"] for item in listing[:2]] == [newer.session_id, older.session_id]
|
||||
assert listing[0]["updated_at"]
|
||||
assert listing[1]["updated_at"]
|
||||
|
||||
def test_fork_restores_source_from_db(self, manager):
|
||||
"""Forking a session that is only in DB should work."""
|
||||
original = manager.create_session()
|
||||
|
||||
@@ -215,6 +215,46 @@ class TestBuildToolComplete:
|
||||
assert len(display_text) < 6000
|
||||
assert "truncated" in display_text
|
||||
|
||||
def test_build_tool_complete_for_patch_uses_diff_blocks(self):
|
||||
"""Completed patch calls should keep structured diff content for Zed."""
|
||||
patch_result = (
|
||||
'{"success": true, "diff": "--- a/README.md\\n+++ b/README.md\\n@@ -1 +1,2 @@\\n old line\\n+new line\\n", '
|
||||
'"files_modified": ["README.md"]}'
|
||||
)
|
||||
result = build_tool_complete("tc-p1", "patch", patch_result)
|
||||
assert isinstance(result, ToolCallProgress)
|
||||
assert len(result.content) == 1
|
||||
diff_item = result.content[0]
|
||||
assert isinstance(diff_item, FileEditToolCallContent)
|
||||
assert diff_item.path == "README.md"
|
||||
assert diff_item.old_text == "old line"
|
||||
assert diff_item.new_text == "old line\nnew line"
|
||||
|
||||
def test_build_tool_complete_for_patch_falls_back_to_text_when_no_diff(self):
|
||||
result = build_tool_complete("tc-p2", "patch", '{"success": true}')
|
||||
assert isinstance(result, ToolCallProgress)
|
||||
assert isinstance(result.content[0], ContentToolCallContent)
|
||||
|
||||
def test_build_tool_complete_for_write_file_uses_snapshot_diff(self, tmp_path):
|
||||
target = tmp_path / "diff-test.txt"
|
||||
snapshot = type("Snapshot", (), {"paths": [target], "before": {str(target): None}})()
|
||||
target.write_text("hello from hermes\n", encoding="utf-8")
|
||||
|
||||
result = build_tool_complete(
|
||||
"tc-wf1",
|
||||
"write_file",
|
||||
'{"bytes_written": 18, "dirs_created": false}',
|
||||
function_args={"path": str(target), "content": "hello from hermes\n"},
|
||||
snapshot=snapshot,
|
||||
)
|
||||
assert isinstance(result, ToolCallProgress)
|
||||
assert len(result.content) == 1
|
||||
diff_item = result.content[0]
|
||||
assert isinstance(diff_item, FileEditToolCallContent)
|
||||
assert diff_item.path.endswith("diff-test.txt")
|
||||
assert diff_item.old_text is None
|
||||
assert diff_item.new_text == "hello from hermes"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# extract_locations
|
||||
|
||||
@@ -2,7 +2,8 @@
|
||||
|
||||
Surrogates (U+D800..U+DFFF) are invalid in UTF-8 and crash json.dumps()
|
||||
inside the OpenAI SDK. They can appear via clipboard paste from rich-text
|
||||
editors like Google Docs.
|
||||
editors like Google Docs, OR from byte-level reasoning models (xiaomi/mimo,
|
||||
kimi, glm) emitting lone halves in reasoning output.
|
||||
"""
|
||||
import json
|
||||
import pytest
|
||||
@@ -11,6 +12,7 @@ from unittest.mock import MagicMock, patch
|
||||
from run_agent import (
|
||||
_sanitize_surrogates,
|
||||
_sanitize_messages_surrogates,
|
||||
_sanitize_structure_surrogates,
|
||||
_SURROGATE_RE,
|
||||
)
|
||||
|
||||
@@ -109,6 +111,186 @@ class TestSanitizeMessagesSurrogates:
|
||||
assert "\ufffd" in msgs[0]["content"]
|
||||
|
||||
|
||||
class TestReasoningFieldSurrogates:
|
||||
"""Surrogates in reasoning fields (byte-level reasoning models).
|
||||
|
||||
xiaomi/mimo, kimi, glm and similar byte-level tokenizers can emit lone
|
||||
surrogates in reasoning output. These fields are carried through to the
|
||||
API as `reasoning_content` on assistant messages, and must be sanitized
|
||||
or json.dumps() crashes with 'utf-8' codec can't encode surrogates.
|
||||
"""
|
||||
|
||||
def test_reasoning_field_sanitized(self):
|
||||
msgs = [
|
||||
{"role": "assistant", "content": "ok", "reasoning": "thought \udce2 here"},
|
||||
]
|
||||
assert _sanitize_messages_surrogates(msgs) is True
|
||||
assert "\udce2" not in msgs[0]["reasoning"]
|
||||
assert "\ufffd" in msgs[0]["reasoning"]
|
||||
|
||||
def test_reasoning_content_field_sanitized(self):
|
||||
"""api_messages carry `reasoning_content` built from `reasoning`."""
|
||||
msgs = [
|
||||
{"role": "assistant", "content": "ok", "reasoning_content": "thought \udce2 here"},
|
||||
]
|
||||
assert _sanitize_messages_surrogates(msgs) is True
|
||||
assert "\udce2" not in msgs[0]["reasoning_content"]
|
||||
assert "\ufffd" in msgs[0]["reasoning_content"]
|
||||
|
||||
def test_reasoning_details_nested_sanitized(self):
|
||||
"""reasoning_details is a list of dicts with nested string fields."""
|
||||
msgs = [
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": "ok",
|
||||
"reasoning_details": [
|
||||
{"type": "reasoning.summary", "summary": "summary \udce2 text"},
|
||||
{"type": "reasoning.text", "text": "chain \udc00 of thought"},
|
||||
],
|
||||
},
|
||||
]
|
||||
assert _sanitize_messages_surrogates(msgs) is True
|
||||
assert "\udce2" not in msgs[0]["reasoning_details"][0]["summary"]
|
||||
assert "\ufffd" in msgs[0]["reasoning_details"][0]["summary"]
|
||||
assert "\udc00" not in msgs[0]["reasoning_details"][1]["text"]
|
||||
assert "\ufffd" in msgs[0]["reasoning_details"][1]["text"]
|
||||
|
||||
def test_deeply_nested_reasoning_sanitized(self):
|
||||
"""Nested dicts / lists inside extra fields are recursed into."""
|
||||
msgs = [
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": "ok",
|
||||
"reasoning_details": [
|
||||
{
|
||||
"type": "reasoning.encrypted",
|
||||
"content": {
|
||||
"encrypted_content": "opaque",
|
||||
"text_parts": ["part1", "part2 \udce2 part"],
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
]
|
||||
assert _sanitize_messages_surrogates(msgs) is True
|
||||
assert (
|
||||
msgs[0]["reasoning_details"][0]["content"]["text_parts"][1]
|
||||
== "part2 \ufffd part"
|
||||
)
|
||||
|
||||
def test_reasoning_end_to_end_json_serialization(self):
|
||||
"""After sanitization, the full message dict must serialize clean."""
|
||||
msgs = [
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": "answer",
|
||||
"reasoning_content": "reasoning with \udce2 surrogate",
|
||||
"reasoning_details": [
|
||||
{"summary": "nested \udcb0 surrogate"},
|
||||
],
|
||||
},
|
||||
]
|
||||
_sanitize_messages_surrogates(msgs)
|
||||
# Must round-trip through json + utf-8 encoding without error
|
||||
payload = json.dumps(msgs, ensure_ascii=False).encode("utf-8")
|
||||
assert b"\\" not in payload[:0] # sanity — just ensure we got bytes
|
||||
assert len(payload) > 0
|
||||
|
||||
def test_no_surrogates_returns_false(self):
|
||||
"""Clean reasoning fields don't trigger a modification."""
|
||||
msgs = [
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": "ok",
|
||||
"reasoning": "clean thought",
|
||||
"reasoning_content": "also clean",
|
||||
"reasoning_details": [{"summary": "clean summary"}],
|
||||
},
|
||||
]
|
||||
assert _sanitize_messages_surrogates(msgs) is False
|
||||
|
||||
|
||||
class TestSanitizeStructureSurrogates:
|
||||
"""Test the _sanitize_structure_surrogates() helper for nested payloads."""
|
||||
|
||||
def test_empty_payload(self):
|
||||
assert _sanitize_structure_surrogates({}) is False
|
||||
assert _sanitize_structure_surrogates([]) is False
|
||||
|
||||
def test_flat_dict(self):
|
||||
payload = {"a": "clean", "b": "dirty \udce2 text"}
|
||||
assert _sanitize_structure_surrogates(payload) is True
|
||||
assert payload["a"] == "clean"
|
||||
assert "\ufffd" in payload["b"]
|
||||
|
||||
def test_flat_list(self):
|
||||
payload = ["clean", "dirty \udce2"]
|
||||
assert _sanitize_structure_surrogates(payload) is True
|
||||
assert payload[0] == "clean"
|
||||
assert "\ufffd" in payload[1]
|
||||
|
||||
def test_nested_dict_in_list(self):
|
||||
payload = [{"x": "dirty \udce2"}, {"x": "clean"}]
|
||||
assert _sanitize_structure_surrogates(payload) is True
|
||||
assert "\ufffd" in payload[0]["x"]
|
||||
assert payload[1]["x"] == "clean"
|
||||
|
||||
def test_deeply_nested(self):
|
||||
payload = {
|
||||
"level1": {
|
||||
"level2": [
|
||||
{"level3": "deep \udce2 surrogate"},
|
||||
],
|
||||
},
|
||||
}
|
||||
assert _sanitize_structure_surrogates(payload) is True
|
||||
assert "\ufffd" in payload["level1"]["level2"][0]["level3"]
|
||||
|
||||
def test_clean_payload_returns_false(self):
|
||||
payload = {"a": "clean", "b": [{"c": "also clean"}]}
|
||||
assert _sanitize_structure_surrogates(payload) is False
|
||||
|
||||
def test_non_string_values_ignored(self):
|
||||
payload = {"int": 42, "list": [1, 2, 3], "dict": {"none": None}, "bool": True}
|
||||
assert _sanitize_structure_surrogates(payload) is False
|
||||
# Non-string values survive unchanged
|
||||
assert payload["int"] == 42
|
||||
assert payload["list"] == [1, 2, 3]
|
||||
|
||||
|
||||
class TestApiMessagesSurrogateRecovery:
|
||||
"""Integration: verify the recovery block sanitizes api_messages.
|
||||
|
||||
The bug this guards against: a surrogate in `reasoning_content` on
|
||||
api_messages (transformed from `reasoning` during build) crashes the
|
||||
OpenAI SDK's json.dumps(), and the recovery block previously only
|
||||
sanitized the canonical `messages` list — not `api_messages` — so the
|
||||
next retry would send the same broken payload and fail 3 times.
|
||||
"""
|
||||
|
||||
def test_api_messages_reasoning_content_sanitized(self):
|
||||
"""The extended sanitizer catches reasoning_content in api_messages."""
|
||||
api_messages = [
|
||||
{"role": "system", "content": "sys"},
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": "response",
|
||||
"reasoning_content": "thought \udce2 trail",
|
||||
"tool_calls": [
|
||||
{
|
||||
"id": "call_1",
|
||||
"function": {"name": "tool", "arguments": "{}"},
|
||||
}
|
||||
],
|
||||
},
|
||||
{"role": "tool", "content": "result", "tool_call_id": "call_1"},
|
||||
]
|
||||
assert _sanitize_messages_surrogates(api_messages) is True
|
||||
assert "\udce2" not in api_messages[1]["reasoning_content"]
|
||||
# Full payload must now serialize clean
|
||||
json.dumps(api_messages, ensure_ascii=False).encode("utf-8")
|
||||
|
||||
|
||||
class TestRunConversationSurrogateSanitization:
|
||||
"""Integration: verify run_conversation sanitizes user_message."""
|
||||
|
||||
|
||||
@@ -34,3 +34,21 @@ def test_messaging_extra_includes_qrcode_for_weixin_setup():
|
||||
|
||||
messaging_extra = optional_dependencies["messaging"]
|
||||
assert any(dep.startswith("qrcode") for dep in messaging_extra)
|
||||
|
||||
|
||||
def test_dingtalk_extra_includes_qrcode_for_qr_auth():
|
||||
"""DingTalk's QR-code device-flow auth (hermes_cli/dingtalk_auth.py)
|
||||
needs the qrcode package."""
|
||||
optional_dependencies = _load_optional_dependencies()
|
||||
|
||||
dingtalk_extra = optional_dependencies["dingtalk"]
|
||||
assert any(dep.startswith("qrcode") for dep in dingtalk_extra)
|
||||
|
||||
|
||||
def test_feishu_extra_includes_qrcode_for_qr_login():
|
||||
"""Feishu's QR login flow (gateway/platforms/feishu.py) needs the
|
||||
qrcode package."""
|
||||
optional_dependencies = _load_optional_dependencies()
|
||||
|
||||
feishu_extra = optional_dependencies["feishu"]
|
||||
assert any(dep.startswith("qrcode") for dep in feishu_extra)
|
||||
|
||||
Reference in New Issue
Block a user