Merge branch 'main' of github.com:NousResearch/hermes-agent into feat/ink-refactor

This commit is contained in:
Brooklyn Nicholson
2026-04-17 15:44:57 -05:00
14 changed files with 1072 additions and 100 deletions

View File

@@ -42,9 +42,10 @@ class TestToolProgressCallback:
def test_emits_tool_call_start(self, mock_conn, event_loop_fixture):
"""Tool progress should emit a ToolCallStart update."""
tool_call_ids = {}
tool_call_meta = {}
loop = event_loop_fixture
cb = make_tool_progress_cb(mock_conn, "session-1", loop, tool_call_ids)
cb = make_tool_progress_cb(mock_conn, "session-1", loop, tool_call_ids, tool_call_meta)
# Run callback in the event loop context
with patch("acp_adapter.events.asyncio.run_coroutine_threadsafe") as mock_rcts:
@@ -66,9 +67,10 @@ class TestToolProgressCallback:
def test_handles_string_args(self, mock_conn, event_loop_fixture):
"""If args is a JSON string, it should be parsed."""
tool_call_ids = {}
tool_call_meta = {}
loop = event_loop_fixture
cb = make_tool_progress_cb(mock_conn, "session-1", loop, tool_call_ids)
cb = make_tool_progress_cb(mock_conn, "session-1", loop, tool_call_ids, tool_call_meta)
with patch("acp_adapter.events.asyncio.run_coroutine_threadsafe") as mock_rcts:
future = MagicMock(spec=Future)
@@ -82,9 +84,10 @@ class TestToolProgressCallback:
def test_handles_non_dict_args(self, mock_conn, event_loop_fixture):
"""If args is not a dict, it should be wrapped."""
tool_call_ids = {}
tool_call_meta = {}
loop = event_loop_fixture
cb = make_tool_progress_cb(mock_conn, "session-1", loop, tool_call_ids)
cb = make_tool_progress_cb(mock_conn, "session-1", loop, tool_call_ids, tool_call_meta)
with patch("acp_adapter.events.asyncio.run_coroutine_threadsafe") as mock_rcts:
future = MagicMock(spec=Future)
@@ -98,10 +101,11 @@ class TestToolProgressCallback:
def test_duplicate_same_name_tool_calls_use_fifo_ids(self, mock_conn, event_loop_fixture):
"""Multiple same-name tool calls should be tracked independently in order."""
tool_call_ids = {}
tool_call_meta = {}
loop = event_loop_fixture
progress_cb = make_tool_progress_cb(mock_conn, "session-1", loop, tool_call_ids)
step_cb = make_step_cb(mock_conn, "session-1", loop, tool_call_ids)
progress_cb = make_tool_progress_cb(mock_conn, "session-1", loop, tool_call_ids, tool_call_meta)
step_cb = make_step_cb(mock_conn, "session-1", loop, tool_call_ids, tool_call_meta)
with patch("acp_adapter.events.asyncio.run_coroutine_threadsafe") as mock_rcts:
future = MagicMock(spec=Future)
@@ -163,7 +167,7 @@ class TestStepCallback:
tool_call_ids = {"terminal": "tc-abc123"}
loop = event_loop_fixture
cb = make_step_cb(mock_conn, "session-1", loop, tool_call_ids)
cb = make_step_cb(mock_conn, "session-1", loop, tool_call_ids, {})
with patch("acp_adapter.events.asyncio.run_coroutine_threadsafe") as mock_rcts:
future = MagicMock(spec=Future)
@@ -181,7 +185,7 @@ class TestStepCallback:
tool_call_ids = {}
loop = event_loop_fixture
cb = make_step_cb(mock_conn, "session-1", loop, tool_call_ids)
cb = make_step_cb(mock_conn, "session-1", loop, tool_call_ids, {})
with patch("acp_adapter.events.asyncio.run_coroutine_threadsafe") as mock_rcts:
cb(1, [{"name": "unknown_tool", "result": "ok"}])
@@ -193,7 +197,7 @@ class TestStepCallback:
tool_call_ids = {"read_file": "tc-def456"}
loop = event_loop_fixture
cb = make_step_cb(mock_conn, "session-1", loop, tool_call_ids)
cb = make_step_cb(mock_conn, "session-1", loop, tool_call_ids, {})
with patch("acp_adapter.events.asyncio.run_coroutine_threadsafe") as mock_rcts:
future = MagicMock(spec=Future)
@@ -212,7 +216,7 @@ class TestStepCallback:
tool_call_ids = {"terminal": deque(["tc-xyz789"])}
loop = event_loop_fixture
cb = make_step_cb(mock_conn, "session-1", loop, tool_call_ids)
cb = make_step_cb(mock_conn, "session-1", loop, tool_call_ids, {})
with patch("acp_adapter.events.asyncio.run_coroutine_threadsafe") as mock_rcts, \
patch("acp_adapter.events.build_tool_complete") as mock_btc:
@@ -224,7 +228,7 @@ class TestStepCallback:
cb(1, [{"name": "terminal", "result": '{"output": "hello"}'}])
mock_btc.assert_called_once_with(
"tc-xyz789", "terminal", result='{"output": "hello"}'
"tc-xyz789", "terminal", result='{"output": "hello"}', function_args=None, snapshot=None
)
def test_none_result_passed_through(self, mock_conn, event_loop_fixture):
@@ -234,7 +238,7 @@ class TestStepCallback:
tool_call_ids = {"web_search": deque(["tc-aaa"])}
loop = event_loop_fixture
cb = make_step_cb(mock_conn, "session-1", loop, tool_call_ids)
cb = make_step_cb(mock_conn, "session-1", loop, tool_call_ids, {})
with patch("acp_adapter.events.asyncio.run_coroutine_threadsafe") as mock_rcts, \
patch("acp_adapter.events.build_tool_complete") as mock_btc:
@@ -244,7 +248,50 @@ class TestStepCallback:
cb(1, [{"name": "web_search", "result": None}])
mock_btc.assert_called_once_with("tc-aaa", "web_search", result=None)
mock_btc.assert_called_once_with("tc-aaa", "web_search", result=None, function_args=None, snapshot=None)
def test_step_callback_passes_arguments_and_snapshot(self, mock_conn, event_loop_fixture):
from collections import deque
tool_call_ids = {"write_file": deque(["tc-write"])}
tool_call_meta = {"tc-write": {"args": {"path": "fallback.txt"}, "snapshot": "snap"}}
loop = event_loop_fixture
cb = make_step_cb(mock_conn, "session-1", loop, tool_call_ids, tool_call_meta)
with patch("acp_adapter.events.asyncio.run_coroutine_threadsafe") as mock_rcts, \
patch("acp_adapter.events.build_tool_complete") as mock_btc:
future = MagicMock(spec=Future)
future.result.return_value = None
mock_rcts.return_value = future
cb(1, [{"name": "write_file", "result": '{"bytes_written": 23}', "arguments": {"path": "diff-test.txt"}}])
mock_btc.assert_called_once_with(
"tc-write",
"write_file",
result='{"bytes_written": 23}',
function_args={"path": "diff-test.txt"},
snapshot="snap",
)
def test_tool_progress_captures_snapshot_metadata(self, mock_conn, event_loop_fixture):
tool_call_ids = {}
tool_call_meta = {}
loop = event_loop_fixture
with patch("acp_adapter.events.make_tool_call_id", return_value="tc-meta"), \
patch("acp_adapter.events._send_update") as mock_send, \
patch("agent.display.capture_local_edit_snapshot", return_value="snapshot"):
cb = make_tool_progress_cb(mock_conn, "session-1", loop, tool_call_ids, tool_call_meta)
cb("tool.started", "write_file", None, {"path": "diff-test.txt", "content": "hello"})
assert list(tool_call_ids["write_file"]) == ["tc-meta"]
assert tool_call_meta["tc-meta"] == {
"args": {"path": "diff-test.txt", "content": "hello"},
"snapshot": "snapshot",
}
mock_send.assert_called_once()
# ---------------------------------------------------------------------------

View File

@@ -29,6 +29,7 @@ from acp.schema import (
from acp_adapter.server import HermesACPAgent
from acp_adapter.session import SessionManager
from acp_adapter.tools import build_tool_start
# ---------------------------------------------------------------------------
@@ -181,6 +182,25 @@ class TestMcpRegistrationE2E:
assert complete_event.raw_output is not None
assert "hello" in str(complete_event.raw_output)
def test_patch_mode_tool_start_emits_diff_blocks_for_v4a_patch(self):
update = build_tool_start(
"tc-1",
"patch",
{
"mode": "patch",
"patch": "*** Begin Patch\n*** Update File: src/app.py\n@@\n-old line\n+new line\n*** Add File: src/new.py\n+hello\n*** End Patch",
},
)
assert len(update.content) == 2
assert update.content[0].type == "diff"
assert update.content[0].path == "src/app.py"
assert update.content[0].old_text == "old line"
assert update.content[0].new_text == "new line"
assert update.content[1].type == "diff"
assert update.content[1].path == "src/new.py"
assert update.content[1].new_text == "hello"
@pytest.mark.asyncio
async def test_prompt_tool_results_paired_by_call_id(self, acp_agent, mock_manager):
"""The ToolCallUpdate's toolCallId must match the ToolCallStart's."""

View File

@@ -20,7 +20,9 @@ from acp.schema import (
NewSessionResponse,
PromptResponse,
ResumeSessionResponse,
SessionModelState,
SetSessionConfigOptionResponse,
SetSessionModelResponse,
SetSessionModeResponse,
SessionInfo,
TextContentBlock,
@@ -127,6 +129,25 @@ class TestSessionOps:
assert state is not None
assert state.cwd == "/home/user/project"
@pytest.mark.asyncio
async def test_new_session_returns_model_state(self):
manager = SessionManager(
agent_factory=lambda: SimpleNamespace(model="gpt-5.4", provider="openai-codex")
)
acp_agent = HermesACPAgent(session_manager=manager)
with patch(
"hermes_cli.models.curated_models_for_provider",
return_value=[("gpt-5.4", "recommended"), ("gpt-5.4-mini", "")],
):
resp = await acp_agent.new_session(cwd="/tmp")
assert isinstance(resp.models, SessionModelState)
assert resp.models.current_model_id == "openai-codex:gpt-5.4"
assert resp.models.available_models[0].model_id == "openai-codex:gpt-5.4"
assert resp.models.available_models[0].description is not None
assert "Provider:" in resp.models.available_models[0].description
@pytest.mark.asyncio
async def test_available_commands_include_help(self, agent):
help_cmd = next(
@@ -204,6 +225,33 @@ class TestListAndFork:
assert fork_resp.session_id
assert fork_resp.session_id != new_resp.session_id
@pytest.mark.asyncio
async def test_list_sessions_includes_title_and_updated_at(self, agent):
with patch.object(
agent.session_manager,
"list_sessions",
return_value=[
{
"session_id": "session-1",
"cwd": "/tmp/project",
"title": "Fix Zed session history",
"updated_at": 123.0,
}
],
):
resp = await agent.list_sessions(cwd="/tmp/project")
assert isinstance(resp.sessions[0], SessionInfo)
assert resp.sessions[0].title == "Fix Zed session history"
assert resp.sessions[0].updated_at == "123.0"
@pytest.mark.asyncio
async def test_list_sessions_passes_cwd_filter(self, agent):
with patch.object(agent.session_manager, "list_sessions", return_value=[]) as mock_list:
await agent.list_sessions(cwd="/mnt/e/Projects/AI/browser-link-3")
mock_list.assert_called_once_with(cwd="/mnt/e/Projects/AI/browser-link-3")
# ---------------------------------------------------------------------------
# session configuration / model routing
# ---------------------------------------------------------------------------
@@ -257,6 +305,53 @@ class TestSessionConfiguration:
assert result == {}
assert state.model == "gpt-5.4"
@pytest.mark.asyncio
async def test_set_session_model_accepts_provider_prefixed_choice(self, tmp_path, monkeypatch):
runtime_calls = []
def fake_resolve_runtime_provider(requested=None, **kwargs):
runtime_calls.append(requested)
provider = requested or "openrouter"
return {
"provider": provider,
"api_mode": "anthropic_messages" if provider == "anthropic" else "chat_completions",
"base_url": f"https://{provider}.example/v1",
"api_key": f"{provider}-key",
"command": None,
"args": [],
}
def fake_agent(**kwargs):
return SimpleNamespace(
model=kwargs.get("model"),
provider=kwargs.get("provider"),
base_url=kwargs.get("base_url"),
api_mode=kwargs.get("api_mode"),
)
monkeypatch.setattr("hermes_cli.config.load_config", lambda: {
"model": {"provider": "openrouter", "default": "openrouter/gpt-5"}
})
monkeypatch.setattr(
"hermes_cli.runtime_provider.resolve_runtime_provider",
fake_resolve_runtime_provider,
)
manager = SessionManager(db=SessionDB(tmp_path / "state.db"))
with patch("run_agent.AIAgent", side_effect=fake_agent):
acp_agent = HermesACPAgent(session_manager=manager)
state = manager.create_session(cwd="/tmp")
result = await acp_agent.set_session_model(
model_id="anthropic:claude-sonnet-4-6",
session_id=state.session_id,
)
assert isinstance(result, SetSessionModelResponse)
assert state.model == "claude-sonnet-4-6"
assert state.agent.provider == "anthropic"
assert state.agent.base_url == "https://anthropic.example/v1"
assert runtime_calls[-1] == "anthropic"
# ---------------------------------------------------------------------------
# prompt
@@ -354,6 +449,31 @@ class TestPrompt:
update = last_call[1].get("update") or last_call[0][1]
assert update.session_update == "agent_message_chunk"
@pytest.mark.asyncio
async def test_prompt_auto_titles_session(self, agent):
new_resp = await agent.new_session(cwd=".")
state = agent.session_manager.get_session(new_resp.session_id)
state.agent.run_conversation = MagicMock(return_value={
"final_response": "Here is the fix.",
"messages": [
{"role": "user", "content": "fix the broken ACP history"},
{"role": "assistant", "content": "Here is the fix."},
],
})
mock_conn = MagicMock(spec=acp.Client)
mock_conn.session_update = AsyncMock()
agent._conn = mock_conn
with patch("agent.title_generator.maybe_auto_title") as mock_title:
prompt = [TextContentBlock(type="text", text="fix the broken ACP history")]
await agent.prompt(prompt=prompt, session_id=new_resp.session_id)
mock_title.assert_called_once()
assert mock_title.call_args.args[1] == new_resp.session_id
assert mock_title.call_args.args[2] == "fix the broken ACP history"
assert mock_title.call_args.args[3] == "Here is the fix."
@pytest.mark.asyncio
async def test_prompt_populates_usage_from_top_level_run_conversation_fields(self, agent):
"""ACP should map top-level token fields into PromptResponse.usage."""

View File

@@ -3,6 +3,7 @@
import contextlib
import io
import json
import time
from types import SimpleNamespace
import pytest
from unittest.mock import MagicMock, patch
@@ -100,15 +101,23 @@ class TestListAndCleanup:
def test_list_sessions_returns_created(self, manager):
s1 = manager.create_session(cwd="/a")
s2 = manager.create_session(cwd="/b")
s1.history.append({"role": "user", "content": "hello from a"})
s2.history.append({"role": "user", "content": "hello from b"})
listing = manager.list_sessions()
ids = {s["session_id"] for s in listing}
assert s1.session_id in ids
assert s2.session_id in ids
assert len(listing) == 2
def test_list_sessions_hides_empty_threads(self, manager):
manager.create_session(cwd="/empty")
assert manager.list_sessions() == []
def test_cleanup_clears_all(self, manager):
manager.create_session()
manager.create_session()
s1 = manager.create_session()
s2 = manager.create_session()
s1.history.append({"role": "user", "content": "one"})
s2.history.append({"role": "user", "content": "two"})
assert len(manager.list_sessions()) == 2
manager.cleanup()
assert manager.list_sessions() == []
@@ -194,6 +203,8 @@ class TestPersistence:
def test_list_sessions_includes_db_only(self, manager):
"""Sessions only in DB (not in memory) appear in list_sessions."""
state = manager.create_session(cwd="/db-only")
state.history.append({"role": "user", "content": "database only thread"})
manager.save_session(state.session_id)
sid = state.session_id
# Drop from memory.
@@ -204,6 +215,53 @@ class TestPersistence:
ids = {s["session_id"] for s in listing}
assert sid in ids
def test_list_sessions_filters_by_cwd(self, manager):
keep = manager.create_session(cwd="/keep")
drop = manager.create_session(cwd="/drop")
keep.history.append({"role": "user", "content": "keep me"})
drop.history.append({"role": "user", "content": "drop me"})
listing = manager.list_sessions(cwd="/keep")
ids = {s["session_id"] for s in listing}
assert keep.session_id in ids
assert drop.session_id not in ids
def test_list_sessions_matches_windows_and_wsl_paths(self, manager):
state = manager.create_session(cwd="/mnt/e/Projects/AI/browser-link-3")
state.history.append({"role": "user", "content": "same project from WSL"})
listing = manager.list_sessions(cwd=r"E:\Projects\AI\browser-link-3")
ids = {s["session_id"] for s in listing}
assert state.session_id in ids
def test_list_sessions_prefers_title_then_preview(self, manager):
state = manager.create_session(cwd="/named")
state.history.append({"role": "user", "content": "Investigate broken ACP history in Zed"})
manager.save_session(state.session_id)
db = manager._get_db()
db.set_session_title(state.session_id, "Fix Zed ACP history")
listing = manager.list_sessions(cwd="/named")
assert listing[0]["title"] == "Fix Zed ACP history"
db.set_session_title(state.session_id, "")
listing = manager.list_sessions(cwd="/named")
assert listing[0]["title"].startswith("Investigate broken ACP history")
def test_list_sessions_sorted_by_most_recent_activity(self, manager):
older = manager.create_session(cwd="/ordered")
older.history.append({"role": "user", "content": "older"})
manager.save_session(older.session_id)
time.sleep(0.02)
newer = manager.create_session(cwd="/ordered")
newer.history.append({"role": "user", "content": "newer"})
manager.save_session(newer.session_id)
listing = manager.list_sessions(cwd="/ordered")
assert [item["session_id"] for item in listing[:2]] == [newer.session_id, older.session_id]
assert listing[0]["updated_at"]
assert listing[1]["updated_at"]
def test_fork_restores_source_from_db(self, manager):
"""Forking a session that is only in DB should work."""
original = manager.create_session()

View File

@@ -215,6 +215,46 @@ class TestBuildToolComplete:
assert len(display_text) < 6000
assert "truncated" in display_text
def test_build_tool_complete_for_patch_uses_diff_blocks(self):
"""Completed patch calls should keep structured diff content for Zed."""
patch_result = (
'{"success": true, "diff": "--- a/README.md\\n+++ b/README.md\\n@@ -1 +1,2 @@\\n old line\\n+new line\\n", '
'"files_modified": ["README.md"]}'
)
result = build_tool_complete("tc-p1", "patch", patch_result)
assert isinstance(result, ToolCallProgress)
assert len(result.content) == 1
diff_item = result.content[0]
assert isinstance(diff_item, FileEditToolCallContent)
assert diff_item.path == "README.md"
assert diff_item.old_text == "old line"
assert diff_item.new_text == "old line\nnew line"
def test_build_tool_complete_for_patch_falls_back_to_text_when_no_diff(self):
result = build_tool_complete("tc-p2", "patch", '{"success": true}')
assert isinstance(result, ToolCallProgress)
assert isinstance(result.content[0], ContentToolCallContent)
def test_build_tool_complete_for_write_file_uses_snapshot_diff(self, tmp_path):
target = tmp_path / "diff-test.txt"
snapshot = type("Snapshot", (), {"paths": [target], "before": {str(target): None}})()
target.write_text("hello from hermes\n", encoding="utf-8")
result = build_tool_complete(
"tc-wf1",
"write_file",
'{"bytes_written": 18, "dirs_created": false}',
function_args={"path": str(target), "content": "hello from hermes\n"},
snapshot=snapshot,
)
assert isinstance(result, ToolCallProgress)
assert len(result.content) == 1
diff_item = result.content[0]
assert isinstance(diff_item, FileEditToolCallContent)
assert diff_item.path.endswith("diff-test.txt")
assert diff_item.old_text is None
assert diff_item.new_text == "hello from hermes"
# ---------------------------------------------------------------------------
# extract_locations

View File

@@ -2,7 +2,8 @@
Surrogates (U+D800..U+DFFF) are invalid in UTF-8 and crash json.dumps()
inside the OpenAI SDK. They can appear via clipboard paste from rich-text
editors like Google Docs.
editors like Google Docs, OR from byte-level reasoning models (xiaomi/mimo,
kimi, glm) emitting lone halves in reasoning output.
"""
import json
import pytest
@@ -11,6 +12,7 @@ from unittest.mock import MagicMock, patch
from run_agent import (
_sanitize_surrogates,
_sanitize_messages_surrogates,
_sanitize_structure_surrogates,
_SURROGATE_RE,
)
@@ -109,6 +111,186 @@ class TestSanitizeMessagesSurrogates:
assert "\ufffd" in msgs[0]["content"]
class TestReasoningFieldSurrogates:
"""Surrogates in reasoning fields (byte-level reasoning models).
xiaomi/mimo, kimi, glm and similar byte-level tokenizers can emit lone
surrogates in reasoning output. These fields are carried through to the
API as `reasoning_content` on assistant messages, and must be sanitized
or json.dumps() crashes with 'utf-8' codec can't encode surrogates.
"""
def test_reasoning_field_sanitized(self):
msgs = [
{"role": "assistant", "content": "ok", "reasoning": "thought \udce2 here"},
]
assert _sanitize_messages_surrogates(msgs) is True
assert "\udce2" not in msgs[0]["reasoning"]
assert "\ufffd" in msgs[0]["reasoning"]
def test_reasoning_content_field_sanitized(self):
"""api_messages carry `reasoning_content` built from `reasoning`."""
msgs = [
{"role": "assistant", "content": "ok", "reasoning_content": "thought \udce2 here"},
]
assert _sanitize_messages_surrogates(msgs) is True
assert "\udce2" not in msgs[0]["reasoning_content"]
assert "\ufffd" in msgs[0]["reasoning_content"]
def test_reasoning_details_nested_sanitized(self):
"""reasoning_details is a list of dicts with nested string fields."""
msgs = [
{
"role": "assistant",
"content": "ok",
"reasoning_details": [
{"type": "reasoning.summary", "summary": "summary \udce2 text"},
{"type": "reasoning.text", "text": "chain \udc00 of thought"},
],
},
]
assert _sanitize_messages_surrogates(msgs) is True
assert "\udce2" not in msgs[0]["reasoning_details"][0]["summary"]
assert "\ufffd" in msgs[0]["reasoning_details"][0]["summary"]
assert "\udc00" not in msgs[0]["reasoning_details"][1]["text"]
assert "\ufffd" in msgs[0]["reasoning_details"][1]["text"]
def test_deeply_nested_reasoning_sanitized(self):
"""Nested dicts / lists inside extra fields are recursed into."""
msgs = [
{
"role": "assistant",
"content": "ok",
"reasoning_details": [
{
"type": "reasoning.encrypted",
"content": {
"encrypted_content": "opaque",
"text_parts": ["part1", "part2 \udce2 part"],
},
},
],
},
]
assert _sanitize_messages_surrogates(msgs) is True
assert (
msgs[0]["reasoning_details"][0]["content"]["text_parts"][1]
== "part2 \ufffd part"
)
def test_reasoning_end_to_end_json_serialization(self):
"""After sanitization, the full message dict must serialize clean."""
msgs = [
{
"role": "assistant",
"content": "answer",
"reasoning_content": "reasoning with \udce2 surrogate",
"reasoning_details": [
{"summary": "nested \udcb0 surrogate"},
],
},
]
_sanitize_messages_surrogates(msgs)
# Must round-trip through json + utf-8 encoding without error
payload = json.dumps(msgs, ensure_ascii=False).encode("utf-8")
assert b"\\" not in payload[:0] # sanity — just ensure we got bytes
assert len(payload) > 0
def test_no_surrogates_returns_false(self):
"""Clean reasoning fields don't trigger a modification."""
msgs = [
{
"role": "assistant",
"content": "ok",
"reasoning": "clean thought",
"reasoning_content": "also clean",
"reasoning_details": [{"summary": "clean summary"}],
},
]
assert _sanitize_messages_surrogates(msgs) is False
class TestSanitizeStructureSurrogates:
"""Test the _sanitize_structure_surrogates() helper for nested payloads."""
def test_empty_payload(self):
assert _sanitize_structure_surrogates({}) is False
assert _sanitize_structure_surrogates([]) is False
def test_flat_dict(self):
payload = {"a": "clean", "b": "dirty \udce2 text"}
assert _sanitize_structure_surrogates(payload) is True
assert payload["a"] == "clean"
assert "\ufffd" in payload["b"]
def test_flat_list(self):
payload = ["clean", "dirty \udce2"]
assert _sanitize_structure_surrogates(payload) is True
assert payload[0] == "clean"
assert "\ufffd" in payload[1]
def test_nested_dict_in_list(self):
payload = [{"x": "dirty \udce2"}, {"x": "clean"}]
assert _sanitize_structure_surrogates(payload) is True
assert "\ufffd" in payload[0]["x"]
assert payload[1]["x"] == "clean"
def test_deeply_nested(self):
payload = {
"level1": {
"level2": [
{"level3": "deep \udce2 surrogate"},
],
},
}
assert _sanitize_structure_surrogates(payload) is True
assert "\ufffd" in payload["level1"]["level2"][0]["level3"]
def test_clean_payload_returns_false(self):
payload = {"a": "clean", "b": [{"c": "also clean"}]}
assert _sanitize_structure_surrogates(payload) is False
def test_non_string_values_ignored(self):
payload = {"int": 42, "list": [1, 2, 3], "dict": {"none": None}, "bool": True}
assert _sanitize_structure_surrogates(payload) is False
# Non-string values survive unchanged
assert payload["int"] == 42
assert payload["list"] == [1, 2, 3]
class TestApiMessagesSurrogateRecovery:
"""Integration: verify the recovery block sanitizes api_messages.
The bug this guards against: a surrogate in `reasoning_content` on
api_messages (transformed from `reasoning` during build) crashes the
OpenAI SDK's json.dumps(), and the recovery block previously only
sanitized the canonical `messages` list — not `api_messages` — so the
next retry would send the same broken payload and fail 3 times.
"""
def test_api_messages_reasoning_content_sanitized(self):
"""The extended sanitizer catches reasoning_content in api_messages."""
api_messages = [
{"role": "system", "content": "sys"},
{
"role": "assistant",
"content": "response",
"reasoning_content": "thought \udce2 trail",
"tool_calls": [
{
"id": "call_1",
"function": {"name": "tool", "arguments": "{}"},
}
],
},
{"role": "tool", "content": "result", "tool_call_id": "call_1"},
]
assert _sanitize_messages_surrogates(api_messages) is True
assert "\udce2" not in api_messages[1]["reasoning_content"]
# Full payload must now serialize clean
json.dumps(api_messages, ensure_ascii=False).encode("utf-8")
class TestRunConversationSurrogateSanitization:
"""Integration: verify run_conversation sanitizes user_message."""

View File

@@ -34,3 +34,21 @@ def test_messaging_extra_includes_qrcode_for_weixin_setup():
messaging_extra = optional_dependencies["messaging"]
assert any(dep.startswith("qrcode") for dep in messaging_extra)
def test_dingtalk_extra_includes_qrcode_for_qr_auth():
"""DingTalk's QR-code device-flow auth (hermes_cli/dingtalk_auth.py)
needs the qrcode package."""
optional_dependencies = _load_optional_dependencies()
dingtalk_extra = optional_dependencies["dingtalk"]
assert any(dep.startswith("qrcode") for dep in dingtalk_extra)
def test_feishu_extra_includes_qrcode_for_qr_login():
"""Feishu's QR login flow (gateway/platforms/feishu.py) needs the
qrcode package."""
optional_dependencies = _load_optional_dependencies()
feishu_extra = optional_dependencies["feishu"]
assert any(dep.startswith("qrcode") for dep in feishu_extra)