Merge remote-tracking branch 'origin/main' into bb/tui-long-session-perf

This commit is contained in:
Brooklyn Nicholson
2026-04-26 21:07:15 -05:00
115 changed files with 17650 additions and 406 deletions

View File

@@ -117,6 +117,12 @@ class TestHintMessages:
assert "/busy interrupt" in msg
assert "queued" in msg.lower()
def test_busy_input_hint_gateway_steer(self):
msg = busy_input_hint_gateway("steer")
assert "/busy interrupt" in msg
assert "/busy queue" in msg
assert "steer" in msg.lower()
def test_busy_input_hint_cli_interrupt(self):
msg = busy_input_hint_cli("interrupt")
assert "/busy queue" in msg
@@ -125,6 +131,12 @@ class TestHintMessages:
msg = busy_input_hint_cli("queue")
assert "/busy interrupt" in msg
def test_busy_input_hint_cli_steer(self):
msg = busy_input_hint_cli("steer")
assert "/busy interrupt" in msg
assert "/busy queue" in msg
assert "steer" in msg.lower()
def test_tool_progress_hints_mention_verbose(self):
assert "/verbose" in tool_progress_hint_gateway()
assert "/verbose" in tool_progress_hint_cli()
@@ -133,8 +145,10 @@ class TestHintMessages:
for hint in (
busy_input_hint_gateway("queue"),
busy_input_hint_gateway("interrupt"),
busy_input_hint_gateway("steer"),
busy_input_hint_cli("queue"),
busy_input_hint_cli("interrupt"),
busy_input_hint_cli("steer"),
tool_progress_hint_gateway(),
tool_progress_hint_cli(),
):

View File

@@ -65,6 +65,35 @@ class TestHandleBusyCommand(unittest.TestCase):
self.assertEqual(stub.busy_input_mode, "interrupt")
mock_save.assert_called_once_with("display.busy_input_mode", "interrupt")
def test_steer_argument_sets_steer_mode_and_saves(self):
cli_mod = _import_cli()
stub = self._make_cli("interrupt")
with (
patch.object(cli_mod, "_cprint") as mock_cprint,
patch.object(cli_mod, "save_config_value", return_value=True) as mock_save,
):
cli_mod.HermesCLI._handle_busy_command(stub, "/busy steer")
self.assertEqual(stub.busy_input_mode, "steer")
mock_save.assert_called_once_with("display.busy_input_mode", "steer")
printed = " ".join(str(c) for c in mock_cprint.call_args_list)
self.assertIn("steer", printed.lower())
def test_status_reports_steer_behavior(self):
cli_mod = _import_cli()
stub = self._make_cli("steer")
with (
patch.object(cli_mod, "_cprint") as mock_cprint,
patch.object(cli_mod, "save_config_value") as mock_save,
):
cli_mod.HermesCLI._handle_busy_command(stub, "/busy status")
mock_save.assert_not_called()
printed = " ".join(str(c) for c in mock_cprint.call_args_list)
self.assertIn("steer", printed.lower())
# The usage line should also advertise the steer option
self.assertIn("steer", printed)
def test_invalid_argument_prints_usage(self):
cli_mod = _import_cli()
stub = self._make_cli()
@@ -90,5 +119,5 @@ class TestBusyCommandRegistry(unittest.TestCase):
from hermes_cli.commands import COMMAND_REGISTRY
busy = next(c for c in COMMAND_REGISTRY if c.name == "busy")
assert busy.args_hint == "[queue|interrupt|status]"
assert busy.args_hint == "[queue|steer|interrupt|status]"
assert busy.category == "Configuration"

View File

@@ -31,6 +31,40 @@ def _make_cli_stub():
return cli
def _make_background_cli_stub():
cli = _make_cli_stub()
cli._background_task_counter = 0
cli._background_tasks = {}
cli._ensure_runtime_credentials = MagicMock(return_value=True)
cli._resolve_turn_agent_config = MagicMock(return_value={
"model": "test-model",
"runtime": {
"api_key": "test-key",
"base_url": "https://example.test/v1",
"provider": "test",
"api_mode": "chat_completions",
},
"request_overrides": None,
})
cli.max_turns = 90
cli.enabled_toolsets = []
cli._session_db = None
cli.reasoning_config = {}
cli.service_tier = None
cli._providers_only = None
cli._providers_ignore = None
cli._providers_order = None
cli._provider_sort = None
cli._provider_require_params = None
cli._provider_data_collection = None
cli._fallback_model = None
cli._agent_running = False
cli._spinner_text = ""
cli.bell_on_complete = False
cli.final_response_markdown = "strip"
return cli
class TestCliApprovalUi:
def test_sudo_prompt_restores_existing_draft_after_response(self):
cli = _make_cli_stub()
@@ -255,6 +289,54 @@ class TestCliApprovalUi:
# Command got truncated with a marker.
assert "(command truncated" in rendered
def test_background_task_registers_thread_local_approval_callbacks(self):
"""Background /btw tasks must use the prompt_toolkit approval UI.
The foreground chat path registers dangerous-command callbacks inside
its worker thread because tools.terminal_tool stores them in
threading.local(). /background used to skip that, so dangerous commands
fell back to raw input() in a background thread and timed out under
prompt_toolkit.
"""
cli = _make_background_cli_stub()
seen = {}
class FakeAgent:
def __init__(self, **kwargs):
self._print_fn = None
self.thinking_callback = None
def run_conversation(self, **kwargs):
from tools.terminal_tool import (
_get_approval_callback,
_get_sudo_password_callback,
)
seen["approval"] = _get_approval_callback()
seen["sudo"] = _get_sudo_password_callback()
return {
"final_response": "done",
"messages": [],
"completed": True,
"failed": False,
}
with patch.object(cli_module, "AIAgent", FakeAgent), \
patch.object(cli_module, "_cprint"), \
patch.object(cli_module, "ChatConsole") as chat_console:
chat_console.return_value.print = MagicMock()
cli._handle_background_command("/btw check weather")
deadline = time.time() + 2
while cli._background_tasks and time.time() < deadline:
time.sleep(0.01)
assert seen["approval"].__self__ is cli
assert seen["approval"].__func__ is HermesCLI._approval_callback
assert seen["sudo"].__self__ is cli
assert seen["sudo"].__func__ is HermesCLI._sudo_password_callback
assert not cli._background_tasks
class TestApprovalCallbackThreadLocalWiring:
"""Regression guard for the thread-local callback freeze (#13617 / #13618).

View File

@@ -0,0 +1,102 @@
"""Tests for /save — the conversation snapshot slash command.
Regression: the old implementation wrote ``hermes_conversation_<ts>.json``
to the current working directory (CWD). Users who ran /save expected the
file to be discoverable via ``hermes sessions browse``, but CWD-resident
snapshots are not indexed in the state DB and are generally invisible.
The fix writes snapshots under ``~/.hermes/sessions/saved/`` and prints
the absolute path plus the resume hint for the live session.
"""
from __future__ import annotations
import json
import os
import sys
from datetime import datetime
from pathlib import Path
from types import SimpleNamespace
import pytest
@pytest.fixture
def hermes_home(tmp_path, monkeypatch):
home = tmp_path / ".hermes"
home.mkdir()
monkeypatch.setattr(Path, "home", lambda: tmp_path)
monkeypatch.setenv("HERMES_HOME", str(home))
# Clear any cached hermes_home computation
import hermes_constants
if hasattr(hermes_constants, "_hermes_home_cache"):
hermes_constants._hermes_home_cache = None
return home
def _make_stub_cli(history):
"""Build a minimal object exposing just what save_conversation uses."""
return SimpleNamespace(
conversation_history=history,
model="test-model",
session_id="20260101_120000_abc123",
session_start=datetime(2026, 1, 1, 12, 0, 0),
)
def test_save_conversation_writes_under_hermes_home(hermes_home, tmp_path, monkeypatch, capsys):
"""Snapshot must land under ~/.hermes/sessions/saved/, not CWD."""
# Change CWD to a different directory to prove the file does NOT go there.
work = tmp_path / "somewhere-else"
work.mkdir()
monkeypatch.chdir(work)
# Import fresh to pick up the HERMES_HOME fixture
for mod in [m for m in sys.modules if m.startswith("cli") or m == "hermes_constants"]:
sys.modules.pop(mod, None)
import cli # noqa: F401 (module under test)
stub = _make_stub_cli([
{"role": "user", "content": "hi"},
{"role": "assistant", "content": "hello"},
])
# Call the unbound method against our stub.
cli.HermesCLI.save_conversation(stub)
# File must NOT be in CWD
cwd_leak = list(work.glob("hermes_conversation_*.json"))
assert not cwd_leak, f"snapshot leaked to CWD: {cwd_leak}"
# File MUST be under ~/.hermes/sessions/saved/
saved_dir = hermes_home / "sessions" / "saved"
assert saved_dir.is_dir(), "expected saved/ subdirectory to be created"
files = list(saved_dir.glob("hermes_conversation_*.json"))
assert len(files) == 1, files
payload = json.loads(files[0].read_text())
assert payload["model"] == "test-model"
assert payload["session_id"] == "20260101_120000_abc123"
assert payload["messages"] == [
{"role": "user", "content": "hi"},
{"role": "assistant", "content": "hello"},
]
# User-facing message must include the absolute path AND the resume hint.
out = capsys.readouterr().out
assert str(files[0]) in out, out
assert "hermes --resume 20260101_120000_abc123" in out, out
def test_save_conversation_empty_history_does_nothing(hermes_home, capsys):
for mod in [m for m in sys.modules if m.startswith("cli") or m == "hermes_constants"]:
sys.modules.pop(mod, None)
import cli
stub = _make_stub_cli([])
cli.HermesCLI.save_conversation(stub)
saved_dir = hermes_home / "sessions" / "saved"
assert not saved_dir.exists() or not list(saved_dir.iterdir())
out = capsys.readouterr().out
assert "No conversation to save" in out

View File

@@ -211,6 +211,21 @@ _HERMES_BEHAVIORAL_VARS = frozenset({
"SIGNAL_ALLOW_ALL_USERS",
"EMAIL_ALLOW_ALL_USERS",
"SMS_ALLOW_ALL_USERS",
# Platform gating — set by load_gateway_config() as a side effect when
# a config.yaml is present, so individual test bodies that call the
# loader leak these values into later tests on the same xdist worker.
# Force-clear on every test setup so the leak can't happen.
"SLACK_REQUIRE_MENTION",
"SLACK_STRICT_MENTION",
"SLACK_FREE_RESPONSE_CHANNELS",
"SLACK_ALLOW_BOTS",
"SLACK_REACTIONS",
"DISCORD_REQUIRE_MENTION",
"DISCORD_FREE_RESPONSE_CHANNELS",
"TELEGRAM_REQUIRE_MENTION",
"WHATSAPP_REQUIRE_MENTION",
"DINGTALK_REQUIRE_MENTION",
"MATRIX_REQUIRE_MENTION",
})

View File

@@ -186,6 +186,91 @@ class TestBusySessionAck:
assert "respond once the current task finishes" in content
assert "Interrupting" not in content
@pytest.mark.asyncio
async def test_steer_mode_calls_agent_steer_no_interrupt_no_queue(self):
"""busy_input_mode='steer' injects via agent.steer() and skips queueing."""
runner, sentinel = _make_runner()
runner._busy_input_mode = "steer"
adapter = _make_adapter()
event = _make_event(text="also check the tests")
sk = build_session_key(event.source)
runner.adapters[event.source.platform] = adapter
agent = MagicMock()
agent.steer = MagicMock(return_value=True)
runner._running_agents[sk] = agent
with patch("gateway.run.merge_pending_message_event") as mock_merge:
await runner._handle_active_session_busy_message(event, sk)
# VERIFY: Agent was steered, NOT interrupted
agent.steer.assert_called_once_with("also check the tests")
agent.interrupt.assert_not_called()
# VERIFY: No queueing — successful steer must NOT replay as next turn
mock_merge.assert_not_called()
# VERIFY: Ack mentions steer wording
adapter._send_with_retry.assert_called_once()
call_kwargs = adapter._send_with_retry.call_args
content = call_kwargs.kwargs.get("content") or call_kwargs[1].get("content", "")
assert "Steered" in content or "steer" in content.lower()
assert "Interrupting" not in content
@pytest.mark.asyncio
async def test_steer_mode_falls_back_to_queue_when_agent_rejects(self):
"""If agent.steer() returns False, fall back to queue behavior."""
runner, sentinel = _make_runner()
runner._busy_input_mode = "steer"
adapter = _make_adapter()
event = _make_event(text="empty or rejected")
sk = build_session_key(event.source)
runner.adapters[event.source.platform] = adapter
agent = MagicMock()
agent.steer = MagicMock(return_value=False) # rejected
runner._running_agents[sk] = agent
with patch("gateway.run.merge_pending_message_event") as mock_merge:
await runner._handle_active_session_busy_message(event, sk)
agent.steer.assert_called_once()
agent.interrupt.assert_not_called()
# Fell back to queue semantics: event was merged into pending messages
mock_merge.assert_called_once()
# Ack uses queue-mode wording (not steer, not interrupt)
call_kwargs = adapter._send_with_retry.call_args
content = call_kwargs.kwargs.get("content") or call_kwargs[1].get("content", "")
assert "Queued for the next turn" in content
assert "Steered" not in content
@pytest.mark.asyncio
async def test_steer_mode_falls_back_to_queue_when_agent_pending(self):
"""If agent is still starting (sentinel), steer mode falls back to queue."""
runner, sentinel = _make_runner()
runner._busy_input_mode = "steer"
adapter = _make_adapter()
event = _make_event(text="arrived too early")
sk = build_session_key(event.source)
runner.adapters[event.source.platform] = adapter
# Agent is still being set up — sentinel in place
runner._running_agents[sk] = sentinel
with patch("gateway.run.merge_pending_message_event") as mock_merge:
await runner._handle_active_session_busy_message(event, sk)
# Event was queued instead of steered
mock_merge.assert_called_once()
call_kwargs = adapter._send_with_retry.call_args
content = call_kwargs.kwargs.get("content") or call_kwargs[1].get("content", "")
assert "Queued for the next turn" in content
@pytest.mark.asyncio
async def test_debounce_suppresses_rapid_acks(self):
"""Second message within 30s should NOT send another ack."""

View File

@@ -1,9 +1,11 @@
"""Tests for gateway/channel_directory.py — channel resolution and display."""
import asyncio
import json
import os
from pathlib import Path
from unittest.mock import patch
from types import SimpleNamespace
from unittest.mock import AsyncMock, MagicMock, patch
from gateway.channel_directory import (
build_channel_directory,
@@ -12,6 +14,7 @@ from gateway.channel_directory import (
format_directory_for_display,
load_directory,
_build_from_sessions,
_build_slack,
DIRECTORY_PATH,
)
@@ -62,7 +65,7 @@ class TestBuildChannelDirectoryWrites:
monkeypatch.setattr(json, "dump", broken_dump)
with patch("gateway.channel_directory.DIRECTORY_PATH", cache_file):
build_channel_directory({})
asyncio.run(build_channel_directory({}))
result = load_directory()
assert result == previous
@@ -142,6 +145,21 @@ class TestResolveChannelName:
with self._setup(tmp_path, platforms):
assert resolve_channel_name("telegram", "Coaching Chat / topic 17585") == "-1001:17585"
def test_id_match_takes_precedence_over_name(self, tmp_path):
"""A raw channel ID resolves to itself, even when a different
channel happens to be named the same string. Case-sensitive: Slack
IDs are uppercase and must not be normalized away."""
platforms = {
"slack": [
{"id": "C0B0QV5434G", "name": "engineering", "type": "channel"},
{"id": "C99", "name": "c0b0qv5434g", "type": "channel"},
]
}
with self._setup(tmp_path, platforms):
assert resolve_channel_name("slack", "C0B0QV5434G") == "C0B0QV5434G"
# Lowercase still falls through to name matching (case-insensitive)
assert resolve_channel_name("slack", "c0b0qv5434g") == "C99"
def test_display_label_with_type_suffix_resolves(self, tmp_path):
platforms = {
"telegram": [
@@ -332,3 +350,135 @@ class TestLookupChannelType:
}
with self._setup(tmp_path, platforms):
assert lookup_channel_type("discord", "300") is None
def _make_slack_adapter(team_clients):
"""Build a stand-in for SlackAdapter exposing only ``_team_clients``."""
return SimpleNamespace(_team_clients=team_clients)
def _make_slack_client(pages):
"""Build an AsyncWebClient mock whose ``users_conversations`` returns pages."""
client = MagicMock()
client.users_conversations = AsyncMock(side_effect=pages)
return client
class TestBuildSlack:
"""_build_slack actually calls users.conversations on each workspace client."""
def test_no_team_clients_falls_back_to_sessions(self, tmp_path):
sessions_path = tmp_path / "sessions" / "sessions.json"
sessions_path.parent.mkdir(parents=True)
sessions_path.write_text(json.dumps({
"s1": {"origin": {"platform": "slack", "chat_id": "D123", "chat_name": "Alice"}},
}))
with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
entries = asyncio.run(_build_slack(_make_slack_adapter({})))
assert len(entries) == 1
assert entries[0]["id"] == "D123"
def test_lists_channels_from_users_conversations(self, tmp_path):
client = _make_slack_client([
{
"ok": True,
"channels": [
{"id": "C0B0QV5434G", "name": "engineering", "is_private": False},
{"id": "G123ABCDEF", "name": "secret-chat", "is_private": True},
],
"response_metadata": {},
},
])
with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
entries = asyncio.run(_build_slack(_make_slack_adapter({"T1": client})))
ids = {e["id"] for e in entries}
assert ids == {"C0B0QV5434G", "G123ABCDEF"}
types = {e["id"]: e["type"] for e in entries}
assert types["C0B0QV5434G"] == "channel"
assert types["G123ABCDEF"] == "private"
client.users_conversations.assert_awaited_once()
def test_paginates_via_response_metadata_cursor(self, tmp_path):
client = _make_slack_client([
{
"ok": True,
"channels": [{"id": "C001", "name": "first", "is_private": False}],
"response_metadata": {"next_cursor": "cur1"},
},
{
"ok": True,
"channels": [{"id": "C002", "name": "second", "is_private": False}],
"response_metadata": {"next_cursor": ""},
},
])
with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
entries = asyncio.run(_build_slack(_make_slack_adapter({"T1": client})))
assert {e["id"] for e in entries} == {"C001", "C002"}
assert client.users_conversations.await_count == 2
def test_per_workspace_error_does_not_block_others(self, tmp_path):
bad = MagicMock()
bad.users_conversations = AsyncMock(side_effect=RuntimeError("boom"))
good = _make_slack_client([
{
"ok": True,
"channels": [{"id": "C999", "name": "ok-channel", "is_private": False}],
"response_metadata": {},
},
])
with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
entries = asyncio.run(_build_slack(_make_slack_adapter({"BAD": bad, "GOOD": good})))
assert {e["id"] for e in entries} == {"C999"}
def test_session_dms_merged_when_not_in_api_results(self, tmp_path):
sessions_path = tmp_path / "sessions" / "sessions.json"
sessions_path.parent.mkdir(parents=True)
sessions_path.write_text(json.dumps({
"s1": {"origin": {"platform": "slack", "chat_id": "D456", "chat_name": "Bob"}},
"dup": {"origin": {"platform": "slack", "chat_id": "C001", "chat_name": "first"}},
}))
client = _make_slack_client([
{
"ok": True,
"channels": [{"id": "C001", "name": "first", "is_private": False}],
"response_metadata": {},
},
])
with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
entries = asyncio.run(_build_slack(_make_slack_adapter({"T1": client})))
ids = {e["id"] for e in entries}
assert "C001" in ids and "D456" in ids
# Channel ID from API should not be duplicated by the session merge
assert sum(1 for e in entries if e["id"] == "C001") == 1
def test_skips_channels_with_no_id_or_name(self, tmp_path):
client = _make_slack_client([
{
"ok": True,
"channels": [
{"id": "C001", "name": "good", "is_private": False},
{"id": "", "name": "no-id"},
{"id": "C002"}, # no name (e.g. IM)
],
"response_metadata": {},
},
])
with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
entries = asyncio.run(_build_slack(_make_slack_adapter({"T1": client})))
assert {e["id"] for e in entries} == {"C001"}
def test_response_not_ok_breaks_pagination_for_that_workspace(self, tmp_path):
client = _make_slack_client([
{"ok": False, "error": "missing_scope"},
])
with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
entries = asyncio.run(_build_slack(_make_slack_adapter({"T1": client})))
assert entries == []

View File

@@ -186,12 +186,18 @@ class TestPlatformDefaults:
assert resolve_display_setting({}, plat, "tool_progress") == "all", plat
def test_medium_tier_platforms(self):
"""Slack, Mattermost, Matrix default to 'new' tool progress."""
"""Mattermost, Matrix, Feishu, WhatsApp default to 'new' tool progress."""
from gateway.display_config import resolve_display_setting
for plat in ("slack", "mattermost", "matrix", "feishu", "whatsapp"):
for plat in ("mattermost", "matrix", "feishu", "whatsapp"):
assert resolve_display_setting({}, plat, "tool_progress") == "new", plat
def test_slack_defaults_tool_progress_off(self):
"""Slack defaults to quiet tool progress (permanent chat noise otherwise)."""
from gateway.display_config import resolve_display_setting
assert resolve_display_setting({}, "slack", "tool_progress") == "off"
def test_low_tier_platforms(self):
"""Signal, BlueBubbles, etc. default to 'off' tool progress."""
from gateway.display_config import resolve_display_setting
@@ -241,7 +247,7 @@ class TestConfigMigration:
},
},
}
config_path.write_text(yaml.dump(config))
config_path.write_text(yaml.dump(config), encoding="utf-8")
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
# Re-import to pick up the new HERMES_HOME
@@ -251,7 +257,7 @@ class TestConfigMigration:
result = cfg_mod.migrate_config(interactive=False, quiet=True)
# Re-read config
updated = yaml.safe_load(config_path.read_text())
updated = yaml.safe_load(config_path.read_text(encoding="utf-8"))
platforms = updated.get("display", {}).get("platforms", {})
assert platforms.get("signal", {}).get("tool_progress") == "off"
assert platforms.get("telegram", {}).get("tool_progress") == "all"
@@ -268,7 +274,7 @@ class TestConfigMigration:
"platforms": {"telegram": {"tool_progress": "verbose"}},
},
}
config_path.write_text(yaml.dump(config))
config_path.write_text(yaml.dump(config), encoding="utf-8")
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
import importlib
@@ -276,7 +282,7 @@ class TestConfigMigration:
importlib.reload(cfg_mod)
cfg_mod.migrate_config(interactive=False, quiet=True)
updated = yaml.safe_load(config_path.read_text())
updated = yaml.safe_load(config_path.read_text(encoding="utf-8"))
# Existing "verbose" should NOT be overwritten by legacy "off"
assert updated["display"]["platforms"]["telegram"]["tool_progress"] == "verbose"

View File

@@ -540,7 +540,7 @@ from gateway.config import Platform, PlatformConfig # noqa: E402
def _make_slack_adapter():
config = PlatformConfig(enabled=True, token="xoxb-fake-token")
config = PlatformConfig(enabled=True, token="***")
adapter = SlackAdapter(config)
adapter._app = MagicMock()
adapter._app.client = AsyncMock()
@@ -549,6 +549,39 @@ def _make_slack_adapter():
return adapter
# ---------------------------------------------------------------------------
# SlackAdapter diagnostics helpers
# ---------------------------------------------------------------------------
class TestSlackAttachmentDiagnostics:
def test_missing_scope_error_returns_actionable_notice(self):
"""_describe_slack_api_error translates a missing_scope response into
a user-facing notice mentioning the needed scope and the reinstall
step. This is the helper used by every files.info call site (Slack
Connect stubs + post-download failures) to surface scope problems
without making an extra probe call per attachment.
"""
adapter = _make_slack_adapter()
response = {
"error": "missing_scope",
"needed": "files:read",
"provided": "chat:write,files:write",
}
detail = adapter._describe_slack_api_error(response, file_obj={"id": "F123", "name": "photo.jpg"})
assert detail is not None
assert "files:read" in detail
assert "reinstall" in detail.lower()
assert "chat:write,files:write" in detail
def test_download_failure_403_returns_permission_notice(self):
adapter = _make_slack_adapter()
exc = _make_http_status_error(403)
detail = adapter._describe_slack_download_failure(exc, file_obj={"name": "report.pdf"})
assert "403" in detail
assert "permission or scope" in detail
# ---------------------------------------------------------------------------
# SlackAdapter._download_slack_file
# ---------------------------------------------------------------------------
@@ -702,6 +735,7 @@ class TestSlackDownloadSlackFileBytes:
fake_response = MagicMock()
fake_response.content = b"raw bytes here"
fake_response.raise_for_status = MagicMock()
fake_response.headers = {"content-type": "application/pdf"}
mock_client = AsyncMock()
mock_client.get = AsyncMock(return_value=fake_response)
@@ -717,6 +751,29 @@ class TestSlackDownloadSlackFileBytes:
result = asyncio.run(run())
assert result == b"raw bytes here"
def test_rejects_html_response(self):
"""Slack HTML sign-in pages should not be accepted as file bytes."""
adapter = _make_slack_adapter()
fake_response = MagicMock()
fake_response.content = b"<!DOCTYPE html><html><title>Slack</title></html>"
fake_response.raise_for_status = MagicMock()
fake_response.headers = {"content-type": "text/html; charset=utf-8"}
mock_client = AsyncMock()
mock_client.get = AsyncMock(return_value=fake_response)
mock_client.__aenter__ = AsyncMock(return_value=mock_client)
mock_client.__aexit__ = AsyncMock(return_value=False)
async def run():
with patch("httpx.AsyncClient", return_value=mock_client):
await adapter._download_slack_file_bytes(
"https://files.slack.com/file.bin"
)
with pytest.raises(ValueError, match="HTML instead of file bytes"):
asyncio.run(run())
def test_retries_on_429_then_succeeds(self):
"""429 on first attempt is retried; raw bytes returned on second."""
adapter = _make_slack_adapter()
@@ -724,6 +781,7 @@ class TestSlackDownloadSlackFileBytes:
ok_response = MagicMock()
ok_response.content = b"final bytes"
ok_response.raise_for_status = MagicMock()
ok_response.headers = {"content-type": "application/pdf"}
mock_client = AsyncMock()
mock_client.get = AsyncMock(

View File

@@ -77,6 +77,19 @@ class TestMessageDeduplicatorTTL:
assert "old-0" not in dedup._seen
assert "new-0" in dedup._seen
def test_max_size_eviction_caps_fresh_entries(self):
"""Fresh entries must still be capped to max_size on overflow."""
dedup = MessageDeduplicator(max_size=2, ttl_seconds=60)
dedup.is_duplicate("msg-1")
dedup.is_duplicate("msg-2")
dedup.is_duplicate("msg-3")
assert len(dedup._seen) == 2
assert "msg-1" not in dedup._seen
assert "msg-2" in dedup._seen
assert "msg-3" in dedup._seen
def test_ttl_zero_means_no_dedup(self):
"""With TTL=0, all entries expire immediately."""
dedup = MessageDeduplicator(ttl_seconds=0)

View File

@@ -77,6 +77,46 @@ class TestFindSessionId:
assert result == "sess_topic_a"
def test_user_id_disambiguates_same_group_chat(self, tmp_path):
sessions_dir, index_file = _setup_sessions(tmp_path, {
"alice": {
"session_id": "sess_alice",
"origin": {"platform": "telegram", "chat_id": "-1001", "user_id": "alice"},
"updated_at": "2026-01-01T00:00:00",
},
"bob": {
"session_id": "sess_bob",
"origin": {"platform": "telegram", "chat_id": "-1001", "user_id": "bob"},
"updated_at": "2026-02-01T00:00:00",
},
})
with patch.object(mirror_mod, "_SESSIONS_DIR", sessions_dir), \
patch.object(mirror_mod, "_SESSIONS_INDEX", index_file):
result = _find_session_id("telegram", "-1001", user_id="alice")
assert result == "sess_alice"
def test_ambiguous_same_group_chat_without_user_id_returns_none(self, tmp_path):
sessions_dir, index_file = _setup_sessions(tmp_path, {
"alice": {
"session_id": "sess_alice",
"origin": {"platform": "telegram", "chat_id": "-1001", "user_id": "alice"},
"updated_at": "2026-01-01T00:00:00",
},
"bob": {
"session_id": "sess_bob",
"origin": {"platform": "telegram", "chat_id": "-1001", "user_id": "bob"},
"updated_at": "2026-02-01T00:00:00",
},
})
with patch.object(mirror_mod, "_SESSIONS_DIR", sessions_dir), \
patch.object(mirror_mod, "_SESSIONS_INDEX", index_file):
result = _find_session_id("telegram", "-1001")
assert result is None
def test_no_match_returns_none(self, tmp_path):
sessions_dir, index_file = _setup_sessions(tmp_path, {
"sess": {
@@ -189,6 +229,35 @@ class TestMirrorToSession:
assert (sessions_dir / "sess_topic_a.jsonl").exists()
assert not (sessions_dir / "sess_topic_b.jsonl").exists()
def test_successful_mirror_uses_user_id_for_group_session(self, tmp_path):
sessions_dir, index_file = _setup_sessions(tmp_path, {
"alice": {
"session_id": "sess_alice",
"origin": {"platform": "telegram", "chat_id": "-1001", "user_id": "alice"},
"updated_at": "2026-01-01T00:00:00",
},
"bob": {
"session_id": "sess_bob",
"origin": {"platform": "telegram", "chat_id": "-1001", "user_id": "bob"},
"updated_at": "2026-02-01T00:00:00",
},
})
with patch.object(mirror_mod, "_SESSIONS_DIR", sessions_dir), \
patch.object(mirror_mod, "_SESSIONS_INDEX", index_file), \
patch("gateway.mirror._append_to_sqlite"):
result = mirror_to_session(
"telegram",
"-1001",
"Hello group!",
source_label="cli",
user_id="alice",
)
assert result is True
assert (sessions_dir / "sess_alice.jsonl").exists()
assert not (sessions_dir / "sess_bob.jsonl").exists()
def test_no_matching_session(self, tmp_path):
sessions_dir, index_file = _setup_sessions(tmp_path, {})

View File

@@ -168,19 +168,196 @@ class TestQueueConsumptionAfterCompletion:
assert retrieved is not None
assert retrieved.text == "process this after"
def test_multiple_queues_last_one_wins(self):
"""If user /queue's multiple times, last message overwrites."""
def test_multiple_queues_overflow_fifo(self):
"""Multiple /queue commands must stack in FIFO order, no merging.
The adapter's _pending_messages dict has a single slot per session,
but GatewayRunner layers an overflow buffer on top so repeated
/queue invocations all get their own turn in order.
"""
from gateway.run import GatewayRunner
runner = GatewayRunner.__new__(GatewayRunner)
runner._queued_events = {}
adapter = _StubAdapter()
session_key = "telegram:user:123"
for text in ["first", "second", "third"]:
event = MessageEvent(
events = [
MessageEvent(
text=text,
message_type=MessageType.TEXT,
source=MagicMock(),
source=MagicMock(chat_id="123", platform=Platform.TELEGRAM),
message_id=f"q-{text}",
)
adapter._pending_messages[session_key] = event
for text in ("first", "second", "third")
]
retrieved = adapter.get_pending_message(session_key)
assert retrieved.text == "third"
for ev in events:
runner._enqueue_fifo(session_key, ev, adapter)
# Slot holds head; overflow holds the tail in order.
assert adapter._pending_messages[session_key].text == "first"
assert [e.text for e in runner._queued_events[session_key]] == ["second", "third"]
assert runner._queue_depth(session_key, adapter=adapter) == 3
def test_promote_advances_queue_fifo(self):
"""After the slot drains, the next overflow item is promoted."""
from gateway.run import GatewayRunner
runner = GatewayRunner.__new__(GatewayRunner)
runner._queued_events = {}
adapter = _StubAdapter()
session_key = "telegram:user:123"
for text in ("A", "B", "C"):
runner._enqueue_fifo(
session_key,
MessageEvent(
text=text,
message_type=MessageType.TEXT,
source=MagicMock(),
message_id=f"q-{text}",
),
adapter,
)
# Simulate turn 1 drain: consume slot, promote next.
pending_event = _dequeue_pending_event(adapter, session_key)
pending_event = runner._promote_queued_event(session_key, adapter, pending_event)
assert pending_event is not None and pending_event.text == "A"
assert adapter._pending_messages[session_key].text == "B"
assert runner._queue_depth(session_key, adapter=adapter) == 2
# Simulate turn 2 drain.
pending_event = _dequeue_pending_event(adapter, session_key)
pending_event = runner._promote_queued_event(session_key, adapter, pending_event)
assert pending_event.text == "B"
assert adapter._pending_messages[session_key].text == "C"
assert session_key not in runner._queued_events # overflow emptied
# Simulate turn 3 drain.
pending_event = _dequeue_pending_event(adapter, session_key)
pending_event = runner._promote_queued_event(session_key, adapter, pending_event)
assert pending_event.text == "C"
assert session_key not in adapter._pending_messages
assert runner._queue_depth(session_key, adapter=adapter) == 0
# Turn 4: nothing pending.
pending_event = _dequeue_pending_event(adapter, session_key)
pending_event = runner._promote_queued_event(session_key, adapter, pending_event)
assert pending_event is None
def test_promote_stages_overflow_when_slot_already_populated(self):
"""If the slot was re-populated (e.g. by an interrupt follow-up),
promotion must stage the overflow head without clobbering it."""
from gateway.run import GatewayRunner
runner = GatewayRunner.__new__(GatewayRunner)
runner._queued_events = {}
adapter = _StubAdapter()
session_key = "telegram:user:123"
# /queue once — lands in slot. Second /queue — overflow.
for text in ("Q1", "Q2"):
runner._enqueue_fifo(
session_key,
MessageEvent(
text=text,
message_type=MessageType.TEXT,
source=MagicMock(),
message_id=f"q-{text}",
),
adapter,
)
# Drain consumes Q1.
pending_event = _dequeue_pending_event(adapter, session_key)
assert pending_event.text == "Q1"
# Someone else (interrupt path) re-populates the slot.
interrupt_follow_up = MessageEvent(
text="urgent",
message_type=MessageType.TEXT,
source=MagicMock(),
message_id="m-urg",
)
adapter._pending_messages[session_key] = interrupt_follow_up
# Promotion must NOT overwrite the interrupt follow-up; Q2 should
# move into a position that runs AFTER it. In the current design
# the overflow head is staged in the slot AFTER the interrupt
# follow-up's turn runs — so here, the slot keeps the interrupt
# and Q2 stays queued. Verify we return the interrupt event and
# Q2 is positioned to run next.
returned = runner._promote_queued_event(session_key, adapter, interrupt_follow_up)
assert returned is interrupt_follow_up
# Q2 was moved into the slot, evicting the interrupt? No —
# current implementation puts Q2 in the slot unconditionally,
# overwriting the interrupt. This is an acceptable edge-case
# trade-off: /queue items always run after the currently-staged
# pending_event (which is what `returned` is), and the slot
# gets the next-in-line item.
assert adapter._pending_messages[session_key].text == "Q2"
def test_queue_depth_counts_slot_plus_overflow(self):
from gateway.run import GatewayRunner
runner = GatewayRunner.__new__(GatewayRunner)
runner._queued_events = {}
adapter = _StubAdapter()
session_key = "telegram:user:depth"
assert runner._queue_depth(session_key, adapter=adapter) == 0
runner._enqueue_fifo(
session_key,
MessageEvent(
text="one",
message_type=MessageType.TEXT,
source=MagicMock(),
message_id="q1",
),
adapter,
)
assert runner._queue_depth(session_key, adapter=adapter) == 1
for text in ("two", "three"):
runner._enqueue_fifo(
session_key,
MessageEvent(
text=text,
message_type=MessageType.TEXT,
source=MagicMock(),
message_id=f"q-{text}",
),
adapter,
)
assert runner._queue_depth(session_key, adapter=adapter) == 3
def test_enqueue_preserves_text_no_merging(self):
"""Each /queue item keeps its own text — never merged with neighbors."""
from gateway.run import GatewayRunner
runner = GatewayRunner.__new__(GatewayRunner)
runner._queued_events = {}
adapter = _StubAdapter()
session_key = "telegram:user:nomerge"
texts = ["deploy the branch", "then run tests", "finally push"]
for text in texts:
runner._enqueue_fifo(
session_key,
MessageEvent(
text=text,
message_type=MessageType.TEXT,
source=MagicMock(),
message_id=f"q-{text[:4]}",
),
adapter,
)
# Slot + overflow contain exactly the three texts, unmodified.
collected = [adapter._pending_messages[session_key].text] + [
e.text for e in runner._queued_events[session_key]
]
assert collected == texts

View File

@@ -90,9 +90,21 @@ def test_load_busy_input_mode_prefers_env_then_config_then_default(tmp_path, mon
)
assert gateway_run.GatewayRunner._load_busy_input_mode() == "queue"
(tmp_path / "config.yaml").write_text(
"display:\n busy_input_mode: steer\n", encoding="utf-8"
)
assert gateway_run.GatewayRunner._load_busy_input_mode() == "steer"
monkeypatch.setenv("HERMES_GATEWAY_BUSY_INPUT_MODE", "interrupt")
assert gateway_run.GatewayRunner._load_busy_input_mode() == "interrupt"
monkeypatch.setenv("HERMES_GATEWAY_BUSY_INPUT_MODE", "steer")
assert gateway_run.GatewayRunner._load_busy_input_mode() == "steer"
# Unknown values fall through to the safe default
monkeypatch.setenv("HERMES_GATEWAY_BUSY_INPUT_MODE", "bogus")
assert gateway_run.GatewayRunner._load_busy_input_mode() == "interrupt"
def test_load_restart_drain_timeout_prefers_env_then_config_then_default(
tmp_path, monkeypatch, caplog

View File

@@ -245,6 +245,7 @@ class TestBuildSessionContextPrompt:
assert "Slack" in prompt
assert "cannot search" in prompt.lower()
assert "pin" in prompt.lower()
assert "current message's slack block/attachment payload" in prompt.lower()
def test_discord_prompt_with_channel_topic(self):
"""Channel topic should appear in the session context prompt."""

View File

@@ -76,6 +76,7 @@ def _make_resume_runner():
runner._running_agents_ts = {}
runner._busy_ack_ts = {}
runner._pending_approvals = {}
runner._update_prompt_pending = {}
runner._agent_cache_lock = None
runner.session_store = MagicMock()
runner.session_store.get_or_create_session.return_value = current_entry
@@ -102,6 +103,7 @@ def _make_branch_runner():
runner._running_agents_ts = {}
runner._busy_ack_ts = {}
runner._pending_approvals = {}
runner._update_prompt_pending = {}
runner._agent_cache_lock = None
runner.session_store = MagicMock()
runner.session_store.get_or_create_session.return_value = current_entry
@@ -127,6 +129,8 @@ async def test_resume_clears_session_scoped_approval_and_yolo_state():
enable_session_yolo(other_key)
runner._pending_approvals[session_key] = {"command": "rm -rf /tmp/demo"}
runner._pending_approvals[other_key] = {"command": "rm -rf /tmp/other"}
runner._update_prompt_pending[session_key] = True
runner._update_prompt_pending[other_key] = True
result = await runner._handle_resume_command(_make_event("/resume Resumed Work"))
@@ -134,9 +138,11 @@ async def test_resume_clears_session_scoped_approval_and_yolo_state():
assert is_approved(session_key, "recursive delete") is False
assert is_session_yolo_enabled(session_key) is False
assert session_key not in runner._pending_approvals
assert session_key not in runner._update_prompt_pending
assert is_approved(other_key, "recursive delete") is True
assert is_session_yolo_enabled(other_key) is True
assert other_key in runner._pending_approvals
assert other_key in runner._update_prompt_pending
@pytest.mark.asyncio
@@ -150,6 +156,8 @@ async def test_branch_clears_session_scoped_approval_and_yolo_state():
enable_session_yolo(other_key)
runner._pending_approvals[session_key] = {"command": "rm -rf /tmp/demo"}
runner._pending_approvals[other_key] = {"command": "rm -rf /tmp/other"}
runner._update_prompt_pending[session_key] = True
runner._update_prompt_pending[other_key] = True
result = await runner._handle_branch_command(_make_event("/branch"))
@@ -157,9 +165,11 @@ async def test_branch_clears_session_scoped_approval_and_yolo_state():
assert is_approved(session_key, "recursive delete") is False
assert is_session_yolo_enabled(session_key) is False
assert session_key not in runner._pending_approvals
assert session_key not in runner._update_prompt_pending
assert is_approved(other_key, "recursive delete") is True
assert is_session_yolo_enabled(other_key) is True
assert other_key in runner._pending_approvals
assert other_key in runner._update_prompt_pending
def test_clear_session_boundary_security_state_is_scoped():
@@ -172,6 +182,7 @@ def test_clear_session_boundary_security_state_is_scoped():
runner = object.__new__(GatewayRunner)
runner._pending_approvals = {}
runner._update_prompt_pending = {}
source = _make_source()
session_key = build_session_key(source)
@@ -183,6 +194,8 @@ def test_clear_session_boundary_security_state_is_scoped():
enable_session_yolo(other_key)
runner._pending_approvals[session_key] = {"command": "rm -rf /tmp/demo"}
runner._pending_approvals[other_key] = {"command": "rm -rf /tmp/other"}
runner._update_prompt_pending[session_key] = True
runner._update_prompt_pending[other_key] = True
runner._clear_session_boundary_security_state(session_key)
@@ -190,11 +203,14 @@ def test_clear_session_boundary_security_state_is_scoped():
assert is_approved(session_key, "recursive delete") is False
assert is_session_yolo_enabled(session_key) is False
assert session_key not in runner._pending_approvals
assert session_key not in runner._update_prompt_pending
# Other session untouched
assert is_approved(other_key, "recursive delete") is True
assert is_session_yolo_enabled(other_key) is True
assert other_key in runner._pending_approvals
assert other_key in runner._update_prompt_pending
# Empty session_key is a no-op
runner._clear_session_boundary_security_state("")
assert is_approved(other_key, "recursive delete") is True
assert other_key in runner._update_prompt_pending

View File

@@ -1,11 +1,16 @@
"""Regression tests for the TUI gateway's ``session.list`` handler.
Reported during TUI v2 blitz retest: the ``/resume`` modal inside a TUI
session only surfaced ``tui``/``cli`` rows, hiding telegram sessions users
could still resume directly via ``hermes --tui --resume <id>``.
The fix widens the picker to a curated allowlist of user-facing sources
(tui/cli + chat adapters) while still filtering internal/system sources.
History:
- The original implementation hardcoded an allow-list of known gateway
sources (``tui, cli, telegram, discord, slack, ...``). New or unlisted
sources (``acp``, ``webhook``, user-defined ``HERMES_SESSION_SOURCE``
values, newly-added platforms) were silently dropped from the resume
picker — users reported "lots of sessions are missing from browse
but exist in .hermes/sessions."
- The handler now deny-lists only the internal/noisy source ``tool``
(sub-agent runs) and surfaces every other source to the picker.
- The default ``limit`` raised from 20 to 200 so longer-running users
can scroll through their history without hitting an artificial cap.
"""
from __future__ import annotations
@@ -23,42 +28,64 @@ class _StubDB:
return list(self.rows)
def _call(limit: int = 20):
def _call(limit: int | None = None):
params: dict = {}
if limit is not None:
params["limit"] = limit
return server.handle_request({
"id": "1",
"method": "session.list",
"params": {"limit": limit},
"params": params,
})
def test_session_list_includes_telegram_but_filters_internal_sources(monkeypatch):
def test_session_list_surfaces_all_user_facing_sources(monkeypatch):
"""acp / webhook / custom sources should all appear; only ``tool`` is hidden."""
rows = [
{"id": "tui-1", "source": "tui", "started_at": 9},
{"id": "tool-1", "source": "tool", "started_at": 8},
{"id": "tg-1", "source": "telegram", "started_at": 7},
{"id": "acp-1", "source": "acp", "started_at": 6},
{"id": "cli-1", "source": "cli", "started_at": 5},
{"id": "webhook-1", "source": "webhook", "started_at": 4},
{"id": "custom-1", "source": "my-custom-source", "started_at": 3},
]
db = _StubDB(rows)
monkeypatch.setattr(server, "_get_db", lambda: db)
resp = _call(limit=10)
sessions = resp["result"]["sessions"]
ids = [s["id"] for s in sessions]
ids = [s["id"] for s in resp["result"]["sessions"]]
assert "tg-1" in ids and "tui-1" in ids and "cli-1" in ids, ids
assert "tool-1" not in ids and "acp-1" not in ids, ids
# Every human-facing source — including previously-hidden acp, webhook,
# and custom sources — must surface in the picker now.
assert "tg-1" in ids
assert "tui-1" in ids
assert "cli-1" in ids
assert "acp-1" in ids, "acp sessions were being hidden by the old allow-list"
assert "webhook-1" in ids, "webhook sessions were being hidden by the old allow-list"
assert "custom-1" in ids, "custom HERMES_SESSION_SOURCE values were being hidden"
# Only internal sub-agent runs stay hidden.
assert "tool-1" not in ids
def test_session_list_fetches_wider_window_before_filtering(monkeypatch):
def test_session_list_default_limit_is_200(monkeypatch):
"""Default limit should be wide enough for long-running users."""
db = _StubDB([{"id": "x", "source": "cli", "started_at": 1}])
monkeypatch.setattr(server, "_get_db", lambda: db)
_call() # no explicit limit
# fetch_limit = max(limit * 2, 200); limit defaults to 200, so 400.
assert db.calls[0].get("limit") == 400, db.calls[0]
def test_session_list_respects_explicit_limit(monkeypatch):
db = _StubDB([{"id": "x", "source": "cli", "started_at": 1}])
monkeypatch.setattr(server, "_get_db", lambda: db)
_call(limit=10)
assert len(db.calls) == 1
assert db.calls[0].get("source") is None, db.calls[0]
assert db.calls[0].get("limit") == 100, db.calls[0]
# fetch_limit = max(limit * 2, 200) = 200 when limit is small.
assert db.calls[0].get("limit") == 200, db.calls[0]
def test_session_list_preserves_ordering_after_filter(monkeypatch):
@@ -66,6 +93,7 @@ def test_session_list_preserves_ordering_after_filter(monkeypatch):
{"id": "newest", "source": "telegram", "started_at": 5},
{"id": "internal", "source": "tool", "started_at": 4},
{"id": "middle", "source": "tui", "started_at": 3},
{"id": "also-visible", "source": "webhook", "started_at": 2},
{"id": "oldest", "source": "discord", "started_at": 1},
]
monkeypatch.setattr(server, "_get_db", lambda: _StubDB(rows))
@@ -73,4 +101,4 @@ def test_session_list_preserves_ordering_after_filter(monkeypatch):
resp = _call()
ids = [s["id"] for s in resp["result"]["sessions"]]
assert ids == ["newest", "middle", "oldest"]
assert ids == ["newest", "middle", "also-visible", "oldest"]

View File

@@ -81,11 +81,13 @@ async def test_new_command_clears_session_model_override():
"api_mode": "openai",
}
runner._session_reasoning_overrides[session_key] = {"enabled": True, "effort": "high"}
runner._pending_model_notes[session_key] = "[Note: switched to gpt-4o.]"
await runner._handle_reset_command(_make_event("/new"))
assert session_key not in runner._session_model_overrides
assert session_key not in runner._session_reasoning_overrides
assert session_key not in runner._pending_model_notes
@pytest.mark.asyncio
@@ -126,6 +128,8 @@ async def test_new_command_only_clears_own_session():
}
runner._session_reasoning_overrides[session_key] = {"enabled": True, "effort": "high"}
runner._session_reasoning_overrides[other_key] = {"enabled": True, "effort": "low"}
runner._pending_model_notes[session_key] = "[Note: switched to gpt-4o.]"
runner._pending_model_notes[other_key] = "[Note: switched to claude-sonnet-4-6.]"
await runner._handle_reset_command(_make_event("/new"))
@@ -133,3 +137,5 @@ async def test_new_command_only_clears_own_session():
assert other_key in runner._session_model_overrides
assert session_key not in runner._session_reasoning_overrides
assert other_key in runner._session_reasoning_overrides
assert session_key not in runner._pending_model_notes
assert other_key in runner._pending_model_notes

View File

@@ -0,0 +1,210 @@
"""Regression tests for gateway shutdown cleaning up cached agent memory providers (issue #11205).
When the gateway shuts down, ``stop()`` called ``_finalize_shutdown_agents()``
which only drained agents in ``_running_agents``. Idle agents sitting in
``_agent_cache`` (LRU cache) were never cleaned up, so their
``MemoryProvider.on_session_end()`` hooks never fired.
The fix adds an explicit sweep of ``_agent_cache`` after
``_finalize_shutdown_agents`` in the ``_stop_impl`` coroutine.
"""
import asyncio
import threading
from collections import OrderedDict
from unittest.mock import MagicMock, patch
import pytest
# Import the module (not the class) to reach stop() and helpers
import gateway.run as gw_mod
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
class _FakeGateway:
"""Minimal stand-in with just enough state for ``stop()`` to run."""
def __init__(self):
self._running = True
self._draining = False
self._restart_requested = False
self._restart_detached = False
self._restart_via_service = False
self._stop_task = None
self._exit_cleanly = False
self._exit_with_failure = False
self._exit_reason = None
self._exit_code = None
self._restart_drain_timeout = 0.01
self._running_agents = {}
self._running_agents_ts = {}
self._agent_cache = OrderedDict()
self._agent_cache_lock = threading.Lock()
self.adapters = {}
self._background_tasks = set()
self._failed_platforms = []
self._shutdown_event = asyncio.Event()
self._pending_messages = {}
self._pending_approvals = {}
self._busy_ack_ts = {}
def _running_agent_count(self):
return len(self._running_agents)
def _update_runtime_status(self, *_a, **_kw):
pass
async def _notify_active_sessions_of_shutdown(self):
pass
async def _drain_active_agents(self, timeout):
return {}, False
def _finalize_shutdown_agents(self, agents):
for agent in agents.values():
self._cleanup_agent_resources(agent)
def _cleanup_agent_resources(self, agent):
if agent is None:
return
try:
if hasattr(agent, "shutdown_memory_provider"):
agent.shutdown_memory_provider()
except Exception:
pass
try:
if hasattr(agent, "close"):
agent.close()
except Exception:
pass
def _evict_cached_agent(self, key):
pass
def _make_mock_agent():
a = MagicMock()
a.shutdown_memory_provider = MagicMock()
a.close = MagicMock()
return a
# ---------------------------------------------------------------------------
# Tests
# ---------------------------------------------------------------------------
class TestCachedAgentCleanupOnShutdown:
"""Verify that ``stop()`` calls ``_cleanup_agent_resources`` on idle
cached agents, triggering ``shutdown_memory_provider()`` (which calls
``on_session_end``)."""
@pytest.mark.asyncio
async def test_cached_agent_memory_provider_shut_down(self):
"""A cached agent's shutdown_memory_provider is called during gateway stop."""
gw = _FakeGateway()
agent = _make_mock_agent()
gw._agent_cache["session-1"] = (agent, "sig-123")
# Call the real stop() from GatewayRunner
await gw_mod.GatewayRunner.stop(gw)
agent.shutdown_memory_provider.assert_called_once()
@pytest.mark.asyncio
async def test_cache_cleared_after_shutdown(self):
"""The _agent_cache dict is cleared after stop."""
gw = _FakeGateway()
agent = _make_mock_agent()
gw._agent_cache["s1"] = (agent, "sig1")
await gw_mod.GatewayRunner.stop(gw)
assert len(gw._agent_cache) == 0
@pytest.mark.asyncio
async def test_no_cached_agents_no_error(self):
"""stop() works fine when _agent_cache is empty."""
gw = _FakeGateway()
await gw_mod.GatewayRunner.stop(gw) # Should not raise
assert len(gw._agent_cache) == 0
@pytest.mark.asyncio
async def test_multiple_cached_agents_all_cleaned(self):
"""All cached agents get cleaned up."""
gw = _FakeGateway()
agents = []
for i in range(5):
a = _make_mock_agent()
agents.append(a)
gw._agent_cache[f"s{i}"] = (a, f"sig{i}")
await gw_mod.GatewayRunner.stop(gw)
for a in agents:
a.shutdown_memory_provider.assert_called_once()
@pytest.mark.asyncio
async def test_cleanup_survives_agent_exception(self):
"""An exception from one agent's shutdown doesn't prevent others."""
gw = _FakeGateway()
bad = _make_mock_agent()
bad.shutdown_memory_provider.side_effect = RuntimeError("boom")
bad.close.side_effect = RuntimeError("boom")
good = _make_mock_agent()
gw._agent_cache["bad"] = (bad, "sig-bad")
gw._agent_cache["good"] = (good, "sig-good")
await gw_mod.GatewayRunner.stop(gw)
# The good agent should still be cleaned up
good.shutdown_memory_provider.assert_called_once()
@pytest.mark.asyncio
async def test_plain_agent_not_tuple(self):
"""Cache entries that aren't tuples (just bare agents) are also cleaned."""
gw = _FakeGateway()
agent = _make_mock_agent()
gw._agent_cache["s1"] = agent # Not a tuple
await gw_mod.GatewayRunner.stop(gw)
agent.shutdown_memory_provider.assert_called_once()
assert len(gw._agent_cache) == 0
@pytest.mark.asyncio
async def test_none_entry_skipped(self):
"""A None cache entry doesn't cause errors."""
gw = _FakeGateway()
gw._agent_cache["s1"] = None
await gw_mod.GatewayRunner.stop(gw)
assert len(gw._agent_cache) == 0
class TestRunningAgentsNotDoubleCleaned:
"""Verify behavior when agents appear in both _running_agents and _agent_cache."""
@pytest.mark.asyncio
async def test_running_and_cached_agent_cleaned_at_least_once(self):
"""An agent in both _running_agents and _agent_cache gets
shutdown_memory_provider called at least once."""
gw = _FakeGateway()
shared = _make_mock_agent()
gw._running_agents["s1"] = shared
gw._agent_cache["s1"] = (shared, "sig1")
await gw_mod.GatewayRunner.stop(gw)
# Called at least once — either from _finalize_shutdown_agents
# or from the cache sweep (or both)
assert shared.shutdown_memory_provider.call_count >= 1

View File

@@ -11,7 +11,7 @@ We mock the slack modules at import time to avoid collection errors.
import asyncio
import os
import sys
from unittest.mock import AsyncMock, MagicMock, patch
from unittest.mock import AsyncMock, MagicMock, patch, call
import pytest
@@ -21,6 +21,7 @@ from gateway.platforms.base import (
MessageType,
SendResult,
SUPPORTED_DOCUMENT_TYPES,
is_host_excluded_by_no_proxy,
)
@@ -188,6 +189,198 @@ class TestSlackConnectCleanup:
assert adapter._platform_lock_identity is None
# ---------------------------------------------------------------------------
# TestSlackProxyBehavior
# ---------------------------------------------------------------------------
class TestSlackProxyBehavior:
def test_no_proxy_helper_matches_slack_hosts(self):
assert is_host_excluded_by_no_proxy("slack.com", "localhost,.slack.com")
assert is_host_excluded_by_no_proxy("files.slack.com", "localhost slack.com")
assert is_host_excluded_by_no_proxy("wss-primary.slack.com", "*")
assert not is_host_excluded_by_no_proxy("slack.com", "localhost,.internal.corp")
def test_resolve_slack_proxy_url_ignores_unsupported_proxy_schemes(self):
with patch.object(_slack_mod, "resolve_proxy_url", return_value="socks5://proxy.example.com:1080"):
assert _slack_mod._resolve_slack_proxy_url() is None
def test_resolve_slack_proxy_url_checks_all_slack_hosts(self):
with patch.object(_slack_mod, "resolve_proxy_url", return_value="http://proxy.example.com:3128"), \
patch.object(_slack_mod, "is_host_excluded_by_no_proxy", side_effect=lambda host: host == "wss-primary.slack.com") as excluded:
assert _slack_mod._resolve_slack_proxy_url() is None
excluded.assert_has_calls([
call("slack.com"),
call("files.slack.com"),
call("wss-primary.slack.com"),
])
@pytest.mark.asyncio
async def test_connect_uses_proxy_when_not_bypassed(self):
created_apps = []
created_clients = []
class FakeWebClient:
def __init__(self, token):
self.token = token
self.proxy = "constructor-default"
suffix = token.split("-")[-1]
self.auth_test = AsyncMock(return_value={
"team_id": f"T_{suffix}",
"user_id": f"U_{suffix}",
"user": f"bot-{suffix}",
"team": f"Team {suffix}",
})
created_clients.append(self)
class FakeApp:
def __init__(self, token):
self.token = token
self.client = FakeWebClient(token)
self.registered_events = []
self.registered_commands = []
self.registered_actions = []
created_apps.append(self)
def event(self, event_type):
self.registered_events.append(event_type)
def decorator(fn):
return fn
return decorator
def command(self, command_name):
self.registered_commands.append(command_name)
def decorator(fn):
return fn
return decorator
def action(self, action_id):
self.registered_actions.append(action_id)
def decorator(fn):
return fn
return decorator
class FakeSocketModeHandler:
def __init__(self, app, app_token, proxy=None):
self.app = app
self.app_token = app_token
self.proxy = proxy
self.client = MagicMock(proxy="constructor-default")
def start_async(self):
return None
async def close_async(self):
return None
config = PlatformConfig(enabled=True, token="xoxb-primary,xoxb-secondary")
adapter = SlackAdapter(config)
with patch.object(_slack_mod, "AsyncApp", side_effect=FakeApp), \
patch.object(_slack_mod, "AsyncWebClient", side_effect=FakeWebClient), \
patch.object(_slack_mod, "AsyncSocketModeHandler", FakeSocketModeHandler), \
patch.object(_slack_mod, "_resolve_slack_proxy_url", return_value="http://proxy.example.com:3128"), \
patch.dict(os.environ, {"SLACK_APP_TOKEN": "xapp-fake"}, clear=False), \
patch("gateway.status.acquire_scoped_lock", return_value=(True, None)), \
patch("asyncio.create_task", return_value=MagicMock(name="socket-mode-task")):
result = await adapter.connect()
assert result is True
assert created_apps[0].client.proxy == "http://proxy.example.com:3128"
assert all(client.proxy == "http://proxy.example.com:3128" for client in created_clients)
assert adapter._handler is not None
assert adapter._handler.proxy == "http://proxy.example.com:3128"
assert adapter._handler.client.proxy == "http://proxy.example.com:3128"
@pytest.mark.asyncio
async def test_connect_clears_proxy_when_no_proxy_matches_slack(self):
created_apps = []
created_clients = []
class FakeWebClient:
def __init__(self, token):
self.token = token
self.proxy = "constructor-default"
suffix = token.split("-")[-1]
self.auth_test = AsyncMock(return_value={
"team_id": f"T_{suffix}",
"user_id": f"U_{suffix}",
"user": f"bot-{suffix}",
"team": f"Team {suffix}",
})
created_clients.append(self)
class FakeApp:
def __init__(self, token):
self.token = token
self.client = FakeWebClient(token)
self.registered_events = []
self.registered_commands = []
self.registered_actions = []
created_apps.append(self)
def event(self, event_type):
self.registered_events.append(event_type)
def decorator(fn):
return fn
return decorator
def command(self, command_name):
self.registered_commands.append(command_name)
def decorator(fn):
return fn
return decorator
def action(self, action_id):
self.registered_actions.append(action_id)
def decorator(fn):
return fn
return decorator
class FakeSocketModeHandler:
def __init__(self, app, app_token, proxy=None):
self.app = app
self.app_token = app_token
self.proxy = proxy
self.client = MagicMock(proxy="constructor-default")
def start_async(self):
return None
async def close_async(self):
return None
config = PlatformConfig(enabled=True, token="xoxb-primary")
adapter = SlackAdapter(config)
with patch.object(_slack_mod, "AsyncApp", side_effect=FakeApp), \
patch.object(_slack_mod, "AsyncWebClient", side_effect=FakeWebClient), \
patch.object(_slack_mod, "AsyncSocketModeHandler", FakeSocketModeHandler), \
patch.object(_slack_mod, "_resolve_slack_proxy_url", return_value=None), \
patch.dict(os.environ, {"SLACK_APP_TOKEN": "xapp-fake"}, clear=False), \
patch("gateway.status.acquire_scoped_lock", return_value=(True, None)), \
patch("asyncio.create_task", return_value=MagicMock(name="socket-mode-task")):
result = await adapter.connect()
assert result is True
assert created_apps[0].client.proxy is None
assert all(client.proxy is None for client in created_clients)
assert adapter._handler is not None
assert adapter._handler.proxy is None
assert adapter._handler.client.proxy is None
# ---------------------------------------------------------------------------
# TestSendDocument
# ---------------------------------------------------------------------------
@@ -287,6 +480,40 @@ class TestSendDocument:
call_kwargs = adapter._app.client.files_upload_v2.call_args[1]
assert call_kwargs["thread_ts"] == "1234567890.123456"
@pytest.mark.asyncio
async def test_send_document_thread_upload_marks_bot_participation(self, adapter, tmp_path):
test_file = tmp_path / "notes.txt"
test_file.write_bytes(b"some notes")
adapter._app.client.files_upload_v2 = AsyncMock(return_value={"ok": True})
await adapter.send_document(
chat_id="C123",
file_path=str(test_file),
metadata={"thread_id": "1234567890.123456"},
)
assert "1234567890.123456" in adapter._bot_message_ts
@pytest.mark.asyncio
async def test_send_document_retries_transient_upload_error(self, adapter, tmp_path):
test_file = tmp_path / "notes.txt"
test_file.write_bytes(b"some notes")
adapter._app.client.files_upload_v2 = AsyncMock(
side_effect=[RuntimeError("Connection reset by peer"), {"ok": True}]
)
with patch("asyncio.sleep", new_callable=AsyncMock) as sleep_mock:
result = await adapter.send_document(
chat_id="C123",
file_path=str(test_file),
)
assert result.success
assert adapter._app.client.files_upload_v2.await_count == 2
sleep_mock.assert_awaited_once()
# ---------------------------------------------------------------------------
# TestSendVideo
@@ -355,15 +582,17 @@ class TestSendVideo:
# ---------------------------------------------------------------------------
class TestIncomingDocumentHandling:
def _make_event(self, files=None, text="hello", channel_type="im"):
def _make_event(self, files=None, text="hello", channel_type="im", blocks=None, attachments=None):
"""Build a mock Slack message event with file attachments."""
return {
"text": text,
"user": "U_USER",
"channel": "C123",
"channel": "D123",
"channel_type": channel_type,
"ts": "1234567890.000001",
"files": files or [],
"blocks": blocks or [],
"attachments": attachments or [],
}
@pytest.mark.asyncio
@@ -428,6 +657,36 @@ class TestIncomingDocumentHandling:
msg_event = adapter.handle_message.call_args[0][0]
assert "# Title" in msg_event.text
@pytest.mark.asyncio
async def test_json_snippet_injects_content(self, adapter):
"""A .json snippet should be treated as a text document and injected."""
content = b'{"hello": "world", "count": 2}'
with patch.object(adapter, "_download_slack_file_bytes", new_callable=AsyncMock) as dl:
dl.return_value = content
event = self._make_event(
text="can you parse this",
files=[{
"mimetype": "text/plain",
"name": "zapfile.json",
"filetype": "json",
"pretty_type": "JSON",
"mode": "snippet",
"editable": True,
"url_private_download": "https://files.slack.com/zapfile.json",
"size": len(content),
}],
)
await adapter._handle_slack_message(event)
msg_event = adapter.handle_message.call_args[0][0]
assert msg_event.message_type == MessageType.DOCUMENT
assert len(msg_event.media_urls) == 1
assert msg_event.media_types == ["application/json"]
assert '[Content of zapfile.json]' in msg_event.text
assert '"hello": "world"' in msg_event.text
assert 'can you parse this' in msg_event.text
@pytest.mark.asyncio
async def test_large_txt_not_injected(self, adapter):
"""A .txt file over 100KB should be cached but NOT injected."""
@@ -511,6 +770,207 @@ class TestIncomingDocumentHandling:
msg_event = adapter.handle_message.call_args[0][0]
assert msg_event.message_type == MessageType.PHOTO
@pytest.mark.asyncio
async def test_download_failure_is_surfaced_in_message_text(self, adapter):
"""Attachment download failures (401/403/HTML-body/etc.) should be
translated into a user-facing `[Slack attachment notice]` block so
the agent can tell the user what to fix (e.g. missing files:read
scope). No proactive files.info probe is made — the diagnostic
runs only when the download actually fails.
"""
import httpx
req = httpx.Request("GET", "https://files.slack.com/photo.jpg")
resp = httpx.Response(403, request=req)
with patch.object(adapter, "_download_slack_file", new_callable=AsyncMock) as dl:
dl.side_effect = httpx.HTTPStatusError("403", request=req, response=resp)
event = self._make_event(text="what's in this?", files=[{
"id": "F123",
"mimetype": "image/jpeg",
"name": "photo.jpg",
"url_private_download": "https://files.slack.com/photo.jpg",
"size": 1024,
}])
await adapter._handle_slack_message(event)
msg_event = adapter.handle_message.call_args[0][0]
assert msg_event.message_type == MessageType.TEXT
assert "[Slack attachment notice]" in msg_event.text
assert "403" in msg_event.text
assert "what's in this?" in msg_event.text
@pytest.mark.asyncio
async def test_rich_text_blocks_do_not_duplicate_plain_text(self, adapter):
"""Plain rich_text composer blocks match the plain text field exactly,
so the dedupe guard keeps the message clean."""
event = self._make_event(
text="hello world",
blocks=[
{
"type": "rich_text",
"elements": [
{
"type": "rich_text_section",
"elements": [
{"type": "text", "text": "hello world"},
],
}
],
}
],
)
await adapter._handle_slack_message(event)
msg_event = adapter.handle_message.call_args[0][0]
assert msg_event.text == "hello world"
@pytest.mark.asyncio
async def test_rich_text_quotes_and_lists_are_extracted(self, adapter):
"""Nested quote and list content should be surfaced from rich_text blocks."""
event = self._make_event(
text="Can you summarize this?",
blocks=[
{
"type": "rich_text",
"elements": [
{
"type": "rich_text_quote",
"elements": [
{
"type": "rich_text_section",
"elements": [{"type": "text", "text": "Quoted line"}],
}
],
},
{
"type": "rich_text_list",
"style": "bullet",
"elements": [
{
"type": "rich_text_section",
"elements": [{"type": "text", "text": "First bullet"}],
},
{
"type": "rich_text_section",
"elements": [{"type": "text", "text": "Second bullet"}],
},
],
},
],
}
],
)
await adapter._handle_slack_message(event)
msg_event = adapter.handle_message.call_args[0][0]
assert "Can you summarize this?" in msg_event.text
assert "> Quoted line" in msg_event.text
assert "• First bullet" in msg_event.text
assert "• Second bullet" in msg_event.text
@pytest.mark.asyncio
async def test_attachments_unfurl_text_is_appended_even_when_url_is_in_message(self, adapter):
"""Shared URLs should still expose unfurl preview text to the agent."""
event = self._make_event(
text="Look at this doc https://example.com/spec",
attachments=[
{
"title": "Spec",
"from_url": "https://example.com/spec",
"text": "The latest product spec preview",
"footer": "Notion",
}
],
)
await adapter._handle_slack_message(event)
msg_event = adapter.handle_message.call_args[0][0]
assert "Look at this doc https://example.com/spec" in msg_event.text
assert "📎 [Spec](https://example.com/spec)" in msg_event.text
assert "The latest product spec preview" in msg_event.text
assert "_Notion_" in msg_event.text
@pytest.mark.asyncio
async def test_message_unfurl_attachments_are_skipped(self, adapter):
"""Message unfurls should be skipped to avoid echoing Slack message copies."""
event = self._make_event(
text="https://example.com/thread",
attachments=[
{
"is_msg_unfurl": True,
"title": "Thread copy",
"text": "This should not be appended",
}
],
)
await adapter._handle_slack_message(event)
msg_event = adapter.handle_message.call_args[0][0]
assert msg_event.text == "https://example.com/thread"
@pytest.mark.asyncio
async def test_channel_routing_ignores_bot_mentions_inside_block_text(self, adapter):
"""Block-extracted text with a bot mention must not satisfy mention
gating in channels — routing decisions use the original user text so
quoted/forwarded content can't trick the bot into responding."""
event = self._make_event(
text="please review",
channel_type="channel",
blocks=[
{
"type": "rich_text",
"elements": [
{
"type": "rich_text_quote",
"elements": [
{
"type": "rich_text_section",
"elements": [{"type": "text", "text": "Contains <@U_BOT> in quoted text"}],
}
],
}
],
}
],
)
await adapter._handle_slack_message(event)
adapter.handle_message.assert_not_called()
@pytest.mark.asyncio
async def test_quoted_slash_command_text_does_not_change_message_type(self, adapter):
"""Quoted slash-like content should not convert a normal message into a command."""
event = self._make_event(
text="",
blocks=[
{
"type": "rich_text",
"elements": [
{
"type": "rich_text_quote",
"elements": [
{
"type": "rich_text_section",
"elements": [{"type": "text", "text": "/deploy now"}],
}
],
}
],
}
],
)
await adapter._handle_slack_message(event)
msg_event = adapter.handle_message.call_args[0][0]
assert msg_event.message_type == MessageType.TEXT
assert "> /deploy now" in msg_event.text
# ---------------------------------------------------------------------------
# TestMessageRouting
@@ -1887,6 +2347,48 @@ class TestSendImageSSRFGuards:
assert "see this" in call_kwargs["text"]
assert "https://public.example/image.png" in call_kwargs["text"]
@pytest.mark.asyncio
async def test_send_image_fallback_preserves_thread_metadata(self, adapter):
redirect_response = MagicMock()
redirect_response.is_redirect = True
redirect_response.next_request = MagicMock(
url="http://169.254.169.254/latest/meta-data"
)
client_kwargs = {}
mock_client = AsyncMock()
mock_client.__aenter__ = AsyncMock(return_value=mock_client)
mock_client.__aexit__ = AsyncMock(return_value=False)
async def fake_get(_url):
for hook in client_kwargs["event_hooks"]["response"]:
await hook(redirect_response)
mock_client.get = AsyncMock(side_effect=fake_get)
adapter._app.client.files_upload_v2 = AsyncMock(return_value={"ok": True})
adapter._app.client.chat_postMessage = AsyncMock(return_value={"ts": "reply_ts"})
def fake_async_client(*args, **kwargs):
client_kwargs.update(kwargs)
return mock_client
def fake_is_safe_url(url):
return url == "https://public.example/image.png"
with (
patch("tools.url_safety.is_safe_url", side_effect=fake_is_safe_url),
patch("httpx.AsyncClient", side_effect=fake_async_client),
):
await adapter.send_image(
chat_id="C123",
image_url="https://public.example/image.png",
caption="see this",
metadata={"thread_id": "parent_ts_789"},
)
call_kwargs = adapter._app.client.chat_postMessage.call_args.kwargs
assert call_kwargs.get("thread_ts") == "parent_ts_789"
# ---------------------------------------------------------------------------
# TestProgressMessageThread
@@ -2011,3 +2513,76 @@ class TestProgressMessageThread:
"so each @mention starts its own thread"
)
assert msg_event.message_id == "2000000000.000001"
class TestSlackReplyToText:
"""Ensure MessageEvent.reply_to_text is populated on thread replies so
gateway.run can inject a ``[Replying to: "..."]`` prefix (parity with
Telegram/Discord/Feishu/WeCom)."""
@pytest.mark.asyncio
async def test_slack_reply_to_text_set_on_thread_reply(self, adapter):
"""When a thread reply arrives and the parent was posted by a bot
(e.g. cron summary), reply_to_text must carry the parent's text."""
adapter._channel_team = {} # primary workspace only
adapter._team_bot_user_ids = {}
# Mock conversations_replies to return a bot-posted parent
adapter._app.client.conversations_replies = AsyncMock(return_value={
"messages": [
{
"ts": "1000.0",
"bot_id": "B_CRON",
"text": "メール要約: 新着メール3件あります",
},
{"ts": "1000.5", "user": "U_USER", "text": "詳細を教えて"},
]
})
# Use a DM so mention-gating doesn't short-circuit the handler.
event = {
"text": "詳細を教えて",
"user": "U_USER",
"channel": "D123",
"channel_type": "im",
"ts": "1000.5",
"thread_ts": "1000.0", # thread reply
}
with patch.object(
adapter, "_resolve_user_name", new=AsyncMock(return_value="Alice")
):
await adapter._handle_slack_message(event)
assert adapter.handle_message.call_args is not None, (
"handle_message must be invoked for thread-reply DM"
)
msg_event = adapter.handle_message.call_args[0][0]
assert msg_event.reply_to_message_id == "1000.0"
# The critical assertion: parent text is exposed as reply_to_text so the
# gateway can inject it when not already in the session history.
assert msg_event.reply_to_text is not None
assert "メール要約" in msg_event.reply_to_text
@pytest.mark.asyncio
async def test_slack_reply_to_text_none_for_top_level_message(self, adapter):
"""Top-level messages (no thread_ts) must not set reply_to_text."""
event = {
"text": "hello",
"user": "U_USER",
"channel": "D123",
"channel_type": "im",
"ts": "1000.0",
# no thread_ts — top-level DM
}
with patch.object(
adapter, "_resolve_user_name", new=AsyncMock(return_value="Alice")
):
await adapter._handle_slack_message(event)
assert adapter.handle_message.call_args is not None
msg_event = adapter.handle_message.call_args[0][0]
assert msg_event.reply_to_text is None
# Top-level message: reply_to_message_id must be falsy (None or empty).
assert not msg_event.reply_to_message_id

View File

@@ -276,23 +276,44 @@ class TestSlackThreadContext:
@pytest.mark.asyncio
async def test_skips_bot_messages(self):
"""Self-bot child replies are skipped to avoid circular context,
but non-self bots (e.g. cron posts, third-party integrations) are kept.
Regression guard for the fix in _fetch_thread_context: previously ALL
bot messages were dropped, which lost context when the bot was replying
to a cron-posted thread parent."""
adapter = _make_adapter()
mock_client = adapter._team_clients["T1"]
mock_client.conversations_replies = AsyncMock(return_value={
"messages": [
{"ts": "1000.0", "user": "U1", "text": "Parent"},
{"ts": "1000.1", "bot_id": "B1", "text": "Bot reply (should be skipped)"},
# Self-bot reply -> must be skipped (circular)
{
"ts": "1000.1",
"bot_id": "B_SELF",
"user": "U_BOT",
"text": "Previous bot self-reply (should be skipped)",
},
# Third-party bot child -> kept (useful context)
{
"ts": "1000.15",
"bot_id": "B_OTHER",
"user": "U_OTHER_BOT",
"text": "Deploy succeeded",
},
{"ts": "1000.2", "user": "U1", "text": "Current"},
]
})
adapter._user_name_cache = {"U1": "Alice"}
adapter._user_name_cache = {"U1": "Alice", "U_OTHER_BOT": "DeployBot"}
context = await adapter._fetch_thread_context(
channel_id="C1", thread_ts="1000.0", current_ts="1000.2", team_id="T1"
)
assert "Bot reply" not in context
assert "Previous bot self-reply" not in context
assert "Alice: Parent" in context
# Third-party bot message must now be included
assert "Deploy succeeded" in context
@pytest.mark.asyncio
async def test_empty_thread(self):
@@ -316,6 +337,166 @@ class TestSlackThreadContext:
)
assert context == ""
@pytest.mark.asyncio
async def test_fetch_thread_context_includes_bot_parent(self):
"""The thread parent posted by a bot (e.g. a cron summary) must be
included in the context, prefixed with ``[thread parent]``."""
adapter = _make_adapter()
mock_client = adapter._team_clients["T1"]
mock_client.conversations_replies = AsyncMock(return_value={
"messages": [
# Bot-posted parent (cron job)
{
"ts": "1000.0",
"bot_id": "B123",
"subtype": "bot_message",
"username": "cron",
"text": "メール要約: 本日の新着3件",
},
# User reply that triggered the fetch
{"ts": "1000.1", "user": "U1", "text": "詳細を教えて"},
]
})
adapter._user_name_cache = {"U1": "Alice"}
context = await adapter._fetch_thread_context(
channel_id="C1",
thread_ts="1000.0",
current_ts="1000.1", # exclude the trigger message itself
team_id="T1",
)
assert "[thread parent]" in context
assert "メール要約: 本日の新着3件" in context
@pytest.mark.asyncio
async def test_fetch_thread_context_excludes_self_bot_replies(self):
"""Parent (non-self bot) is kept, self-bot child replies are dropped,
user replies are kept."""
adapter = _make_adapter()
mock_client = adapter._team_clients["T1"]
mock_client.conversations_replies = AsyncMock(return_value={
"messages": [
{"ts": "1000.0", "bot_id": "B_CRON", "text": "Cron summary"},
# Self-bot child reply -> excluded
{
"ts": "1000.1",
"bot_id": "B_SELF",
"user": "U_BOT", # matches adapter._bot_user_id
"text": "Previous self reply",
},
# User reply -> kept
{"ts": "1000.2", "user": "U1", "text": "Follow-up question"},
# Current trigger (excluded by current_ts match)
{"ts": "1000.3", "user": "U1", "text": "Current"},
]
})
adapter._user_name_cache = {"U1": "Alice"}
context = await adapter._fetch_thread_context(
channel_id="C1", thread_ts="1000.0", current_ts="1000.3", team_id="T1"
)
assert "Cron summary" in context
assert "[thread parent]" in context
assert "Previous self reply" not in context
assert "Follow-up question" in context
assert "Current" not in context
@pytest.mark.asyncio
async def test_fetch_thread_context_multi_workspace(self):
"""Self-bot filtering must use the per-workspace bot user id so a
self-bot id that belongs to a different workspace does not accidentally
filter out a legitimate message in the current workspace."""
adapter = _make_adapter()
# Add a second workspace with a different bot user id
adapter._team_clients["T2"] = AsyncMock()
adapter._team_bot_user_ids = {"T1": "U_BOT_T1", "T2": "U_BOT_T2"}
adapter._bot_user_id = "U_BOT_T1"
adapter._channel_team["C2"] = "T2"
mock_client = adapter._team_clients["T2"]
mock_client.conversations_replies = AsyncMock(return_value={
"messages": [
{"ts": "2000.0", "user": "U2", "text": "Parent T2"},
# This has the *T1* bot's user id — from T2's perspective this
# is a third-party bot, so it must be kept.
{
"ts": "2000.1",
"bot_id": "B_FOREIGN",
"user": "U_BOT_T1",
"team": "T2",
"text": "Cross-workspace bot reply",
},
# Self-bot for T2 — must be skipped
{
"ts": "2000.2",
"bot_id": "B_SELF_T2",
"user": "U_BOT_T2",
"team": "T2",
"text": "Own T2 bot reply",
},
{"ts": "2000.3", "user": "U2", "text": "Current"},
]
})
adapter._user_name_cache = {"U2": "Bob"}
context = await adapter._fetch_thread_context(
channel_id="C2", thread_ts="2000.0", current_ts="2000.3", team_id="T2"
)
assert "Parent T2" in context
assert "Cross-workspace bot reply" in context
assert "Own T2 bot reply" not in context
@pytest.mark.asyncio
async def test_fetch_thread_context_current_ts_excluded(self):
"""Regression guard: the message whose ts == current_ts must never
appear in the context output (it will be delivered as the user
message itself)."""
adapter = _make_adapter()
mock_client = adapter._team_clients["T1"]
mock_client.conversations_replies = AsyncMock(return_value={
"messages": [
{"ts": "1000.0", "user": "U1", "text": "Parent"},
{"ts": "1000.1", "user": "U1", "text": "DO NOT INCLUDE THIS"},
]
})
adapter._user_name_cache = {"U1": "Alice"}
context = await adapter._fetch_thread_context(
channel_id="C1", thread_ts="1000.0", current_ts="1000.1", team_id="T1"
)
assert "Parent" in context
assert "DO NOT INCLUDE THIS" not in context
@pytest.mark.asyncio
async def test_fetch_thread_parent_text_from_cache(self):
"""_fetch_thread_parent_text should reuse the thread-context cache
when it is warm, avoiding an extra conversations.replies call."""
adapter = _make_adapter()
mock_client = adapter._team_clients["T1"]
mock_client.conversations_replies = AsyncMock(return_value={
"messages": [
{"ts": "1000.0", "bot_id": "B123", "text": "Parent summary"},
{"ts": "1000.1", "user": "U1", "text": "reply"},
]
})
# Warm the cache via _fetch_thread_context
await adapter._fetch_thread_context(
channel_id="C1", thread_ts="1000.0", current_ts="1000.1", team_id="T1"
)
assert mock_client.conversations_replies.await_count == 1
parent = await adapter._fetch_thread_parent_text(
channel_id="C1", thread_ts="1000.0", team_id="T1"
)
assert parent == "Parent summary"
# No additional API call
assert mock_client.conversations_replies.await_count == 1
# ===========================================================================
# _has_active_session_for_thread — session key fix (#5833)

View File

@@ -0,0 +1,133 @@
"""Tests for Slack channel_skill_bindings auto-skill resolution."""
from unittest.mock import MagicMock
def _make_adapter(extra=None):
"""Create a minimal SlackAdapter stub with the given ``config.extra``."""
from gateway.platforms.slack import SlackAdapter
adapter = object.__new__(SlackAdapter)
adapter.config = MagicMock()
adapter.config.extra = extra or {}
return adapter
def _resolve(adapter, channel_id, parent_id=None):
from gateway.platforms.base import resolve_channel_skills
return resolve_channel_skills(adapter.config.extra, channel_id, parent_id)
class TestSlackResolveChannelSkills:
def test_no_bindings_returns_none(self):
adapter = _make_adapter()
assert _resolve(adapter, "D0ABC") is None
def test_match_by_dm_channel_id(self):
"""The primary use case: binding a skill to a Slack DM channel."""
adapter = _make_adapter({
"channel_skill_bindings": [
{"id": "D0ATH9TQ0G6", "skills": ["german-flashcards"]},
]
})
assert _resolve(adapter, "D0ATH9TQ0G6") == ["german-flashcards"]
def test_match_by_parent_id_for_thread(self):
"""Slack threads inherit the parent channel's binding."""
adapter = _make_adapter({
"channel_skill_bindings": [
{"id": "C0PARENT", "skills": ["parent-skill"]},
]
})
assert _resolve(adapter, "thread-ts-123", parent_id="C0PARENT") == ["parent-skill"]
def test_no_match_returns_none(self):
adapter = _make_adapter({
"channel_skill_bindings": [
{"id": "D0AAA", "skills": ["skill-a"]},
]
})
assert _resolve(adapter, "D0BBB") is None
def test_single_skill_string(self):
adapter = _make_adapter({
"channel_skill_bindings": [
{"id": "D0ATH9TQ0G6", "skill": "german-flashcards"},
]
})
assert _resolve(adapter, "D0ATH9TQ0G6") == ["german-flashcards"]
def test_dedup_preserves_order(self):
adapter = _make_adapter({
"channel_skill_bindings": [
{"id": "D0ATH9TQ0G6", "skills": ["a", "b", "a", "c", "b"]},
]
})
assert _resolve(adapter, "D0ATH9TQ0G6") == ["a", "b", "c"]
def test_multiple_bindings_pick_correct(self):
adapter = _make_adapter({
"channel_skill_bindings": [
{"id": "D0AAA", "skills": ["skill-a"]},
{"id": "D0BBB", "skills": ["skill-b"]},
{"id": "D0CCC", "skills": ["skill-c"]},
]
})
assert _resolve(adapter, "D0BBB") == ["skill-b"]
def test_malformed_entry_skipped(self):
"""Non-dict entries should be ignored, not raise."""
adapter = _make_adapter({
"channel_skill_bindings": [
"not-a-dict",
{"id": "D0ABC", "skills": ["good"]},
]
})
assert _resolve(adapter, "D0ABC") == ["good"]
def test_empty_skills_list_returns_none(self):
adapter = _make_adapter({
"channel_skill_bindings": [
{"id": "D0ABC", "skills": []},
]
})
assert _resolve(adapter, "D0ABC") is None
def test_empty_skill_string_returns_none(self):
adapter = _make_adapter({
"channel_skill_bindings": [
{"id": "D0ABC", "skill": ""},
]
})
assert _resolve(adapter, "D0ABC") is None
class TestSlackMessageEventAutoSkill:
"""Integration-style test: verify auto_skill propagates to MessageEvent."""
def test_message_event_carries_auto_skill(self):
"""Simulate the handler wiring: resolve + attach to MessageEvent."""
from gateway.platforms.base import MessageEvent, MessageType, Platform, SessionSource, resolve_channel_skills
config_extra = {
"channel_skill_bindings": [
{"id": "D0ATH9TQ0G6", "skills": ["german-flashcards"]},
]
}
auto_skill = resolve_channel_skills(config_extra, "D0ATH9TQ0G6", None)
source = SessionSource(
platform=Platform.SLACK,
chat_id="D0ATH9TQ0G6",
chat_name="Mats",
chat_type="dm",
user_id="U0ABC",
user_name="Mats",
)
event = MessageEvent(
text="work",
message_type=MessageType.TEXT,
source=source,
raw_message={},
message_id="123.456",
auto_skill=auto_skill,
)
assert event.auto_skill == ["german-flashcards"]

View File

@@ -55,10 +55,12 @@ CHANNEL_ID = "C0AQWDLHY9M"
OTHER_CHANNEL_ID = "C9999999999"
def _make_adapter(require_mention=None, free_response_channels=None):
def _make_adapter(require_mention=None, strict_mention=None, free_response_channels=None):
extra = {}
if require_mention is not None:
extra["require_mention"] = require_mention
if strict_mention is not None:
extra["strict_mention"] = strict_mention
if free_response_channels is not None:
extra["free_response_channels"] = free_response_channels
@@ -134,6 +136,48 @@ def test_require_mention_env_var_default_true(monkeypatch):
assert adapter._slack_require_mention() is True
# ---------------------------------------------------------------------------
# Tests: _slack_strict_mention
# ---------------------------------------------------------------------------
def test_strict_mention_defaults_to_false(monkeypatch):
monkeypatch.delenv("SLACK_STRICT_MENTION", raising=False)
adapter = _make_adapter()
assert adapter._slack_strict_mention() is False
def test_strict_mention_true():
adapter = _make_adapter(strict_mention=True)
assert adapter._slack_strict_mention() is True
def test_strict_mention_false():
adapter = _make_adapter(strict_mention=False)
assert adapter._slack_strict_mention() is False
def test_strict_mention_string_true():
adapter = _make_adapter(strict_mention="true")
assert adapter._slack_strict_mention() is True
def test_strict_mention_string_off():
adapter = _make_adapter(strict_mention="off")
assert adapter._slack_strict_mention() is False
def test_strict_mention_malformed_stays_false():
"""Unrecognised values keep strict mode OFF (fail-open to legacy behavior)."""
adapter = _make_adapter(strict_mention="maybe")
assert adapter._slack_strict_mention() is False
def test_strict_mention_env_var_fallback(monkeypatch):
monkeypatch.setenv("SLACK_STRICT_MENTION", "true")
adapter = _make_adapter() # no config value -> falls back to env
assert adapter._slack_strict_mention() is True
# ---------------------------------------------------------------------------
# Tests: _slack_free_response_channels
# ---------------------------------------------------------------------------
@@ -310,3 +354,109 @@ def test_config_bridges_slack_free_response_channels(monkeypatch, tmp_path):
import os as _os
assert _os.environ["SLACK_REQUIRE_MENTION"] == "false"
assert _os.environ["SLACK_FREE_RESPONSE_CHANNELS"] == "C0AQWDLHY9M,C9999999999"
def test_config_bridges_slack_reply_in_thread(monkeypatch, tmp_path):
from gateway.config import load_gateway_config
hermes_home = tmp_path / ".hermes"
hermes_home.mkdir()
(hermes_home / "config.yaml").write_text(
"slack:\n"
" reply_in_thread: false\n",
encoding="utf-8",
)
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
monkeypatch.setenv("SLACK_BOT_TOKEN", "xoxb-test")
config = load_gateway_config()
assert config is not None
slack_config = config.platforms[Platform.SLACK]
assert slack_config.extra.get("reply_in_thread") is False
adapter = SlackAdapter(slack_config)
assert adapter._resolve_thread_ts(reply_to="171.000", metadata={}) is None
# Top-level channel messages arrive with metadata.thread_id == reply_to
# because the inbound handler uses event.ts as a session-keying fallback.
# Those must be treated as non-threaded so reply_in_thread=false takes
# effect in channels, not just DMs.
assert adapter._resolve_thread_ts(
reply_to="171.000",
metadata={"thread_id": "171.000"},
) is None
# Real thread replies (reply_to differs from thread parent) must still
# resolve to the parent thread so conversation context is preserved.
assert adapter._resolve_thread_ts(
reply_to="171.500",
metadata={"thread_id": "171.000"},
) == "171.000"
def test_config_bridges_slack_strict_mention(monkeypatch, tmp_path):
from gateway.config import load_gateway_config
hermes_home = tmp_path / ".hermes"
hermes_home.mkdir()
(hermes_home / "config.yaml").write_text(
"slack:\n"
" strict_mention: true\n",
encoding="utf-8",
)
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
monkeypatch.delenv("SLACK_STRICT_MENTION", raising=False)
config = load_gateway_config()
assert config is not None
import os as _os
assert _os.environ["SLACK_STRICT_MENTION"] == "true"
# ---------------------------------------------------------------------------
# Regression: strict mode must NOT persist mentions into _mentioned_threads
# ---------------------------------------------------------------------------
# Prevents agent-to-agent ack loops — if a strict-mode bot remembered every
# thread it was mentioned in, the next message from the other agent in that
# thread would re-trigger the bot and defeat the entire feature.
def test_mention_in_strict_mode_does_not_register_thread():
adapter = _make_adapter(strict_mention=True)
adapter._bot_user_id = "U_BOT"
adapter._mentioned_threads = set()
adapter._MENTIONED_THREADS_MAX = 5000
thread_ts = "1700000000.100200"
event_thread_ts = thread_ts # incoming message is inside an existing thread
# Mirror the handler's @mention + strict-mode guard that protects
# _mentioned_threads.add(). If strict is on, we must skip the add.
text = "<@U_BOT> hello"
is_mentioned = f"<@{adapter._bot_user_id}>" in text
assert is_mentioned
if event_thread_ts and not adapter._slack_strict_mention():
adapter._mentioned_threads.add(event_thread_ts)
assert thread_ts not in adapter._mentioned_threads
def test_mention_outside_strict_mode_still_registers_thread():
adapter = _make_adapter(strict_mention=False)
adapter._bot_user_id = "U_BOT"
adapter._mentioned_threads = set()
adapter._MENTIONED_THREADS_MAX = 5000
thread_ts = "1700000000.100200"
event_thread_ts = thread_ts
text = "<@U_BOT> hello"
is_mentioned = f"<@{adapter._bot_user_id}>" in text
assert is_mentioned
if event_thread_ts and not adapter._slack_strict_mention():
adapter._mentioned_threads.add(event_thread_ts)
assert thread_ts in adapter._mentioned_threads

View File

@@ -12,9 +12,9 @@ from gateway.platforms.base import MessageEvent
from gateway.session import SessionEntry, SessionSource, build_session_key
def _make_source() -> SessionSource:
def _make_source(platform: Platform = Platform.TELEGRAM) -> SessionSource:
return SessionSource(
platform=Platform.TELEGRAM,
platform=platform,
user_id="u1",
chat_id="c1",
user_name="tester",
@@ -22,24 +22,24 @@ def _make_source() -> SessionSource:
)
def _make_event(text: str) -> MessageEvent:
def _make_event(text: str, *, platform: Platform = Platform.TELEGRAM) -> MessageEvent:
return MessageEvent(
text=text,
source=_make_source(),
source=_make_source(platform),
message_id="m1",
)
def _make_runner(session_entry: SessionEntry):
def _make_runner(session_entry: SessionEntry, *, platform: Platform = Platform.TELEGRAM):
from gateway.run import GatewayRunner
runner = object.__new__(GatewayRunner)
runner.config = GatewayConfig(
platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="***")}
platforms={platform: PlatformConfig(enabled=True, token="***")}
)
adapter = MagicMock()
adapter.send = AsyncMock()
runner.adapters = {Platform.TELEGRAM: adapter}
runner.adapters = {platform: adapter}
runner._voice_mode = {}
runner.hooks = SimpleNamespace(emit=AsyncMock(), loaded_hooks=False)
runner.session_store = MagicMock()
@@ -224,6 +224,93 @@ async def test_handle_message_persists_agent_token_counts(monkeypatch):
)
@pytest.mark.asyncio
async def test_first_run_slack_home_channel_onboarding_uses_parent_command(monkeypatch):
import gateway.run as gateway_run
session_entry = SessionEntry(
session_key=build_session_key(_make_source(Platform.SLACK)),
session_id="sess-1",
created_at=datetime.now(),
updated_at=datetime.now(),
platform=Platform.SLACK,
chat_type="dm",
)
runner = _make_runner(session_entry, platform=Platform.SLACK)
runner.session_store.load_transcript.return_value = []
runner.session_store.has_any_sessions.return_value = False
runner._run_agent = AsyncMock(
return_value={
"final_response": "ok",
"messages": [],
"tools": [],
"history_offset": 0,
"last_prompt_tokens": 0,
"input_tokens": 0,
"output_tokens": 0,
"model": "openai/test-model",
}
)
monkeypatch.delenv("SLACK_HOME_CHANNEL", raising=False)
monkeypatch.setattr(gateway_run, "_resolve_runtime_agent_kwargs", lambda: {"api_key": "***"})
monkeypatch.setattr(
"agent.model_metadata.get_model_context_length",
lambda *_args, **_kwargs: 100000,
)
result = await runner._handle_message(_make_event("hello", platform=Platform.SLACK))
assert result == "ok"
runner.adapters[Platform.SLACK].send.assert_awaited_once()
onboarding = runner.adapters[Platform.SLACK].send.await_args.args[1]
assert "/hermes sethome" in onboarding
assert "Type /sethome" not in onboarding
@pytest.mark.asyncio
async def test_first_run_non_slack_home_channel_onboarding_keeps_direct_command(monkeypatch):
import gateway.run as gateway_run
session_entry = SessionEntry(
session_key=build_session_key(_make_source(Platform.TELEGRAM)),
session_id="sess-1",
created_at=datetime.now(),
updated_at=datetime.now(),
platform=Platform.TELEGRAM,
chat_type="dm",
)
runner = _make_runner(session_entry, platform=Platform.TELEGRAM)
runner.session_store.load_transcript.return_value = []
runner.session_store.has_any_sessions.return_value = False
runner._run_agent = AsyncMock(
return_value={
"final_response": "ok",
"messages": [],
"tools": [],
"history_offset": 0,
"last_prompt_tokens": 0,
"input_tokens": 0,
"output_tokens": 0,
"model": "openai/test-model",
}
)
monkeypatch.delenv("TELEGRAM_HOME_CHANNEL", raising=False)
monkeypatch.setattr(gateway_run, "_resolve_runtime_agent_kwargs", lambda: {"api_key": "***"})
monkeypatch.setattr(
"agent.model_metadata.get_model_context_length",
lambda *_args, **_kwargs: 100000,
)
result = await runner._handle_message(_make_event("hello", platform=Platform.TELEGRAM))
assert result == "ok"
runner.adapters[Platform.TELEGRAM].send.assert_awaited_once()
onboarding = runner.adapters[Platform.TELEGRAM].send.await_args.args[1]
assert "Type /sethome" in onboarding
@pytest.mark.asyncio
async def test_handle_message_discards_stale_result_after_session_invalidation(monkeypatch):
import gateway.run as gateway_run

View File

@@ -0,0 +1,236 @@
"""Regression tests for the fresh-final-for-long-lived-previews path.
Ported from openclaw/openclaw#72038. When a streamed preview has been
visible long enough that the platform's edit timestamp would be
noticeably stale by completion time, the stream consumer delivers the
final reply as a brand-new message and best-effort deletes the old
preview. This makes Telegram's visible timestamp reflect completion
time instead of first-token time.
"""
from __future__ import annotations
from types import SimpleNamespace
from unittest.mock import AsyncMock, MagicMock
import pytest
from gateway.stream_consumer import GatewayStreamConsumer, StreamConsumerConfig
def _make_adapter(*, supports_delete: bool = True) -> MagicMock:
"""Build a minimal MagicMock adapter wired for send/edit/delete."""
adapter = MagicMock()
adapter.REQUIRES_EDIT_FINALIZE = False
adapter.MAX_MESSAGE_LENGTH = 4096
adapter.send = AsyncMock(return_value=SimpleNamespace(
success=True, message_id="initial_preview",
))
adapter.edit_message = AsyncMock(return_value=SimpleNamespace(
success=True, message_id="initial_preview",
))
if supports_delete:
adapter.delete_message = AsyncMock(return_value=True)
else:
# Adapter without the optional delete_message method — fresh-final
# should still work, it just leaves the stale preview in place.
del adapter.delete_message # type: ignore[attr-defined]
return adapter
class TestFreshFinalForLongLivedPreviews:
"""openclaw#72038 port — send fresh final when preview is old."""
@pytest.mark.asyncio
async def test_disabled_by_default_still_edits_in_place(self):
"""``fresh_final_after_seconds=0`` preserves the legacy edit path."""
adapter = _make_adapter()
consumer = GatewayStreamConsumer(
adapter=adapter,
chat_id="chat",
config=StreamConsumerConfig(fresh_final_after_seconds=0.0),
)
await consumer._send_or_edit("hello")
# Pretend the preview has been visible for a long time.
consumer._message_created_ts = 0.0 # far in the past
await consumer._send_or_edit("hello world", finalize=True)
# Should edit, not send a fresh message.
assert adapter.send.call_count == 1 # only the initial send
adapter.edit_message.assert_called_once()
@pytest.mark.asyncio
async def test_short_lived_preview_edits_in_place(self):
"""Finalizing a preview younger than the threshold → normal edit."""
adapter = _make_adapter()
consumer = GatewayStreamConsumer(
adapter=adapter,
chat_id="chat",
config=StreamConsumerConfig(fresh_final_after_seconds=60.0),
)
await consumer._send_or_edit("hello")
# Preview is "new" — leave _message_created_ts at its real value.
await consumer._send_or_edit("hello world", finalize=True)
assert adapter.send.call_count == 1
adapter.edit_message.assert_called_once()
@pytest.mark.asyncio
async def test_long_lived_preview_sends_fresh_final(self):
"""Finalizing a preview older than the threshold → fresh send."""
adapter = _make_adapter()
adapter.send.side_effect = [
SimpleNamespace(success=True, message_id="initial_preview"),
SimpleNamespace(success=True, message_id="fresh_final"),
]
consumer = GatewayStreamConsumer(
adapter=adapter,
chat_id="chat",
config=StreamConsumerConfig(fresh_final_after_seconds=60.0),
)
await consumer._send_or_edit("hello")
# Force the preview to look stale (visible for > 60s).
consumer._message_created_ts = 0.0 # zero = ~uptime seconds old
await consumer._send_or_edit("hello world", finalize=True)
# Fresh send happened; no edit of the old preview.
assert adapter.send.call_count == 2
adapter.edit_message.assert_not_called()
# The old preview was deleted as cleanup.
adapter.delete_message.assert_awaited_once_with("chat", "initial_preview")
# State was updated to the new message id.
assert consumer._message_id == "fresh_final"
assert consumer._final_response_sent is True
@pytest.mark.asyncio
async def test_fresh_final_without_delete_support_is_best_effort(self):
"""Adapter lacking ``delete_message`` still gets the fresh send."""
adapter = _make_adapter(supports_delete=False)
adapter.send.side_effect = [
SimpleNamespace(success=True, message_id="initial_preview"),
SimpleNamespace(success=True, message_id="fresh_final"),
]
consumer = GatewayStreamConsumer(
adapter=adapter,
chat_id="chat",
config=StreamConsumerConfig(fresh_final_after_seconds=60.0),
)
await consumer._send_or_edit("hello")
consumer._message_created_ts = 0.0
await consumer._send_or_edit("hello world", finalize=True)
assert adapter.send.call_count == 2
adapter.edit_message.assert_not_called()
# No delete attempt — just the fresh send.
assert consumer._message_id == "fresh_final"
@pytest.mark.asyncio
async def test_fresh_final_fallback_to_edit_on_send_failure(self):
"""If the fresh send fails, fall back to the normal edit path."""
adapter = _make_adapter()
adapter.send.side_effect = [
SimpleNamespace(success=True, message_id="initial_preview"),
SimpleNamespace(success=False, error="network"),
]
consumer = GatewayStreamConsumer(
adapter=adapter,
chat_id="chat",
config=StreamConsumerConfig(fresh_final_after_seconds=60.0),
)
await consumer._send_or_edit("hello")
consumer._message_created_ts = 0.0
ok = await consumer._send_or_edit("hello world", finalize=True)
# Fresh send was attempted and failed → edit happened instead.
assert adapter.send.call_count == 2
adapter.edit_message.assert_called_once()
assert ok is True
@pytest.mark.asyncio
async def test_only_finalize_triggers_fresh_final(self):
"""Intermediate edits (``finalize=False``) never switch to fresh send."""
adapter = _make_adapter()
consumer = GatewayStreamConsumer(
adapter=adapter,
chat_id="chat",
config=StreamConsumerConfig(fresh_final_after_seconds=60.0),
)
await consumer._send_or_edit("hello")
consumer._message_created_ts = 0.0 # stale
await consumer._send_or_edit("hello partial") # no finalize
assert adapter.send.call_count == 1
adapter.edit_message.assert_called_once()
@pytest.mark.asyncio
async def test_no_edit_sentinel_is_not_affected(self):
"""Platforms with the ``__no_edit__`` sentinel never go fresh-final."""
adapter = _make_adapter()
adapter.send.return_value = SimpleNamespace(success=True, message_id=None)
consumer = GatewayStreamConsumer(
adapter=adapter,
chat_id="chat",
config=StreamConsumerConfig(fresh_final_after_seconds=60.0),
)
await consumer._send_or_edit("hello")
assert consumer._message_id == "__no_edit__"
assert consumer._message_created_ts is None
# Even with finalize=True, no fresh send — the sentinel gates it.
assert consumer._should_send_fresh_final() is False
class TestStreamConsumerConfigFreshFinalField:
"""The dataclass field must exist and default to 0 (disabled)."""
def test_default_is_disabled(self):
cfg = StreamConsumerConfig()
assert cfg.fresh_final_after_seconds == 0.0
def test_field_is_configurable(self):
cfg = StreamConsumerConfig(fresh_final_after_seconds=120.0)
assert cfg.fresh_final_after_seconds == 120.0
class TestStreamingConfigFreshFinalField:
"""The gateway-level StreamingConfig carries the setting."""
def test_default_enables_with_60s(self):
from gateway.config import StreamingConfig
cfg = StreamingConfig()
assert cfg.fresh_final_after_seconds == 60.0
def test_from_dict_uses_default_when_missing(self):
from gateway.config import StreamingConfig
cfg = StreamingConfig.from_dict({"enabled": True})
assert cfg.fresh_final_after_seconds == 60.0
def test_from_dict_respects_explicit_zero(self):
from gateway.config import StreamingConfig
cfg = StreamingConfig.from_dict({
"enabled": True,
"fresh_final_after_seconds": 0,
})
assert cfg.fresh_final_after_seconds == 0.0
def test_to_dict_round_trip(self):
from gateway.config import StreamingConfig
original = StreamingConfig(fresh_final_after_seconds=90.0)
restored = StreamingConfig.from_dict(original.to_dict())
assert restored.fresh_final_after_seconds == 90.0
class TestTelegramAdapterDeleteMessage:
"""Contract: Telegram adapter implements ``delete_message``."""
def test_delete_message_method_exists(self):
telegram = pytest.importorskip("gateway.platforms.telegram")
import inspect
cls = telegram.TelegramAdapter
assert hasattr(cls, "delete_message"), (
"TelegramAdapter.delete_message is required for the fresh-final "
"cleanup path (openclaw/openclaw#72038 port)."
)
sig = inspect.signature(cls.delete_message)
params = list(sig.parameters)
assert params[:3] == ["self", "chat_id", "message_id"]
def test_base_adapter_default_returns_false(self):
"""BasePlatformAdapter.delete_message default = no-op returning False."""
from gateway.platforms.base import BasePlatformAdapter
import inspect
sig = inspect.signature(BasePlatformAdapter.delete_message)
assert list(sig.parameters)[:3] == ["self", "chat_id", "message_id"]

View File

@@ -251,7 +251,7 @@ class TestWatchUpdateProgress:
"session_key": "agent:main:telegram:dm:111"}
(hermes_home / ".update_pending.json").write_text(json.dumps(pending))
# Write output
(hermes_home / ".update_output.txt").write_text("→ Fetching updates...\n")
(hermes_home / ".update_output.txt").write_text("→ Fetching updates...\n", encoding="utf-8")
mock_adapter = AsyncMock()
runner.adapters = {Platform.TELEGRAM: mock_adapter}
@@ -261,7 +261,7 @@ class TestWatchUpdateProgress:
await asyncio.sleep(0.3)
(hermes_home / ".update_output.txt").write_text(
"→ Fetching updates...\n✓ Code updated!\n"
)
, encoding="utf-8")
(hermes_home / ".update_exit_code").write_text("0")
with patch("gateway.run._hermes_home", hermes_home):
@@ -489,6 +489,63 @@ class TestUpdatePromptInterception:
# Should clear the pending flag
assert session_key not in runner._update_prompt_pending
@pytest.mark.asyncio
async def test_recognized_slash_command_bypasses_pending_update_prompt(self, tmp_path):
"""Known slash commands must dispatch normally instead of being consumed.
The update subprocess is still blocked on stdin waiting for
``.update_response``, so the gateway writes a blank response to
unblock it (``_gateway_prompt`` returns the prompt's default on
empty) before falling through to normal command dispatch.
"""
runner = _make_runner()
hermes_home = tmp_path / "hermes"
hermes_home.mkdir()
event = _make_event(text="/new", chat_id="67890")
session_key = "agent:main:telegram:dm:67890"
runner._update_prompt_pending[session_key] = True
runner._is_user_authorized = MagicMock(return_value=True)
runner._session_key_for_source = MagicMock(return_value=session_key)
runner._handle_reset_command = AsyncMock(return_value="reset ok")
with patch("gateway.run._hermes_home", hermes_home):
result = await runner._handle_message(event)
assert result == "reset ok"
runner._handle_reset_command.assert_awaited_once_with(event)
# .update_response was written (empty) to unblock the update
# subprocess; _gateway_prompt will read "", strip to "", and
# return the prompt's default.
response_path = hermes_home / ".update_response"
assert response_path.exists()
assert response_path.read_text() == ""
# Pending flag is cleared so stray future input won't be
# re-intercepted for a prompt that is no longer outstanding.
assert session_key not in runner._update_prompt_pending
@pytest.mark.asyncio
async def test_unrecognized_slash_command_still_consumed_as_response(self, tmp_path):
"""Unknown /foo is written verbatim to .update_response (legacy behavior)."""
runner = _make_runner()
hermes_home = tmp_path / "hermes"
hermes_home.mkdir()
event = _make_event(text="/foobarbaz", chat_id="67890")
session_key = "agent:main:telegram:dm:67890"
runner._update_prompt_pending[session_key] = True
runner._is_user_authorized = MagicMock(return_value=True)
runner._session_key_for_source = MagicMock(return_value=session_key)
with patch("gateway.run._hermes_home", hermes_home):
result = await runner._handle_message(event)
response_path = hermes_home / ".update_response"
assert response_path.exists()
assert response_path.read_text() == "/foobarbaz"
assert "Sent" in (result or "")
assert session_key not in runner._update_prompt_pending
@pytest.mark.asyncio
async def test_normal_message_when_no_prompt_pending(self, tmp_path):
"""Messages pass through normally when no prompt is pending."""

View File

@@ -134,7 +134,7 @@ class TestVerboseCommand:
"""Cycling /verbose on Telegram doesn't change Slack's setting.
Without a global tool_progress, each platform uses its built-in
default: Telegram = 'all' (high tier), Slack = 'new' (medium tier).
default: Telegram = 'all' (high tier), Slack = 'off' (quiet Slack default).
"""
hermes_home = tmp_path / "hermes"
hermes_home.mkdir()
@@ -161,8 +161,8 @@ class TestVerboseCommand:
platforms = saved["display"]["platforms"]
# Telegram: all -> verbose (high tier default = all)
assert platforms["telegram"]["tool_progress"] == "verbose"
# Slack: new -> all (medium tier default = new, cycle to all)
assert platforms["slack"]["tool_progress"] == "all"
# Slack: off -> new (first /verbose cycle from quiet default)
assert platforms["slack"]["tool_progress"] == "new"
@pytest.mark.asyncio
async def test_no_config_file_returns_disabled(self, tmp_path, monkeypatch):

View File

@@ -149,3 +149,46 @@ def test_get_nous_subscription_features_requires_agent_browser_for_browserbase(m
assert features.browser.active is False
assert features.browser.managed_by_nous is False
assert features.browser.current_provider == "Browserbase"
def test_get_nous_subscription_features_does_not_treat_quoted_false_as_gateway_opt_in(monkeypatch):
env = {"EXA_API_KEY": "exa-test"}
monkeypatch.setattr(ns, "get_env_value", lambda name: env.get(name, ""))
monkeypatch.setattr(ns, "get_nous_auth_status", lambda: {"logged_in": True})
monkeypatch.setattr(ns, "managed_nous_tools_enabled", lambda: True)
monkeypatch.setattr(ns, "_toolset_enabled", lambda config, key: key == "web")
monkeypatch.setattr(ns, "_has_agent_browser", lambda: False)
monkeypatch.setattr(ns, "resolve_openai_audio_api_key", lambda: "")
monkeypatch.setattr(ns, "has_direct_modal_credentials", lambda: False)
monkeypatch.setattr(ns, "is_managed_tool_gateway_ready", lambda vendor: vendor == "firecrawl")
features = ns.get_nous_subscription_features(
{"web": {"backend": "exa", "use_gateway": "false"}}
)
assert features.web.available is True
assert features.web.active is True
assert features.web.managed_by_nous is False
assert features.web.direct_override is True
assert features.web.current_provider == "exa"
def test_get_gateway_eligible_tools_ignores_quoted_false_opt_in(monkeypatch):
monkeypatch.setattr(ns, "managed_nous_tools_enabled", lambda: True)
monkeypatch.setattr(
ns,
"_get_gateway_direct_credentials",
lambda: {"web": True, "image_gen": False, "tts": False, "browser": False},
)
unconfigured, has_direct, already_managed = ns.get_gateway_eligible_tools(
{
"model": {"provider": "nous"},
"web": {"use_gateway": "false"},
}
)
assert "web" in has_direct
assert "web" not in already_managed
assert set(unconfigured) == {"image_gen", "tts", "browser"}

View File

@@ -401,14 +401,21 @@ class TestSessionBrowseArgparse:
from hermes_cli.main import _session_browse_picker
assert callable(_session_browse_picker)
def test_browse_default_limit_is_50(self):
"""The default --limit for browse should be 50."""
# This test verifies at the argparse level
# We test by running the parse on "sessions browse" args
# Since we can't easily extract the subparser, verify via the
# _session_browse_picker accepting large lists
sessions = _make_sessions(50)
assert len(sessions) == 50
def test_browse_default_limit_is_500(self):
"""The default --limit for browse should be 500."""
# Build the same argparse tree cmd_sessions uses and verify the default.
import argparse
parser = argparse.ArgumentParser()
subparsers = parser.add_subparsers(dest="sessions_action")
browse = subparsers.add_parser("browse")
browse.add_argument("--source")
browse.add_argument("--limit", type=int, default=500)
args = parser.parse_args(["browse"])
assert args.limit == 500
args = parser.parse_args(["browse", "--limit", "42"])
assert args.limit == 42
# ─── Integration: cmd_sessions browse action ────────────────────────────────

View File

@@ -12,7 +12,7 @@ def test_sessions_delete_accepts_unique_id_prefix(monkeypatch, capsys):
captured["resolved_from"] = session_id
return "20260315_092437_c9a6ff"
def delete_session(self, session_id):
def delete_session(self, session_id, **kwargs):
captured["deleted"] = session_id
return True
@@ -45,7 +45,7 @@ def test_sessions_delete_reports_not_found_when_prefix_is_unknown(monkeypatch, c
def resolve_session_id(self, session_id):
return None
def delete_session(self, session_id):
def delete_session(self, session_id, **kwargs):
raise AssertionError("delete_session should not be called when resolution fails")
def close(self):
@@ -73,7 +73,7 @@ def test_sessions_delete_handles_eoferror_on_confirm(monkeypatch, capsys):
def resolve_session_id(self, session_id):
return "20260315_092437_c9a6ff"
def delete_session(self, session_id):
def delete_session(self, session_id, **kwargs):
raise AssertionError("delete_session should not be called when cancelled")
def close(self):

View File

@@ -0,0 +1,30 @@
"""Regression: ``hermes setup`` for the ollama-cloud provider must force-refresh
the model cache after the user supplies a key, otherwise the picker keeps
serving a stale cache (models.dev only, no live API probe) for up to an hour.
"""
from __future__ import annotations
from unittest.mock import patch
def test_setup_ollama_cloud_passes_force_refresh(monkeypatch):
"""The provider-setup model-fetch for ollama-cloud must pass ``force_refresh=True``."""
import hermes_cli.main as main_mod
import inspect
src = inspect.getsource(main_mod)
# Locate the ollama-cloud branch in the provider setup flow.
marker = 'provider_id == "ollama-cloud"'
assert marker in src, "ollama-cloud branch missing from provider setup"
idx = src.index(marker)
# The call to fetch_ollama_cloud_models should be within the next ~2000 chars.
snippet = src[idx:idx + 2000]
assert "fetch_ollama_cloud_models(" in snippet, snippet[:500]
assert "force_refresh=True" in snippet, (
"ollama-cloud setup must pass force_refresh=True so newly released "
"models (e.g. deepseek v4 flash, kimi k2.6) appear the moment the "
"user enters their key, not an hour later when the cache TTL expires. "
f"Snippet: {snippet[:500]}"
)

View File

@@ -41,6 +41,36 @@ def test_get_platform_tools_homeassistant_platform_keeps_homeassistant_toolset()
assert "homeassistant" in enabled
def test_get_platform_tools_homeassistant_toolset_enabled_for_cron_when_hass_token_set(monkeypatch):
"""HA toolset is runtime-gated by check_fn (requires HASS_TOKEN).
When HASS_TOKEN is set, the user has explicitly opted in — _DEFAULT_OFF_TOOLSETS
shouldn't also strip HA from platforms (like cron) that run through
_get_platform_tools without an explicit saved toolset list.
Regression guard for Norbert's HA cron breakage after #14798 made cron
honor per-platform tool config.
"""
monkeypatch.setenv("HASS_TOKEN", "fake-test-token")
cron_enabled = _get_platform_tools({}, "cron")
assert "homeassistant" in cron_enabled
# moa must stay off — the original goal of #14798
assert "moa" not in cron_enabled
cli_enabled = _get_platform_tools({}, "cli")
assert "homeassistant" in cli_enabled
def test_get_platform_tools_homeassistant_toolset_off_for_cron_when_hass_token_missing(monkeypatch):
"""Without HASS_TOKEN, HA stays off by default — preserves #14798's behavior
for users who never configured HA."""
monkeypatch.delenv("HASS_TOKEN", raising=False)
cron_enabled = _get_platform_tools({}, "cron")
assert "homeassistant" not in cron_enabled
def test_get_platform_tools_preserves_explicit_empty_selection():
config = {"platform_toolsets": {"cli": []}}

View File

@@ -0,0 +1,121 @@
"""Tests for _web_ui_build_needed — staleness check for the web UI dist.
Critical invariant: the Vite build outputs to hermes_cli/web_dist/
(vite.config.ts: outDir: "../hermes_cli/web_dist"), NOT web/dist/.
The sentinel must be checked in the correct output directory or the
freshness check is a no-op and the OOM rebuild always runs.
"""
import os
import time
from pathlib import Path
from unittest.mock import patch
import pytest
from hermes_cli.main import _web_ui_build_needed, _build_web_ui
def _touch(path: Path, offset: float = 0.0) -> None:
path.parent.mkdir(parents=True, exist_ok=True)
path.touch()
if offset:
t = time.time() + offset
os.utime(path, (t, t))
def _make_web_dir(tmp_path: Path) -> tuple[Path, Path]:
"""Return (web_dir, dist_dir) matching real repo layout."""
web_dir = tmp_path / "web"
web_dir.mkdir()
(web_dir / "package.json").touch()
dist_dir = tmp_path / "hermes_cli" / "web_dist"
return web_dir, dist_dir
class TestWebUIBuildNeeded:
def test_returns_true_when_dist_missing(self, tmp_path):
web_dir, _ = _make_web_dir(tmp_path)
assert _web_ui_build_needed(web_dir) is True
def test_returns_false_when_vite_manifest_fresh(self, tmp_path):
web_dir, dist_dir = _make_web_dir(tmp_path)
_touch(web_dir / "src" / "App.tsx", offset=-10)
_touch(dist_dir / ".vite" / "manifest.json")
assert _web_ui_build_needed(web_dir) is False
def test_returns_true_when_source_newer_than_manifest(self, tmp_path):
web_dir, dist_dir = _make_web_dir(tmp_path)
_touch(dist_dir / ".vite" / "manifest.json", offset=-10)
_touch(web_dir / "src" / "App.tsx")
assert _web_ui_build_needed(web_dir) is True
def test_falls_back_to_index_html_when_manifest_missing(self, tmp_path):
web_dir, dist_dir = _make_web_dir(tmp_path)
_touch(web_dir / "src" / "main.ts", offset=-10)
_touch(dist_dir / "index.html")
assert _web_ui_build_needed(web_dir) is False
def test_web_dist_dir_not_web_dist_subdir(self, tmp_path):
"""Regression: sentinel must be in hermes_cli/web_dist/, NOT web/dist/."""
web_dir, dist_dir = _make_web_dir(tmp_path)
_touch(web_dir / "src" / "App.tsx", offset=-10)
# Place manifest in wrong location (web/dist/) — should NOT count as fresh
wrong_dist = web_dir / "dist" / ".vite" / "manifest.json"
_touch(wrong_dist)
# Correct location is empty → still needs build
assert _web_ui_build_needed(web_dir) is True
def test_returns_true_when_package_lock_newer_than_dist(self, tmp_path):
web_dir, dist_dir = _make_web_dir(tmp_path)
_touch(dist_dir / ".vite" / "manifest.json", offset=-10)
_touch(web_dir / "package-lock.json")
assert _web_ui_build_needed(web_dir) is True
def test_returns_true_when_vite_config_newer_than_dist(self, tmp_path):
web_dir, dist_dir = _make_web_dir(tmp_path)
_touch(dist_dir / ".vite" / "manifest.json", offset=-10)
_touch(web_dir / "vite.config.ts")
assert _web_ui_build_needed(web_dir) is True
def test_ignores_node_modules(self, tmp_path):
web_dir, dist_dir = _make_web_dir(tmp_path)
# package.json older than manifest; only node_modules file is newer
_touch(web_dir / "package.json", offset=-20)
_touch(dist_dir / ".vite" / "manifest.json", offset=-10)
_touch(web_dir / "node_modules" / "react" / "index.js")
assert _web_ui_build_needed(web_dir) is False
def test_ignores_dist_subdir_under_web(self, tmp_path):
web_dir, dist_dir = _make_web_dir(tmp_path)
# package.json older than manifest; only web/dist file is newer
_touch(web_dir / "package.json", offset=-20)
_touch(dist_dir / ".vite" / "manifest.json", offset=-10)
_touch(web_dir / "dist" / "assets" / "index.js")
assert _web_ui_build_needed(web_dir) is False
class TestBuildWebUISkipsWhenFresh:
def test_skips_npm_when_dist_is_fresh(self, tmp_path):
web_dir, dist_dir = _make_web_dir(tmp_path)
_touch(dist_dir / ".vite" / "manifest.json")
with patch("hermes_cli.main.shutil.which", return_value="/usr/bin/npm"), \
patch("hermes_cli.main.subprocess.run") as mock_run:
result = _build_web_ui(web_dir)
assert result is True
mock_run.assert_not_called()
def test_runs_npm_when_dist_missing(self, tmp_path):
web_dir, _ = _make_web_dir(tmp_path)
mock_cp = __import__("subprocess").CompletedProcess([], 0, stdout=b"", stderr=b"")
with patch("hermes_cli.main.shutil.which", return_value="/usr/bin/npm"), \
patch("hermes_cli.main.subprocess.run", return_value=mock_cp) as mock_run:
result = _build_web_ui(web_dir)
assert result is True
assert mock_run.call_count == 2 # npm install + npm run build

View File

@@ -7,6 +7,7 @@ turn counting, tags), and schema completeness.
import json
import re
import sys
from types import SimpleNamespace
from unittest.mock import AsyncMock, MagicMock
@@ -18,6 +19,7 @@ from plugins.memory.hindsight import (
REFLECT_SCHEMA,
RETAIN_SCHEMA,
_load_config,
_build_embedded_profile_env,
_normalize_retain_tags,
_resolve_bank_id_template,
_sanitize_bank_segment,
@@ -34,7 +36,8 @@ def _clean_env(monkeypatch):
"""Ensure no stale env vars leak between tests."""
for key in (
"HINDSIGHT_API_KEY", "HINDSIGHT_API_URL", "HINDSIGHT_BANK_ID",
"HINDSIGHT_BUDGET", "HINDSIGHT_MODE", "HINDSIGHT_LLM_API_KEY",
"HINDSIGHT_BUDGET", "HINDSIGHT_MODE", "HINDSIGHT_TIMEOUT",
"HINDSIGHT_IDLE_TIMEOUT", "HINDSIGHT_LLM_API_KEY",
"HINDSIGHT_RETAIN_TAGS", "HINDSIGHT_RETAIN_SOURCE",
"HINDSIGHT_RETAIN_USER_PREFIX", "HINDSIGHT_RETAIN_ASSISTANT_PREFIX",
):
@@ -251,6 +254,51 @@ class TestConfig:
assert cfg["banks"]["hermes"]["bankId"] == "env-bank"
assert cfg["banks"]["hermes"]["budget"] == "high"
def test_embedded_profile_env_includes_idle_timeout_from_config(self):
env = _build_embedded_profile_env({
"llm_provider": "openai",
"llm_model": "gpt-4o-mini",
"idle_timeout": 0,
})
assert env["HINDSIGHT_EMBED_DAEMON_IDLE_TIMEOUT"] == "0"
def test_embedded_profile_env_includes_idle_timeout_from_env(self, monkeypatch):
monkeypatch.setenv("HINDSIGHT_IDLE_TIMEOUT", "42")
env = _build_embedded_profile_env({
"llm_provider": "openai",
"llm_model": "gpt-4o-mini",
})
assert env["HINDSIGHT_EMBED_DAEMON_IDLE_TIMEOUT"] == "42"
def test_get_client_passes_idle_timeout_to_hindsight_embedded(self, monkeypatch):
captured = {}
class FakeHindsightEmbedded:
def __init__(self, **kwargs):
captured.update(kwargs)
monkeypatch.setitem(sys.modules, "hindsight", SimpleNamespace(HindsightEmbedded=FakeHindsightEmbedded))
monkeypatch.setattr("plugins.memory.hindsight._check_local_runtime", lambda: (True, ""))
p = HindsightMemoryProvider()
p._mode = "local_embedded"
p._config = {
"profile": "hermes",
"llm_provider": "openai_compatible",
"llm_api_key": "test-key",
"llm_model": "test-model",
"idle_timeout": 0,
}
p._llm_base_url = "http://localhost:8060/v1"
p._get_client()
assert captured["idle_timeout"] == 0
assert captured["llm_provider"] == "openai"
class TestPostSetup:
def test_local_embedded_setup_materializes_profile_env(self, tmp_path, monkeypatch):
@@ -272,7 +320,10 @@ class TestPostSetup:
provider.post_setup(str(hermes_home), {"memory": {}})
assert saved_configs[-1]["memory"]["provider"] == "hindsight"
assert (hermes_home / ".env").read_text() == "HINDSIGHT_LLM_API_KEY=sk-local-test\nHINDSIGHT_TIMEOUT=120\n"
env_text = (hermes_home / ".env").read_text()
assert "HINDSIGHT_LLM_API_KEY=sk-local-test\n" in env_text
assert "HINDSIGHT_TIMEOUT=120\n" in env_text
assert "HINDSIGHT_IDLE_TIMEOUT=300\n" in env_text
profile_env = user_home / ".hindsight" / "profiles" / "hermes.env"
assert profile_env.exists()
@@ -281,6 +332,7 @@ class TestPostSetup:
"HINDSIGHT_API_LLM_API_KEY=sk-local-test\n"
"HINDSIGHT_API_LLM_MODEL=gpt-4o-mini\n"
"HINDSIGHT_API_LOG_LEVEL=info\n"
"HINDSIGHT_EMBED_DAEMON_IDLE_TIMEOUT=300\n"
)
def test_local_embedded_setup_respects_existing_profile_name(self, tmp_path, monkeypatch):
@@ -446,6 +498,28 @@ class TestToolHandlers:
))
assert "error" in result
def test_local_embedded_recall_reconnects_after_idle_shutdown(self, provider, monkeypatch):
first_client = _make_mock_client()
first_client.arecall.side_effect = RuntimeError("Cannot connect to host 127.0.0.1:8888")
second_client = _make_mock_client()
second_client.arecall.return_value = SimpleNamespace(
results=[SimpleNamespace(text="Recovered memory")]
)
clients = iter([first_client, second_client])
provider._mode = "local_embedded"
provider._client = first_client
monkeypatch.setattr(provider, "_get_client", lambda: next(clients))
result = json.loads(provider.handle_tool_call(
"hindsight_recall", {"query": "test"}
))
assert result["result"] == "1. Recovered memory"
assert provider._client is second_client
first_client.arecall.assert_called_once()
second_client.arecall.assert_called_once()
# ---------------------------------------------------------------------------
# Prefetch tests
@@ -1102,3 +1176,22 @@ class TestSharedEventLoopLifecycle:
mock_client.aclose.assert_called_once()
assert provider._client is None
class TestShutdown:
def test_local_embedded_shutdown_closes_inner_async_client_on_shared_loop(self, provider):
inner_client = _make_mock_client()
embedded = MagicMock()
embedded._client = inner_client
embedded.close = MagicMock()
provider._mode = "local_embedded"
provider._client = embedded
provider.shutdown()
inner_client.aclose.assert_awaited_once()
embedded.close.assert_called_once()
assert embedded._client is None
assert provider._client is None

View File

@@ -0,0 +1,73 @@
"""Regression tests for background review agent cleanup."""
from __future__ import annotations
import run_agent as run_agent_module
from run_agent import AIAgent
def _bare_agent() -> AIAgent:
agent = object.__new__(AIAgent)
agent.model = "fake-model"
agent.platform = "telegram"
agent.provider = "openai"
agent.base_url = ""
agent.api_key = ""
agent.api_mode = ""
agent.session_id = "test-session"
agent._parent_session_id = ""
agent._credential_pool = None
agent._memory_store = object()
agent._memory_enabled = True
agent._user_profile_enabled = False
agent._MEMORY_REVIEW_PROMPT = "review memory"
agent._SKILL_REVIEW_PROMPT = "review skills"
agent._COMBINED_REVIEW_PROMPT = "review both"
agent.background_review_callback = None
agent.status_callback = None
agent._safe_print = lambda *_args, **_kwargs: None
return agent
class ImmediateThread:
def __init__(self, *, target, daemon=None, name=None):
self._target = target
def start(self):
self._target()
def test_background_review_shuts_down_memory_provider_before_close(monkeypatch):
events = []
class FakeReviewAgent:
def __init__(self, **kwargs):
events.append(("init", kwargs))
self._session_messages = []
def run_conversation(self, **kwargs):
events.append(("run_conversation", kwargs))
def shutdown_memory_provider(self):
events.append(("shutdown_memory_provider", None))
def close(self):
events.append(("close", None))
monkeypatch.setattr(run_agent_module, "AIAgent", FakeReviewAgent)
monkeypatch.setattr(run_agent_module.threading, "Thread", ImmediateThread)
agent = _bare_agent()
AIAgent._spawn_background_review(
agent,
messages_snapshot=[{"role": "user", "content": "hello"}],
review_memory=True,
)
assert [name for name, _payload in events] == [
"init",
"run_conversation",
"shutdown_memory_provider",
"close",
]

View File

@@ -261,6 +261,42 @@ class TestGatewayMode:
]
assert len(gw_handlers) == 0
def test_gateway_log_created_after_cli_init(self, hermes_home):
"""Gateway mode attaches gateway.log even after earlier CLI init."""
hermes_logging.setup_logging(hermes_home=hermes_home, mode="cli")
hermes_logging.setup_logging(hermes_home=hermes_home, mode="gateway")
root = logging.getLogger()
gw_handlers = [
h for h in root.handlers
if isinstance(h, RotatingFileHandler)
and "gateway.log" in getattr(h, "baseFilename", "")
]
assert len(gw_handlers) == 1
logging.getLogger("gateway.run").info("gateway connected after cli init")
for h in root.handlers:
h.flush()
gw_log = hermes_home / "logs" / "gateway.log"
assert gw_log.exists()
assert "gateway connected after cli init" in gw_log.read_text()
def test_gateway_log_created_after_cli_init_without_duplicate_handlers(self, hermes_home):
"""Repeated gateway setup calls do not attach duplicate gateway handlers."""
hermes_logging.setup_logging(hermes_home=hermes_home, mode="cli")
hermes_logging.setup_logging(hermes_home=hermes_home, mode="gateway")
hermes_logging.setup_logging(hermes_home=hermes_home, mode="gateway")
root = logging.getLogger()
gw_handlers = [
h for h in root.handlers
if isinstance(h, RotatingFileHandler)
and "gateway.log" in getattr(h, "baseFilename", "")
]
assert len(gw_handlers) == 1
def test_gateway_log_receives_gateway_records(self, hermes_home):
"""gateway.log captures records from gateway.* loggers."""
hermes_logging.setup_logging(hermes_home=hermes_home, mode="gateway")

View File

@@ -2010,3 +2010,58 @@ class TestAutoMaintenance:
# Should parse as a float timestamp close to now.
assert abs(float(marker) - time.time()) < 60
def test_auto_prune_deletes_transcript_files(self, db, tmp_path):
"""Issue #3015: auto-prune must also delete on-disk transcript files."""
sessions_dir = tmp_path / "sessions"
sessions_dir.mkdir()
self._make_old_ended(db, "old1", days_old=100)
self._make_old_ended(db, "old2", days_old=100)
db.create_session(session_id="new", source="cli") # active
# Transcript files mimicking real gateway/CLI layout
(sessions_dir / "old1.json").write_text("{}")
(sessions_dir / "old1.jsonl").write_text("{}\n")
(sessions_dir / "old2.jsonl").write_text("{}\n")
(sessions_dir / "request_dump_old1_001.json").write_text("{}")
(sessions_dir / "new.jsonl").write_text("{}\n") # active, must survive
result = db.maybe_auto_prune_and_vacuum(
retention_days=90, sessions_dir=sessions_dir
)
assert result["pruned"] == 2
# Pruned transcript files are gone
assert not (sessions_dir / "old1.json").exists()
assert not (sessions_dir / "old1.jsonl").exists()
assert not (sessions_dir / "old2.jsonl").exists()
assert not (sessions_dir / "request_dump_old1_001.json").exists()
# Active session's transcript is untouched
assert (sessions_dir / "new.jsonl").exists()
def test_auto_prune_without_sessions_dir_preserves_files(self, db, tmp_path):
"""Backward-compat: no sessions_dir = DB-only cleanup (legacy behavior)."""
sessions_dir = tmp_path / "sessions"
sessions_dir.mkdir()
self._make_old_ended(db, "old", days_old=100)
(sessions_dir / "old.jsonl").write_text("{}\n")
result = db.maybe_auto_prune_and_vacuum(retention_days=90)
assert result["pruned"] == 1
# File stays — caller didn't opt in
assert (sessions_dir / "old.jsonl").exists()
def test_prune_sessions_deletes_files_for_pruned_only(self, db, tmp_path):
"""Active-session transcripts must never be deleted by prune."""
sessions_dir = tmp_path / "sessions"
sessions_dir.mkdir()
self._make_old_ended(db, "old", days_old=100)
db.create_session(session_id="active", source="cli") # not ended
(sessions_dir / "old.jsonl").write_text("{}\n")
(sessions_dir / "active.jsonl").write_text("{}\n")
count = db.prune_sessions(older_than_days=90, sessions_dir=sessions_dir)
assert count == 1
assert not (sessions_dir / "old.jsonl").exists()
assert (sessions_dir / "active.jsonl").exists()

View File

@@ -0,0 +1,416 @@
"""
test_yuanbao_integration.py - Yuanbao 模块集成测试
验证各模块能正确组装和交互:
- YuanbaoAdapter 初始化
- Config / Platform 枚举
- get_connected_platforms 逻辑
- Proto 编解码 round-trip
- Markdown 分块
- API / Media 模块 import
- Toolset 注册
"""
import sys
import os
# 确保 hermes-agent 根目录在 sys.path 中
_REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if _REPO_ROOT not in sys.path:
sys.path.insert(0, _REPO_ROOT)
import pytest
from unittest.mock import AsyncMock, MagicMock, patch
from gateway.config import Platform, PlatformConfig, GatewayConfig
from gateway.platforms.yuanbao import YuanbaoAdapter
def make_config(**kwargs):
extra = kwargs.pop("extra", {})
extra.setdefault("app_id", "test_key")
extra.setdefault("app_secret", "test_secret")
extra.setdefault("ws_url", "wss://test.example.com/ws")
extra.setdefault("api_domain", "https://test.example.com")
return PlatformConfig(
extra=extra,
**kwargs,
)
# ===========================================================
# 1. Adapter 初始化
# ===========================================================
class TestYuanbaoAdapterInit:
def test_create_adapter(self):
config = make_config()
adapter = YuanbaoAdapter(config)
assert adapter is not None
assert adapter.PLATFORM == Platform.YUANBAO
def test_initial_state(self):
config = make_config()
adapter = YuanbaoAdapter(config)
status = adapter.get_status()
assert status["connected"] == False
assert status["bot_id"] is None
# ===========================================================
# 2. Config / Platform 枚举
# ===========================================================
class TestYuanbaoConfig:
def test_platform_enum(self):
assert Platform.YUANBAO.value == "yuanbao"
def test_config_fields(self):
config = make_config()
assert config.extra["app_id"] == "test_key"
assert config.extra["app_secret"] == "test_secret"
def test_get_connected_platforms_requires_key_and_secret(self):
# Only key, no secret → not in connected list
gw_only_key = GatewayConfig(
platforms={
Platform.YUANBAO: PlatformConfig(
enabled=True,
extra={"app_id": "key"},
)
}
)
platforms = gw_only_key.get_connected_platforms()
assert Platform.YUANBAO not in platforms
# key + secret both present → in connected list
gw_full = GatewayConfig(
platforms={
Platform.YUANBAO: PlatformConfig(
enabled=True,
extra={"app_id": "key", "app_secret": "secret"},
)
}
)
platforms2 = gw_full.get_connected_platforms()
assert Platform.YUANBAO in platforms2
# ===========================================================
# 3. GatewayRunner 注册
# ===========================================================
class TestGatewayRunnerRegistration:
def test_yuanbao_in_platform_enum(self):
"""Platform 枚举包含 YUANBAO"""
assert hasattr(Platform, "YUANBAO")
assert Platform.YUANBAO.value == "yuanbao"
def _make_minimal_runner(self, config):
"""通过 __new__ + 最小初始化绕过 run.py 的模块级 dotenv/ssl 副作用"""
import sys
from unittest.mock import MagicMock
# Stub out heavy dependencies if not already present
stubs = [
"dotenv",
"hermes_cli.env_loader",
"hermes_cli.config",
"hermes_constants",
]
_orig = {}
for mod in stubs:
if mod not in sys.modules:
_orig[mod] = None
sys.modules[mod] = MagicMock()
try:
from gateway.run import GatewayRunner
finally:
# Restore only the ones we injected
for mod, orig in _orig.items():
if orig is None:
sys.modules.pop(mod, None)
runner = GatewayRunner.__new__(GatewayRunner)
runner.config = config
runner.adapters = {}
runner._failed_platforms = {}
runner._session_model_overrides = {}
return runner, GatewayRunner
def test_runner_creates_yuanbao_adapter(self):
"""GatewayRunner._create_adapter 能为 YUANBAO 返回 YuanbaoAdapter 实例"""
from gateway.config import GatewayConfig
from unittest.mock import patch
config = make_config(enabled=True)
gw_config = GatewayConfig(platforms={Platform.YUANBAO: config})
try:
runner, _ = self._make_minimal_runner(gw_config)
# websockets 在测试环境可能未安装mock 掉 WEBSOCKETS_AVAILABLE
with patch("gateway.platforms.yuanbao.WEBSOCKETS_AVAILABLE", True):
adapter = runner._create_adapter(Platform.YUANBAO, config)
except ImportError as e:
pytest.skip(f"run.py import unavailable in test env: {e}")
assert adapter is not None
assert isinstance(adapter, YuanbaoAdapter)
def test_runner_adapter_platform_attr(self):
"""创建的 adapter.PLATFORM 为 Platform.YUANBAO"""
from gateway.config import GatewayConfig
from unittest.mock import patch
config = make_config(enabled=True)
gw_config = GatewayConfig(platforms={Platform.YUANBAO: config})
try:
runner, _ = self._make_minimal_runner(gw_config)
with patch("gateway.platforms.yuanbao.WEBSOCKETS_AVAILABLE", True):
adapter = runner._create_adapter(Platform.YUANBAO, config)
except ImportError as e:
pytest.skip(f"run.py import unavailable in test env: {e}")
assert adapter is not None
assert adapter.PLATFORM == Platform.YUANBAO
# ===========================================================
# 4. Proto round-trip
# ===========================================================
class TestProtoRoundTrip:
"""验证 proto 编解码基本功能"""
def test_conn_msg_roundtrip(self):
from gateway.platforms.yuanbao_proto import encode_conn_msg, decode_conn_msg
encoded = encode_conn_msg(msg_type=1, seq_no=42, data=b"hello")
decoded = decode_conn_msg(encoded)
assert decoded["seq_no"] == 42
assert decoded["data"] == b"hello"
def test_text_elem_encoding(self):
from gateway.platforms.yuanbao_proto import encode_send_c2c_message
msg = encode_send_c2c_message(
to_account="user123",
msg_body=[{"msg_type": "TIMTextElem", "msg_content": {"text": "hello"}}],
from_account="bot456",
)
assert isinstance(msg, bytes)
assert len(msg) > 0
# ===========================================================
# 5. Markdown 分块
# ===========================================================
class TestMarkdownChunking:
def test_chunks_are_sent_separately(self):
from gateway.platforms.yuanbao import MarkdownProcessor
long_text = "paragraph\n\n" * 100
chunks = MarkdownProcessor.chunk_markdown_text(long_text, 200)
assert len(chunks) > 1
for c in chunks:
# 段落原子块允许轻微超限,仅验证不崩溃
assert isinstance(c, str)
assert len(c) > 0
def test_chunk_short_text_no_split(self):
from gateway.platforms.yuanbao import MarkdownProcessor
text = "hello world"
chunks = MarkdownProcessor.chunk_markdown_text(text, 3000)
assert chunks == [text]
# ===========================================================
# 6. Sign Token 模块
# ===========================================================
class TestSignToken:
def test_import_ok(self):
from gateway.platforms.yuanbao import SignManager
assert callable(SignManager.get_token)
assert callable(SignManager.force_refresh)
# ===========================================================
# 6b. ConnectionManager / OutboundManager
# ===========================================================
class TestManagerImports:
def test_connection_manager_import(self):
from gateway.platforms.yuanbao import ConnectionManager
assert ConnectionManager is not None
def test_outbound_manager_import(self):
from gateway.platforms.yuanbao import OutboundManager
assert OutboundManager is not None
def test_message_sender_import(self):
from gateway.platforms.yuanbao import MessageSender
assert MessageSender is not None
def test_heartbeat_manager_import(self):
from gateway.platforms.yuanbao import HeartbeatManager
assert HeartbeatManager is not None
def test_slow_response_notifier_import(self):
from gateway.platforms.yuanbao import SlowResponseNotifier
assert SlowResponseNotifier is not None
def test_adapter_has_outbound_manager(self):
adapter = YuanbaoAdapter(make_config())
from gateway.platforms.yuanbao import ConnectionManager, OutboundManager
assert isinstance(adapter._connection, ConnectionManager)
assert isinstance(adapter._outbound, OutboundManager)
def test_outbound_composes_sub_managers(self):
adapter = YuanbaoAdapter(make_config())
from gateway.platforms.yuanbao import MessageSender, HeartbeatManager, SlowResponseNotifier
assert isinstance(adapter._outbound.sender, MessageSender)
assert isinstance(adapter._outbound.heartbeat, HeartbeatManager)
assert isinstance(adapter._outbound.slow_notifier, SlowResponseNotifier)
# ===========================================================
# 7. Media 模块
# ===========================================================
class TestMediaModule:
def test_import_ok(self):
from gateway.platforms.yuanbao_media import upload_to_cos, download_url
assert callable(upload_to_cos)
assert callable(download_url)
# ===========================================================
# 8. Toolset 注册
# ===========================================================
class TestToolset:
def test_yuanbao_toolset_registered(self):
"""toolsets.py 中存在 hermes-yuanbao 键"""
import importlib
ts = importlib.import_module("toolsets")
assert hasattr(ts, "TOOLSETS") or hasattr(ts, "toolsets")
toolsets_dict = getattr(ts, "TOOLSETS", getattr(ts, "toolsets", {}))
assert "hermes-yuanbao" in toolsets_dict
def test_tools_import(self):
from tools.yuanbao_tools import (
get_group_info,
query_group_members,
send_dm,
)
assert all(callable(f) for f in [
get_group_info,
query_group_members,
send_dm,
])
# ===========================================================
# 9. platforms/__init__.py 导出
# ===========================================================
class TestPlatformInit:
def test_yuanbao_adapter_exported(self):
"""gateway.platforms.__init__.py 应导出 YuanbaoAdapter"""
from gateway.platforms import YuanbaoAdapter as _YuanbaoAdapter
assert _YuanbaoAdapter is YuanbaoAdapter
# ===========================================================
# 10. P0 fixes verification
# ===========================================================
import asyncio
import collections
class TestP0ReconnectGuard:
"""P0-1: _reconnecting flag prevents concurrent reconnect attempts."""
def test_reconnecting_flag_initialized(self):
adapter = YuanbaoAdapter(make_config())
assert hasattr(adapter._connection, '_reconnecting')
assert adapter._connection._reconnecting is False
def test_schedule_reconnect_skips_when_not_running(self):
adapter = YuanbaoAdapter(make_config())
adapter._running = False
adapter._connection._reconnecting = False
adapter._connection.schedule_reconnect()
# No task should be created because _running is False
def test_schedule_reconnect_skips_when_already_reconnecting(self):
adapter = YuanbaoAdapter(make_config())
adapter._running = True
adapter._connection._reconnecting = True
adapter._connection.schedule_reconnect()
# No new task should be created because already reconnecting
class TestP0InboundTaskTracking:
"""P0-2: _inbound_tasks set is initialized and usable."""
def test_inbound_tasks_initialized(self):
adapter = YuanbaoAdapter(make_config())
assert hasattr(adapter, '_inbound_tasks')
assert isinstance(adapter._inbound_tasks, set)
assert len(adapter._inbound_tasks) == 0
class TestP0ChatLockEviction:
"""P0-3: get_chat_lock uses OrderedDict and safe eviction."""
def test_chat_locks_is_ordered_dict(self):
adapter = YuanbaoAdapter(make_config())
assert isinstance(adapter._outbound._chat_locks, collections.OrderedDict)
def test_eviction_skips_locked(self):
"""When eviction is needed, locked entries are skipped."""
adapter = YuanbaoAdapter(make_config())
from gateway.platforms.yuanbao import OutboundManager
# Fill to capacity with unlocked locks
for i in range(OutboundManager.CHAT_DICT_MAX_SIZE):
adapter._outbound._chat_locks[f"chat_{i}"] = asyncio.Lock()
# Lock the oldest entry
oldest_key = next(iter(adapter._outbound._chat_locks))
oldest_lock = adapter._outbound._chat_locks[oldest_key]
# Simulate a held lock by acquiring it in a non-async way (set _locked)
# asyncio.Lock is not held until actually acquired; so we test the
# method logic by acquiring the first lock manually.
# For a sync test, we check that get_chat_lock doesn't crash.
new_lock = adapter._outbound.get_chat_lock("new_chat")
assert "new_chat" in adapter._outbound._chat_locks
assert isinstance(new_lock, asyncio.Lock)
# The oldest unlocked entry should have been evicted
assert len(adapter._outbound._chat_locks) == OutboundManager.CHAT_DICT_MAX_SIZE
def test_move_to_end_on_access(self):
"""Accessing an existing key moves it to the end (MRU)."""
adapter = YuanbaoAdapter(make_config())
adapter._outbound._chat_locks["a"] = asyncio.Lock()
adapter._outbound._chat_locks["b"] = asyncio.Lock()
adapter._outbound._chat_locks["c"] = asyncio.Lock()
# Access "a" — should move to end
adapter._outbound.get_chat_lock("a")
keys = list(adapter._outbound._chat_locks.keys())
assert keys[-1] == "a"
assert keys[0] == "b"
class TestP0PlatformScopedLock:
"""P0-4: connect() calls _acquire_platform_lock."""
def test_adapter_has_platform_lock_methods(self):
adapter = YuanbaoAdapter(make_config())
assert hasattr(adapter, '_acquire_platform_lock')
assert hasattr(adapter, '_release_platform_lock')
if __name__ == "__main__":
pytest.main([__file__, "-v"])

View File

@@ -0,0 +1,324 @@
"""
test_yuanbao_markdown.py - Unit tests for yuanbao_markdown.py
Run (no pytest needed):
cd /root/.openclaw/workspace/hermes-agent
python3 tests/test_yuanbao_markdown.py -v
Or with pytest if available:
python3 -m pytest tests/test_yuanbao_markdown.py -v
"""
import sys
import os
import unittest
# Ensure project root is on the path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
from gateway.platforms.yuanbao import MarkdownProcessor
# ============ has_unclosed_fence ============
class TestHasUnclosedFence(unittest.TestCase):
def test_unclosed_fence(self):
self.assertTrue(MarkdownProcessor.has_unclosed_fence("```python\ncode"))
def test_closed_fence(self):
self.assertFalse(MarkdownProcessor.has_unclosed_fence("```python\ncode\n```"))
def test_empty(self):
self.assertFalse(MarkdownProcessor.has_unclosed_fence(""))
def test_no_fence(self):
self.assertFalse(MarkdownProcessor.has_unclosed_fence("just some text\nno fences here"))
def test_multiple_closed_fences(self):
text = "```python\ncode1\n```\n\n```js\ncode2\n```"
self.assertFalse(MarkdownProcessor.has_unclosed_fence(text))
def test_second_fence_unclosed(self):
text = "```python\ncode1\n```\n\n```js\ncode2"
self.assertTrue(MarkdownProcessor.has_unclosed_fence(text))
def test_fence_at_start(self):
self.assertTrue(MarkdownProcessor.has_unclosed_fence("```\nsome code"))
def test_inline_backtick_ignored(self):
text = "`inline code` is fine"
self.assertFalse(MarkdownProcessor.has_unclosed_fence(text))
# ============ ends_with_table_row ============
class TestEndsWithTableRow(unittest.TestCase):
def test_simple_table_row(self):
self.assertTrue(MarkdownProcessor.ends_with_table_row("| col1 | col2 |"))
def test_table_row_with_trailing_newline(self):
self.assertTrue(MarkdownProcessor.ends_with_table_row("| col1 | col2 |\n"))
def test_table_row_in_middle(self):
text = "| col1 | col2 |\nsome other text"
self.assertFalse(MarkdownProcessor.ends_with_table_row(text))
def test_empty(self):
self.assertFalse(MarkdownProcessor.ends_with_table_row(""))
def test_non_table(self):
self.assertFalse(MarkdownProcessor.ends_with_table_row("just a normal line"))
def test_only_pipe_start(self):
self.assertFalse(MarkdownProcessor.ends_with_table_row("| just pipe at start"))
def test_table_separator_row(self):
self.assertTrue(MarkdownProcessor.ends_with_table_row("| --- | --- |"))
def test_whitespace_only(self):
self.assertFalse(MarkdownProcessor.ends_with_table_row(" \n "))
# ============ split_at_paragraph_boundary ============
class TestSplitAtParagraphBoundary(unittest.TestCase):
def test_split_at_empty_line(self):
text = "paragraph one\n\nparagraph two\n\nparagraph three\nextra"
head, tail = MarkdownProcessor.split_at_paragraph_boundary(text, 30)
self.assertLessEqual(len(head), 30)
self.assertEqual(head + tail, text)
def test_split_at_sentence_end(self):
text = "This is a sentence.\nNext line.\nAnother line."
head, tail = MarkdownProcessor.split_at_paragraph_boundary(text, 25)
self.assertLessEqual(len(head), 25)
self.assertEqual(head + tail, text)
def test_forced_split_no_boundary(self):
text = "a" * 100
head, tail = MarkdownProcessor.split_at_paragraph_boundary(text, 50)
self.assertEqual(len(head), 50)
self.assertEqual(head + tail, text)
def test_split_at_newline(self):
text = "line one\nline two\nline three"
head, tail = MarkdownProcessor.split_at_paragraph_boundary(text, 15)
self.assertLessEqual(len(head), 15)
self.assertEqual(head + tail, text)
def test_chinese_sentence_boundary(self):
text = "这是第一句话。\n这是第二句话。\n这是第三句话。"
head, tail = MarkdownProcessor.split_at_paragraph_boundary(text, 15)
self.assertLessEqual(len(head), 15)
self.assertEqual(head + tail, text)
# ============ chunk_markdown_text ============
class TestChunkMarkdownText(unittest.TestCase):
def test_empty(self):
self.assertEqual(MarkdownProcessor.chunk_markdown_text(""), [])
def test_short_text_no_split(self):
text = "hello world"
self.assertEqual(MarkdownProcessor.chunk_markdown_text(text, 3000), [text])
def test_exactly_max_chars(self):
text = "a" * 3000
result = MarkdownProcessor.chunk_markdown_text(text, 3000)
self.assertEqual(len(result), 1)
self.assertEqual(result[0], text)
def test_plain_text_split(self):
"""x * 9000 should return 3 chunks of ~3000"""
text = "x" * 9000
result = MarkdownProcessor.chunk_markdown_text(text, 3000)
self.assertEqual(len(result), 3)
for chunk in result:
self.assertLessEqual(len(chunk), 3000)
self.assertEqual(''.join(result), text)
def test_5000_chars_returns_2(self):
"""验收标准: 'a'*5000 with max 3000 → 2 chunks"""
result = MarkdownProcessor.chunk_markdown_text("a" * 5000, 3000)
self.assertEqual(len(result), 2)
def test_code_fence_not_split(self):
"""代码块不应被切断"""
code_lines = "\n".join([f" line_{i} = {i}" for i in range(200)])
text = f"Some intro text.\n\n```python\n{code_lines}\n```\n\nSome outro text."
result = MarkdownProcessor.chunk_markdown_text(text, 3000)
for chunk in result:
self.assertFalse(MarkdownProcessor.has_unclosed_fence(chunk),
f"Chunk has unclosed fence:\n{chunk[:200]}...")
def test_table_not_split(self):
"""表格行不应被切断"""
header = "| Name | Value | Description |\n| --- | --- | --- |"
rows = "\n".join([f"| item_{i} | {i * 100} | description for item {i} |"
for i in range(50)])
table = f"{header}\n{rows}"
text = "Some intro text.\n\n" + table + "\n\nSome outro text."
result = MarkdownProcessor.chunk_markdown_text(text, 3000)
for chunk in result:
self.assertFalse(MarkdownProcessor.has_unclosed_fence(chunk))
def test_code_fence_200_lines_not_cut(self):
"""包含 200 行代码块的文本,代码块不被切断"""
code_lines = "\n".join([f"x = {i}" for i in range(200)])
text = f"Intro.\n\n```python\n{code_lines}\n```\n\nOutro."
result = MarkdownProcessor.chunk_markdown_text(text, 3000)
for chunk in result:
self.assertFalse(MarkdownProcessor.has_unclosed_fence(chunk))
def test_multiple_paragraphs(self):
"""多段落文本应在段落边界切割"""
paragraphs = ["This is paragraph number " + str(i) + ". " * 50
for i in range(10)]
text = "\n\n".join(paragraphs)
result = MarkdownProcessor.chunk_markdown_text(text, 500)
self.assertGreater(len(result), 1)
total_content = ''.join(result)
self.assertGreaterEqual(len(total_content), len(text) * 0.95)
def test_single_long_line(self):
"""单行超长文本应被强制切割"""
text = "a" * 10000
result = MarkdownProcessor.chunk_markdown_text(text, 3000)
self.assertGreaterEqual(len(result), 3)
for c in result:
self.assertLessEqual(len(c), 3000)
def test_fence_followed_by_text(self):
"""围栏后的文本应正常切割"""
text = "```python\nprint('hi')\n```\n\n" + "Normal text. " * 300
result = MarkdownProcessor.chunk_markdown_text(text, 500)
for chunk in result:
self.assertFalse(MarkdownProcessor.has_unclosed_fence(chunk))
def test_returns_non_empty_strings(self):
"""所有返回的片段都应为非空字符串"""
text = "Hello world!\n\n" * 100
result = MarkdownProcessor.chunk_markdown_text(text, 100)
for chunk in result:
self.assertGreater(len(chunk), 0)
# ============ Acceptance criteria ============
class TestAcceptanceCriteria(unittest.TestCase):
def test_9000_x_returns_3_chunks(self):
"""验收MarkdownProcessor.chunk_markdown_text("x" * 9000, 3000) 返回 3 个片段"""
result = MarkdownProcessor.chunk_markdown_text("x" * 9000, 3000)
self.assertEqual(len(result), 3)
for chunk in result:
self.assertLessEqual(len(chunk), 3000)
def test_5000_a_returns_2_chunks(self):
"""验收python -c 输出 2"""
result = MarkdownProcessor.chunk_markdown_text("a" * 5000, 3000)
self.assertEqual(len(result), 2)
def test_has_unclosed_fence_true(self):
"""验收MarkdownProcessor.has_unclosed_fence("```python\\ncode") 返回 True"""
self.assertTrue(MarkdownProcessor.has_unclosed_fence("```python\ncode"))
def test_has_unclosed_fence_false(self):
"""验收MarkdownProcessor.has_unclosed_fence("```python\\ncode\\n```") 返回 False"""
self.assertFalse(MarkdownProcessor.has_unclosed_fence("```python\ncode\n```"))
def test_code_block_200_lines_not_broken(self):
"""验收:包含 200 行代码块的文本,代码块不被切断"""
code_lines = "\n".join([f" result_{i} = compute({i})" for i in range(200)])
text = f"Introduction.\n\n```python\n{code_lines}\n```\n\nConclusion."
result = MarkdownProcessor.chunk_markdown_text(text, 3000)
for chunk in result:
self.assertFalse(MarkdownProcessor.has_unclosed_fence(chunk),
f"Found unclosed fence in chunk:\n{chunk[:100]}...")
def test_table_rows_not_broken(self):
"""验收:表格行不被切断(每个 chunk 中的表格 fence 完整)"""
rows = "\n".join([
f"| Col A {i} | Col B {i} | Col C {i} |" for i in range(100)
])
text = f"Table:\n\n| A | B | C |\n| --- | --- | --- |\n{rows}\n\nDone."
result = MarkdownProcessor.chunk_markdown_text(text, 500)
for chunk in result:
self.assertFalse(MarkdownProcessor.has_unclosed_fence(chunk))
if __name__ == '__main__':
unittest.main(verbosity=2)
# ============ pytest-style function tests (task specification) ============
def test_short_text_no_split():
assert MarkdownProcessor.chunk_markdown_text("hello", 100) == ["hello"]
def test_plain_text_split():
chunks = MarkdownProcessor.chunk_markdown_text("a" * 5000, 3000)
assert len(chunks) >= 2
for c in chunks:
assert len(c) <= 3000
def test_fence_not_broken():
"""代码块不应被切断"""
code_block = "```python\n" + "x = 1\n" * 200 + "```"
chunks = MarkdownProcessor.chunk_markdown_text(code_block, 1000)
for c in chunks:
assert not MarkdownProcessor.has_unclosed_fence(c), f"Chunk has unclosed fence: {c[:100]}"
def test_large_fence_kept_whole():
"""超大代码块即便超过 max_chars 也应整块输出"""
code_block = "```python\n" + "x = 1\n" * 200 + "```"
chunks = MarkdownProcessor.chunk_markdown_text(code_block, 500)
# 代码块应在同一个 chunk 中(允许超出 max_chars
fence_chunks = [c for c in chunks if "```python" in c]
for c in fence_chunks:
assert not MarkdownProcessor.has_unclosed_fence(c)
def test_mixed_content():
"""代码块前后的普通文本可以正常切割"""
text = "intro paragraph\n\n" + "```python\nx=1\n```" + "\n\noutro paragraph"
chunks = MarkdownProcessor.chunk_markdown_text(text, 100)
for c in chunks:
assert not MarkdownProcessor.has_unclosed_fence(c)
def test_table_not_broken():
"""表格不应被切断"""
table = "| A | B |\n|---|---|\n| 1 | 2 |\n| 3 | 4 |"
text = "before\n\n" + table + "\n\nafter"
chunks = MarkdownProcessor.chunk_markdown_text(text, 30)
table_in_chunk = [c for c in chunks if "|" in c]
for c in table_in_chunk:
lines = [line for line in c.split('\n') if line.strip().startswith('|')]
if lines:
# 至少表格行不被半截切割
pass
def test_has_unclosed_fence():
assert MarkdownProcessor.has_unclosed_fence("```python\ncode") == True
assert MarkdownProcessor.has_unclosed_fence("```python\ncode\n```") == False
assert MarkdownProcessor.has_unclosed_fence("no fence") == False
def test_ends_with_table_row():
assert MarkdownProcessor.ends_with_table_row("| a | b |") == True
assert MarkdownProcessor.ends_with_table_row("normal text") == False
def test_empty_text():
assert MarkdownProcessor.chunk_markdown_text("", 100) == []
def test_exact_limit():
text = "a" * 3000
chunks = MarkdownProcessor.chunk_markdown_text(text, 3000)
assert len(chunks) == 1

File diff suppressed because it is too large Load Diff

654
tests/test_yuanbao_proto.py Normal file
View File

@@ -0,0 +1,654 @@
"""
test_yuanbao_proto.py - yuanbao_proto 单元测试
测试覆盖:
1. varint 编解码 round-trip
2. conn 层 encode/decode round-trip
3. biz 层 encode/decode round-trip
4. decode_inbound_push 解析 TIMTextElem 消息
5. encode_send_c2c_message / encode_send_group_message 编码
6. 固定 bytes 常量验证(防止协议悄悄改动)
7. auth-bind / ping 编码
"""
import sys
import os
# 确保 hermes-agent 根目录在 sys.path 中
_REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if _REPO_ROOT not in sys.path:
sys.path.insert(0, _REPO_ROOT)
import pytest
from gateway.platforms.yuanbao_proto import (
# 基础工具
_encode_varint,
_decode_varint,
_parse_fields,
_fields_to_dict,
_encode_msg_body_element,
_decode_msg_body_element,
_encode_msg_content,
_decode_msg_content,
# conn 层
encode_conn_msg,
decode_conn_msg,
encode_conn_msg_full,
# biz 层
encode_biz_msg,
decode_biz_msg,
# 入站/出站
decode_inbound_push,
encode_send_c2c_message,
encode_send_group_message,
# 帮助函数
encode_auth_bind,
encode_ping,
encode_push_ack,
# 常量
PB_MSG_TYPES,
BIZ_SERVICES,
CMD_TYPE,
CMD,
MODULE,
next_seq_no,
)
# ===========================================================
# 1. varint 编解码
# ===========================================================
class TestVarint:
def test_small_values(self):
for v in [0, 1, 127, 128, 255, 300, 16383, 16384, 2**21, 2**28]:
encoded = _encode_varint(v)
decoded, pos = _decode_varint(encoded, 0)
assert decoded == v, f"round-trip failed for {v}"
assert pos == len(encoded)
def test_zero(self):
assert _encode_varint(0) == b"\x00"
v, p = _decode_varint(b"\x00", 0)
assert v == 0 and p == 1
def test_1_byte_boundary(self):
# 127 = 0x7F => 1 byte
assert _encode_varint(127) == b"\x7f"
# 128 => 2 bytes: 0x80 0x01
assert _encode_varint(128) == b"\x80\x01"
def test_known_values(self):
# protobuf spec examples
# 300 => 0xAC 0x02
assert _encode_varint(300) == bytes([0xAC, 0x02])
def test_multi_byte(self):
# 2^32 - 1 = 4294967295
v = 2**32 - 1
enc = _encode_varint(v)
dec, _ = _decode_varint(enc, 0)
assert dec == v
def test_partial_decode(self):
# 在 offset 处解码
data = b"\x00" + _encode_varint(300) + b"\x00"
v, pos = _decode_varint(data, 1)
assert v == 300
assert pos == 3 # 1 + 2 bytes for 300
# ===========================================================
# 2. conn 层 round-trip
# ===========================================================
class TestConnCodec:
def test_basic_round_trip(self):
payload = b"hello world"
encoded = encode_conn_msg(msg_type=0, seq_no=42, data=payload)
decoded = decode_conn_msg(encoded)
assert decoded["msg_type"] == 0
assert decoded["seq_no"] == 42
assert decoded["data"] == payload
def test_empty_data(self):
encoded = encode_conn_msg(msg_type=2, seq_no=0, data=b"")
decoded = decode_conn_msg(encoded)
assert decoded["msg_type"] == 2
assert decoded["data"] == b""
def test_all_cmd_types(self):
for ct in [0, 1, 2, 3]:
enc = encode_conn_msg(msg_type=ct, seq_no=1, data=b"\x01\x02")
dec = decode_conn_msg(enc)
assert dec["msg_type"] == ct
def test_large_seq_no(self):
enc = encode_conn_msg(msg_type=1, seq_no=2**32 - 1, data=b"x")
dec = decode_conn_msg(enc)
assert dec["seq_no"] == 2**32 - 1
def test_full_round_trip(self):
"""encode_conn_msg_full 含 cmd/msg_id/module"""
enc = encode_conn_msg_full(
cmd_type=CMD_TYPE["Request"],
cmd="auth-bind",
seq_no=99,
msg_id="abc123",
module="conn_access",
data=b"\xde\xad\xbe\xef",
)
dec = decode_conn_msg(enc)
head = dec["head"]
assert head["cmd_type"] == CMD_TYPE["Request"]
assert head["cmd"] == "auth-bind"
assert head["seq_no"] == 99
assert head["msg_id"] == "abc123"
assert head["module"] == "conn_access"
assert dec["data"] == b"\xde\xad\xbe\xef"
# 固定 bytes 常量测试——防协议悄悄改动
def test_fixed_bytes_simple(self):
"""
encode_conn_msg(msg_type=0, seq_no=1, data=b"") 的固定编码。
ConnMsg { head { seq_no=1 } }
head bytes: field3 varint(1) = 0x18 0x01
head field: field1 len(2) 0x18 0x01 = 0x0a 0x02 0x18 0x01
"""
enc = encode_conn_msg(msg_type=0, seq_no=1, data=b"")
# head: field 3 (seq_no=1) => tag=0x18, value=0x01
head_content = bytes([0x18, 0x01])
# outer field 1 (head message)
expected = bytes([0x0a, len(head_content)]) + head_content
assert enc == expected, f"got: {enc.hex()}, expected: {expected.hex()}"
# ===========================================================
# 3. biz 层 round-trip
# ===========================================================
class TestBizCodec:
def test_round_trip(self):
body = b"\x0a\x05hello"
enc = encode_biz_msg(
service="trpc.yuanbao.example",
method="/im/send_c2c_msg",
req_id="req-001",
body=body,
)
dec = decode_biz_msg(enc)
assert dec["service"] == "trpc.yuanbao.example"
assert dec["method"] == "/im/send_c2c_msg"
assert dec["req_id"] == "req-001"
assert dec["body"] == body
assert dec["is_response"] is False
def test_is_response_flag(self):
# Response cmd_type = 1
enc = encode_conn_msg_full(
cmd_type=CMD_TYPE["Response"],
cmd="/im/send_c2c_msg",
seq_no=1,
msg_id="rsp-001",
module="svc",
data=b"\x01",
)
dec = decode_biz_msg(enc)
assert dec["is_response"] is True
def test_empty_body(self):
enc = encode_biz_msg("svc", "method", "id1", b"")
dec = decode_biz_msg(enc)
assert dec["body"] == b""
assert dec["method"] == "method"
# ===========================================================
# 4. MsgContent / MsgBodyElement 编解码
# ===========================================================
class TestMsgBodyElement:
def test_text_elem_round_trip(self):
el = {
"msg_type": "TIMTextElem",
"msg_content": {"text": "Hello, 世界!"},
}
encoded = _encode_msg_body_element(el)
decoded = _decode_msg_body_element(encoded)
assert decoded["msg_type"] == "TIMTextElem"
assert decoded["msg_content"]["text"] == "Hello, 世界!"
def test_image_elem_round_trip(self):
el = {
"msg_type": "TIMImageElem",
"msg_content": {
"uuid": "img-uuid-123",
"image_format": 2,
"url": "https://example.com/img.jpg",
"image_info_array": [
{"type": 1, "size": 1024, "width": 100, "height": 200, "url": "https://thumb.jpg"},
],
},
}
encoded = _encode_msg_body_element(el)
decoded = _decode_msg_body_element(encoded)
assert decoded["msg_type"] == "TIMImageElem"
mc = decoded["msg_content"]
assert mc["uuid"] == "img-uuid-123"
assert mc["image_format"] == 2
assert mc["url"] == "https://example.com/img.jpg"
assert len(mc["image_info_array"]) == 1
assert mc["image_info_array"][0]["url"] == "https://thumb.jpg"
def test_file_elem_round_trip(self):
el = {
"msg_type": "TIMFileElem",
"msg_content": {
"url": "https://example.com/file.pdf",
"file_size": 204800,
"file_name": "document.pdf",
},
}
enc = _encode_msg_body_element(el)
dec = _decode_msg_body_element(enc)
assert dec["msg_content"]["file_name"] == "document.pdf"
assert dec["msg_content"]["file_size"] == 204800
def test_custom_elem_round_trip(self):
el = {
"msg_type": "TIMCustomElem",
"msg_content": {
"data": '{"key":"value"}',
"desc": "custom description",
"ext": "extra info",
},
}
enc = _encode_msg_body_element(el)
dec = _decode_msg_body_element(enc)
assert dec["msg_content"]["data"] == '{"key":"value"}'
assert dec["msg_content"]["desc"] == "custom description"
def test_empty_content(self):
el = {"msg_type": "TIMTextElem", "msg_content": {}}
enc = _encode_msg_body_element(el)
dec = _decode_msg_body_element(enc)
assert dec["msg_type"] == "TIMTextElem"
def test_fixed_text_elem_bytes(self):
"""
固定 bytes 验证TIMTextElem { text="hi" }
MsgBodyElement:
field1 (msg_type="TIMTextElem"): 0a 0b 54494d5465787445 6c656d
field2 (msg_content): 12 <len> <content>
MsgContent field1 (text="hi"): 0a 02 6869
"""
el = {
"msg_type": "TIMTextElem",
"msg_content": {"text": "hi"},
}
enc = _encode_msg_body_element(el)
# 手动计算期望值
# msg_type = "TIMTextElem" (11 bytes)
type_bytes = b"TIMTextElem"
# MsgContent: field1(text="hi") = tag(0a) + len(02) + "hi"
content_inner = bytes([0x0a, 0x02]) + b"hi"
# MsgBodyElement:
# field1: tag=0x0a, len=11, type_bytes
# field2: tag=0x12, len=len(content_inner), content_inner
expected = (
bytes([0x0a, len(type_bytes)]) + type_bytes
+ bytes([0x12, len(content_inner)]) + content_inner
)
assert enc == expected, f"got {enc.hex()}, expected {expected.hex()}"
# ===========================================================
# 5. decode_inbound_push 测试
# ===========================================================
class TestDecodeInboundPush:
def _build_inbound_push_bytes(
self,
from_account: str = "user123",
to_account: str = "bot456",
group_code: str = "",
msg_key: str = "key-001",
msg_seq: int = 12345,
text: str = "Hello!",
) -> bytes:
"""手工构造 InboundMessagePush bytes与 proto 字段顺序一致)"""
from gateway.platforms.yuanbao_proto import (
_encode_field, _encode_string, _encode_message,
_encode_varint, WT_LEN, WT_VARINT,
)
el = {
"msg_type": "TIMTextElem",
"msg_content": {"text": text},
}
el_bytes = _encode_msg_body_element(el)
buf = b""
buf += _encode_field(2, WT_LEN, _encode_string(from_account)) # from_account
buf += _encode_field(3, WT_LEN, _encode_string(to_account)) # to_account
if group_code:
buf += _encode_field(6, WT_LEN, _encode_string(group_code)) # group_code
buf += _encode_field(8, WT_VARINT, _encode_varint(msg_seq)) # msg_seq
buf += _encode_field(11, WT_LEN, _encode_string(msg_key)) # msg_key
buf += _encode_field(13, WT_LEN, _encode_message(el_bytes)) # msg_body[0]
return buf
def test_basic_c2c_text_message(self):
raw = self._build_inbound_push_bytes(
from_account="alice",
to_account="bot",
msg_key="k001",
msg_seq=100,
text="你好",
)
result = decode_inbound_push(raw)
assert result is not None
assert result["from_account"] == "alice"
assert result["to_account"] == "bot"
assert result["msg_seq"] == 100
assert result["msg_key"] == "k001"
assert len(result["msg_body"]) == 1
assert result["msg_body"][0]["msg_type"] == "TIMTextElem"
assert result["msg_body"][0]["msg_content"]["text"] == "你好"
def test_group_message(self):
raw = self._build_inbound_push_bytes(
from_account="bob",
to_account="bot",
group_code="group-789",
msg_seq=999,
text="group msg",
)
result = decode_inbound_push(raw)
assert result is not None
assert result["group_code"] == "group-789"
assert result["msg_body"][0]["msg_content"]["text"] == "group msg"
def test_returns_none_on_empty(self):
# 空 bytes 应返回空字段 dict而不是 None
result = decode_inbound_push(b"")
# 空消息解析结果是 {}(无字段),过滤后 msg_body=[] 也会保留
assert result is not None or result is None # 不崩溃即可
def test_multiple_msg_body_elements(self):
from gateway.platforms.yuanbao_proto import (
_encode_field, _encode_message, WT_LEN,
)
el1 = _encode_msg_body_element(
{"msg_type": "TIMTextElem", "msg_content": {"text": "part1"}}
)
el2 = _encode_msg_body_element(
{"msg_type": "TIMTextElem", "msg_content": {"text": "part2"}}
)
buf = (
_encode_field(2, WT_LEN, b"\x05alice")
+ _encode_field(13, WT_LEN, _encode_message(el1))
+ _encode_field(13, WT_LEN, _encode_message(el2))
)
result = decode_inbound_push(buf)
assert result is not None
assert len(result["msg_body"]) == 2
assert result["msg_body"][0]["msg_content"]["text"] == "part1"
assert result["msg_body"][1]["msg_content"]["text"] == "part2"
# ===========================================================
# 6. 出站消息编码
# ===========================================================
class TestEncodeOutbound:
def test_encode_send_c2c_message(self):
msg_body = [{"msg_type": "TIMTextElem", "msg_content": {"text": "hi"}}]
result = encode_send_c2c_message(
to_account="user_b",
msg_body=msg_body,
from_account="bot",
msg_id="msg-001",
)
assert isinstance(result, bytes)
assert len(result) > 0
# 解码验证 ConnMsg 结构
dec = decode_conn_msg(result)
assert dec["head"]["cmd"] == "send_c2c_message"
assert dec["head"]["msg_id"] == "msg-001"
assert dec["head"]["module"] == "yuanbao_openclaw_proxy"
assert len(dec["data"]) > 0
def test_encode_send_group_message(self):
msg_body = [{"msg_type": "TIMTextElem", "msg_content": {"text": "group hello"}}]
result = encode_send_group_message(
group_code="grp-100",
msg_body=msg_body,
from_account="bot",
msg_id="msg-002",
)
assert isinstance(result, bytes)
dec = decode_conn_msg(result)
assert dec["head"]["cmd"] == "send_group_message"
assert dec["head"]["msg_id"] == "msg-002"
assert len(dec["data"]) > 0
def test_c2c_biz_payload_contains_to_account(self):
"""验证 biz payload 包含 to_account 字段"""
from gateway.platforms.yuanbao_proto import _parse_fields, _fields_to_dict, _get_string
msg_body = [{"msg_type": "TIMTextElem", "msg_content": {"text": "test"}}]
result = encode_send_c2c_message(
to_account="target_user",
msg_body=msg_body,
from_account="bot",
)
dec = decode_conn_msg(result)
biz_data = dec["data"]
fdict = _fields_to_dict(_parse_fields(biz_data))
to_acc = _get_string(fdict, 2) # SendC2CMessageReq.to_account = field 2
assert to_acc == "target_user"
def test_group_biz_payload_contains_group_code(self):
from gateway.platforms.yuanbao_proto import _parse_fields, _fields_to_dict, _get_string
msg_body = [{"msg_type": "TIMTextElem", "msg_content": {"text": "test"}}]
result = encode_send_group_message(
group_code="group-xyz",
msg_body=msg_body,
from_account="bot",
)
dec = decode_conn_msg(result)
biz_data = dec["data"]
fdict = _fields_to_dict(_parse_fields(biz_data))
grp = _get_string(fdict, 2) # SendGroupMessageReq.group_code = field 2
assert grp == "group-xyz"
# ===========================================================
# 7. AuthBind / Ping 编码
# ===========================================================
class TestAuthAndPing:
def test_encode_auth_bind(self):
result = encode_auth_bind(
biz_id="ybBot",
uid="user_001",
source="app",
token="tok_abc",
msg_id="auth-001",
app_version="1.0.0",
operation_system="Linux",
bot_version="0.1.0",
)
assert isinstance(result, bytes)
dec = decode_conn_msg(result)
assert dec["head"]["cmd"] == "auth-bind"
assert dec["head"]["module"] == "conn_access"
assert dec["head"]["msg_id"] == "auth-001"
assert len(dec["data"]) > 0
def test_encode_ping(self):
result = encode_ping("ping-001")
assert isinstance(result, bytes)
dec = decode_conn_msg(result)
assert dec["head"]["cmd"] == "ping"
assert dec["head"]["module"] == "conn_access"
def test_encode_push_ack(self):
original_head = {
"cmd_type": CMD_TYPE["Push"],
"cmd": "some-push",
"seq_no": 100,
"msg_id": "push-001",
"module": "im_module",
"need_ack": True,
"status": 0,
}
result = encode_push_ack(original_head)
dec = decode_conn_msg(result)
assert dec["head"]["cmd_type"] == CMD_TYPE["PushAck"]
assert dec["head"]["cmd"] == "some-push"
assert dec["head"]["msg_id"] == "push-001"
# ===========================================================
# 8. 常量验证
# ===========================================================
class TestConstants:
def test_pb_msg_types_keys(self):
assert "ConnMsg" in PB_MSG_TYPES
assert "AuthBindReq" in PB_MSG_TYPES
assert "PingReq" in PB_MSG_TYPES
assert "KickoutMsg" in PB_MSG_TYPES
assert "PushMsg" in PB_MSG_TYPES
def test_biz_services_keys(self):
assert "SendC2CMessageReq" in BIZ_SERVICES
assert "SendGroupMessageReq" in BIZ_SERVICES
assert "InboundMessagePush" in BIZ_SERVICES
def test_cmd_type_values(self):
assert CMD_TYPE["Request"] == 0
assert CMD_TYPE["Response"] == 1
assert CMD_TYPE["Push"] == 2
assert CMD_TYPE["PushAck"] == 3
def test_pkg_prefix(self):
for k, v in BIZ_SERVICES.items():
assert v.startswith("yuanbao_openclaw_proxy"), \
f"{k}: unexpected prefix in {v}"
# ===========================================================
# 9. seq_no 生成
# ===========================================================
class TestSeqNo:
def test_monotonic(self):
a = next_seq_no()
b = next_seq_no()
c = next_seq_no()
assert b > a
assert c > b
def test_thread_safety(self):
import threading
results = []
lock = threading.Lock()
def worker():
for _ in range(100):
v = next_seq_no()
with lock:
results.append(v)
threads = [threading.Thread(target=worker) for _ in range(10)]
for t in threads:
t.start()
for t in threads:
t.join()
# 无重复
assert len(results) == len(set(results)), "duplicate seq_no detected"
# ===========================================================
# 10. 完整端到端流程(模拟 send -> recv
# ===========================================================
class TestEndToEnd:
def test_send_recv_c2c(self):
"""模拟发送 C2C 消息,然后(在接收方)解码"""
msg_body = [
{"msg_type": "TIMTextElem", "msg_content": {"text": "端到端测试"}},
]
# 发送方编码
wire_bytes = encode_send_c2c_message(
to_account="recv_user",
msg_body=msg_body,
from_account="send_bot",
msg_id="e2e-001",
)
# 接收方解码 ConnMsg
dec = decode_conn_msg(wire_bytes)
assert dec["head"]["cmd"] == "send_c2c_message"
assert dec["head"]["msg_id"] == "e2e-001"
# 从 biz payload 中读取 to_account 和 msg_body
from gateway.platforms.yuanbao_proto import (
_parse_fields, _fields_to_dict, _get_string, _get_repeated_bytes, WT_LEN
)
biz = dec["data"]
fdict = _fields_to_dict(_parse_fields(biz))
assert _get_string(fdict, 2) == "recv_user" # to_account
assert _get_string(fdict, 3) == "send_bot" # from_account
el_list = _get_repeated_bytes(fdict, 5) # msg_body repeated
assert len(el_list) == 1
el_dec = _decode_msg_body_element(el_list[0])
assert el_dec["msg_type"] == "TIMTextElem"
assert el_dec["msg_content"]["text"] == "端到端测试"
def test_inbound_push_full_flow(self):
"""构造服务端 push -> 解码入站消息"""
from gateway.platforms.yuanbao_proto import (
_encode_field, _encode_string, _encode_message,
_encode_varint, WT_LEN, WT_VARINT,
)
# 构造入站消息 biz payload
el_bytes = _encode_msg_body_element(
{"msg_type": "TIMTextElem", "msg_content": {"text": "server push"}}
)
biz_payload = (
_encode_field(2, WT_LEN, _encode_string("alice"))
+ _encode_field(3, WT_LEN, _encode_string("bot"))
+ _encode_field(6, WT_LEN, _encode_string("grp-001"))
+ _encode_field(8, WT_VARINT, _encode_varint(555))
+ _encode_field(11, WT_LEN, _encode_string("msg-key-xyz"))
+ _encode_field(13, WT_LEN, _encode_message(el_bytes))
)
# 封装成 ConnMsg模拟服务端 push
wire = encode_conn_msg_full(
cmd_type=CMD_TYPE["Push"],
cmd="/im/new_message",
seq_no=77,
msg_id="push-abc",
module="yuanbao_openclaw_proxy",
data=biz_payload,
need_ack=True,
)
# 接收方解码
conn = decode_conn_msg(wire)
assert conn["head"]["cmd_type"] == CMD_TYPE["Push"]
assert conn["head"]["need_ack"] is True
msg = decode_inbound_push(conn["data"])
assert msg is not None
assert msg["from_account"] == "alice"
assert msg["group_code"] == "grp-001"
assert msg["msg_seq"] == 555
assert msg["msg_key"] == "msg-key-xyz"
assert msg["msg_body"][0]["msg_content"]["text"] == "server push"
if __name__ == "__main__":
pytest.main([__file__, "-v"])

View File

@@ -235,3 +235,21 @@ class TestPostRedirectSsrf:
assert result["success"] is True
assert result["url"] == final
class TestAllowPrivateUrlsConfig:
@pytest.fixture(autouse=True)
def _reset_cache(self):
browser_tool._allow_private_urls_resolved = False
browser_tool._cached_allow_private_urls = None
yield
browser_tool._allow_private_urls_resolved = False
browser_tool._cached_allow_private_urls = None
def test_browser_config_string_false_stays_disabled(self, monkeypatch):
monkeypatch.setattr(
"hermes_cli.config.read_raw_config",
lambda: {"browser": {"allow_private_urls": "false"}},
)
assert browser_tool._allow_private_urls() is False

View File

@@ -717,3 +717,193 @@ class TestGpgAndGlobalConfigIsolation:
mgr = CheckpointManager(enabled=True)
assert mgr.ensure_checkpoint(str(work_dir), reason="prefix-shadow") is True
assert len(mgr.list_checkpoints(str(work_dir))) == 1
# =========================================================================
# Auto-maintenance: prune_checkpoints + maybe_auto_prune_checkpoints
# =========================================================================
class TestPruneCheckpoints:
"""Sweep orphan/stale shadow repos under CHECKPOINT_BASE (issue #3015 follow-up)."""
def _seed_shadow_repo(
self, base: Path, dir_hash: str, workdir: Path, mtime: float = None
) -> Path:
"""Create a minimal shadow repo on disk without invoking real git."""
import time as _time
shadow = base / dir_hash
shadow.mkdir(parents=True)
(shadow / "HEAD").write_text("ref: refs/heads/main\n")
(shadow / "HERMES_WORKDIR").write_text(str(workdir) + "\n")
(shadow / "info").mkdir()
(shadow / "info" / "exclude").write_text("node_modules/\n")
if mtime is not None:
for p in shadow.rglob("*"):
import os
os.utime(p, (mtime, mtime))
import os
os.utime(shadow, (mtime, mtime))
return shadow
def test_deletes_orphan_when_workdir_missing(self, tmp_path):
from tools.checkpoint_manager import prune_checkpoints
base = tmp_path / "checkpoints"
alive_work = tmp_path / "alive"
alive_work.mkdir()
alive_repo = self._seed_shadow_repo(base, "aaaa" * 4, alive_work)
orphan_repo = self._seed_shadow_repo(
base, "bbbb" * 4, tmp_path / "was-deleted"
)
result = prune_checkpoints(retention_days=0, checkpoint_base=base)
assert result["scanned"] == 2
assert result["deleted_orphan"] == 1
assert result["deleted_stale"] == 0
assert alive_repo.exists()
assert not orphan_repo.exists()
def test_deletes_stale_by_mtime_when_workdir_alive(self, tmp_path):
from tools.checkpoint_manager import prune_checkpoints
import time as _time
base = tmp_path / "checkpoints"
work = tmp_path / "work"
work.mkdir()
fresh_repo = self._seed_shadow_repo(base, "cccc" * 4, work)
stale_work = tmp_path / "stale_work"
stale_work.mkdir()
old = _time.time() - 60 * 86400 # 60 days ago
stale_repo = self._seed_shadow_repo(base, "dddd" * 4, stale_work, mtime=old)
result = prune_checkpoints(
retention_days=30, delete_orphans=False, checkpoint_base=base
)
assert result["deleted_orphan"] == 0
assert result["deleted_stale"] == 1
assert fresh_repo.exists()
assert not stale_repo.exists()
def test_orphan_takes_priority_over_stale(self, tmp_path):
"""Orphan detection counts first — reason="orphan" even if also stale."""
from tools.checkpoint_manager import prune_checkpoints
import time as _time
base = tmp_path / "checkpoints"
old = _time.time() - 60 * 86400
self._seed_shadow_repo(base, "eeee" * 4, tmp_path / "gone", mtime=old)
result = prune_checkpoints(retention_days=30, checkpoint_base=base)
assert result["deleted_orphan"] == 1
assert result["deleted_stale"] == 0
def test_delete_orphans_disabled_keeps_orphans(self, tmp_path):
from tools.checkpoint_manager import prune_checkpoints
base = tmp_path / "checkpoints"
orphan = self._seed_shadow_repo(base, "ffff" * 4, tmp_path / "gone")
result = prune_checkpoints(
retention_days=0, delete_orphans=False, checkpoint_base=base
)
assert result["deleted_orphan"] == 0
assert orphan.exists()
def test_skips_non_shadow_dirs(self, tmp_path):
"""Dirs without HEAD (non-initialised) are left alone."""
from tools.checkpoint_manager import prune_checkpoints
base = tmp_path / "checkpoints"
base.mkdir()
(base / "garbage-dir").mkdir()
(base / "garbage-dir" / "random.txt").write_text("hi")
result = prune_checkpoints(retention_days=0, checkpoint_base=base)
assert result["scanned"] == 0
assert (base / "garbage-dir").exists()
def test_tracks_bytes_freed(self, tmp_path):
from tools.checkpoint_manager import prune_checkpoints
base = tmp_path / "checkpoints"
orphan = self._seed_shadow_repo(base, "1234" * 4, tmp_path / "gone")
(orphan / "objects").mkdir()
(orphan / "objects" / "pack.bin").write_bytes(b"x" * 5000)
result = prune_checkpoints(retention_days=0, checkpoint_base=base)
assert result["deleted_orphan"] == 1
assert result["bytes_freed"] >= 5000
def test_base_missing_returns_empty_counts(self, tmp_path):
from tools.checkpoint_manager import prune_checkpoints
result = prune_checkpoints(checkpoint_base=tmp_path / "does-not-exist")
assert result == {
"scanned": 0, "deleted_orphan": 0, "deleted_stale": 0,
"errors": 0, "bytes_freed": 0,
}
class TestMaybeAutoPruneCheckpoints:
def _seed(self, base, dir_hash, workdir):
base.mkdir(parents=True, exist_ok=True)
shadow = base / dir_hash
shadow.mkdir()
(shadow / "HEAD").write_text("ref: refs/heads/main\n")
(shadow / "HERMES_WORKDIR").write_text(str(workdir) + "\n")
return shadow
def test_first_call_prunes_and_writes_marker(self, tmp_path):
from tools.checkpoint_manager import maybe_auto_prune_checkpoints
base = tmp_path / "checkpoints"
self._seed(base, "0000" * 4, tmp_path / "gone")
out = maybe_auto_prune_checkpoints(checkpoint_base=base)
assert out["skipped"] is False
assert out["result"]["deleted_orphan"] == 1
assert (base / ".last_prune").exists()
def test_second_call_within_interval_skips(self, tmp_path):
from tools.checkpoint_manager import maybe_auto_prune_checkpoints
base = tmp_path / "checkpoints"
self._seed(base, "1111" * 4, tmp_path / "gone")
first = maybe_auto_prune_checkpoints(
checkpoint_base=base, min_interval_hours=24
)
assert first["skipped"] is False
self._seed(base, "2222" * 4, tmp_path / "also-gone")
second = maybe_auto_prune_checkpoints(
checkpoint_base=base, min_interval_hours=24
)
assert second["skipped"] is True
# The second orphan must still exist — skip was honoured.
assert (base / ("2222" * 4)).exists()
def test_corrupt_marker_treated_as_no_prior_run(self, tmp_path):
from tools.checkpoint_manager import maybe_auto_prune_checkpoints
base = tmp_path / "checkpoints"
base.mkdir()
(base / ".last_prune").write_text("not-a-timestamp")
self._seed(base, "3333" * 4, tmp_path / "gone")
out = maybe_auto_prune_checkpoints(checkpoint_base=base)
assert out["skipped"] is False
assert out["result"]["deleted_orphan"] == 1
def test_missing_base_no_raise(self, tmp_path):
from tools.checkpoint_manager import maybe_auto_prune_checkpoints
out = maybe_auto_prune_checkpoints(
checkpoint_base=tmp_path / "does-not-exist"
)
assert out["skipped"] is False
assert out["result"]["scanned"] == 0

View File

@@ -16,8 +16,11 @@ from unittest.mock import patch, MagicMock
from tools.file_tools import (
read_file_tool,
write_file_tool,
reset_file_dedup,
_is_blocked_device,
_invalidate_dedup_for_path,
_READ_DEDUP_STATUS_MESSAGE,
_get_max_read_chars,
_DEFAULT_MAX_READ_CHARS,
_read_tracker,
@@ -161,7 +164,7 @@ class TestFileDedup(unittest.TestCase):
@patch("tools.file_tools._get_file_ops")
def test_second_read_returns_dedup_stub(self, mock_ops):
"""Second read of same file+range returns dedup stub."""
"""Second read of same file+range returns non-content dedup status."""
mock_ops.return_value = _make_fake_ops(
content="line one\nline two\n", file_size=20,
)
@@ -172,7 +175,83 @@ class TestFileDedup(unittest.TestCase):
# Second read — should get dedup stub
r2 = json.loads(read_file_tool(self._tmpfile, task_id="dup"))
self.assertTrue(r2.get("dedup"), "Second read should return dedup stub")
self.assertIn("unchanged", r2.get("content", ""))
self.assertEqual(r2.get("status"), "unchanged")
self.assertIn("unchanged", r2.get("message", ""))
self.assertFalse(r2.get("content_returned"))
self.assertNotIn("content", r2)
@patch("tools.file_tools._get_file_ops")
def test_write_rejects_internal_read_status_text(self, mock_ops):
"""write_file must not persist internal read_file status text."""
fake = MagicMock()
fake.write_file = MagicMock()
mock_ops.return_value = fake
result = json.loads(write_file_tool(
self._tmpfile,
_READ_DEDUP_STATUS_MESSAGE,
task_id="guard",
))
self.assertIn("error", result)
self.assertIn("internal read_file status text", result["error"])
fake.write_file.assert_not_called()
@patch("tools.file_tools._get_file_ops")
def test_write_rejects_status_text_with_small_framing(self, mock_ops):
"""write_file rejects small wrappers around the status text too.
Real-world corruption shapes aren't always the verbatim message — the
model sometimes prepends a short note or appends a trailing comment
before calling write_file. A short, status-dominated write is still
corruption, not legitimate file content.
"""
fake = MagicMock()
fake.write_file = MagicMock()
mock_ops.return_value = fake
wrapped = "Note: " + _READ_DEDUP_STATUS_MESSAGE + "\n\n(continuing.)"
result = json.loads(write_file_tool(
self._tmpfile,
wrapped,
task_id="guard",
))
self.assertIn("error", result)
self.assertIn("internal read_file status text", result["error"])
fake.write_file.assert_not_called()
@patch("tools.file_tools._get_file_ops")
def test_write_allows_large_file_that_quotes_status_text(self, mock_ops):
"""Legitimate large content that happens to quote the status is allowed.
Hermes' own docs / SKILL.md files may legitimately mention the dedup
message verbatim. Only short, status-dominated writes are rejected —
a normal file that contains the message as one line out of many must
still write successfully.
"""
fake = MagicMock()
fake.write_file = lambda path, content: MagicMock(
to_dict=lambda: {"success": True, "path": path}
)
mock_ops.return_value = fake
# Build content that contains the status text but is much larger,
# so the status doesn't "dominate" — this is a legitimate file.
large_content = (
"# Skill reference\n\n"
"Example internal message (do not write back):\n\n"
f" {_READ_DEDUP_STATUS_MESSAGE}\n\n"
+ ("This is documentation content. " * 200)
)
result = json.loads(write_file_tool(
self._tmpfile,
large_content,
task_id="guard",
))
self.assertNotIn("error", result)
self.assertTrue(result.get("success"))
@patch("tools.file_tools._get_file_ops")
def test_modified_file_not_deduped(self, mock_ops):
@@ -374,5 +453,174 @@ class TestConfigOverride(unittest.TestCase):
self.assertIn("content", result)
# ---------------------------------------------------------------------------
# Write invalidates dedup cache (fixes #13144)
# ---------------------------------------------------------------------------
class TestWriteInvalidatesDedup(unittest.TestCase):
"""write_file_tool and patch_tool must invalidate the read_file dedup
cache for the written path. Without this, a read→write→read sequence
within the same mtime second returns a stale 'File unchanged' stub.
Regression test for https://github.com/NousResearch/hermes-agent/issues/13144
"""
def setUp(self):
_read_tracker.clear()
self._tmpdir = tempfile.mkdtemp()
self._tmpfile = os.path.join(self._tmpdir, "write_dedup.txt")
with open(self._tmpfile, "w") as f:
f.write("original content\n")
def tearDown(self):
_read_tracker.clear()
try:
os.unlink(self._tmpfile)
os.rmdir(self._tmpdir)
except OSError:
pass
@patch("tools.file_tools._get_file_ops")
def test_write_invalidates_dedup_same_second(self, mock_ops):
"""read→write→read within the same mtime second returns fresh content.
This is the core #13144 scenario: on filesystems with ≥1ms mtime
granularity, a write that lands in the same timestamp as the prior
read would previously cause the second read to return a stale dedup
stub because the mtime comparison saw no change.
"""
fake = MagicMock()
fake.read_file = lambda path, offset=1, limit=500: _FakeReadResult(
content="original content\n", total_lines=1, file_size=18,
)
fake.write_file = lambda path, content: MagicMock(
to_dict=lambda: {"success": True, "path": path}
)
mock_ops.return_value = fake
# 1. Read — populates dedup cache.
r1 = json.loads(read_file_tool(self._tmpfile, task_id="wr"))
self.assertNotEqual(r1.get("dedup"), True)
# 2. Write — must invalidate dedup for this path.
# (No sleep — we intentionally stay in the same mtime second.)
write_file_tool(self._tmpfile, "new content\n", task_id="wr")
# 3. Read again — should get full content, NOT dedup stub.
fake.read_file = lambda path, offset=1, limit=500: _FakeReadResult(
content="new content\n", total_lines=1, file_size=13,
)
r2 = json.loads(read_file_tool(self._tmpfile, task_id="wr"))
self.assertNotEqual(r2.get("dedup"), True,
"read after write must not return dedup stub")
self.assertIn("content", r2)
@patch("tools.file_tools._get_file_ops")
def test_write_invalidates_all_offsets(self, mock_ops):
"""A write invalidates dedup entries for ALL offset/limit combos."""
fake = MagicMock()
fake.read_file = lambda path, offset=1, limit=500: _FakeReadResult(
content="line1\nline2\nline3\n", total_lines=3, file_size=20,
)
fake.write_file = lambda path, content: MagicMock(
to_dict=lambda: {"success": True, "path": path}
)
mock_ops.return_value = fake
# Read with different offsets to populate multiple dedup entries.
read_file_tool(self._tmpfile, offset=1, limit=100, task_id="off")
read_file_tool(self._tmpfile, offset=50, limit=100, task_id="off")
# Write — should invalidate BOTH dedup entries.
write_file_tool(self._tmpfile, "replaced\n", task_id="off")
# Both reads should return fresh content.
r1 = json.loads(read_file_tool(self._tmpfile, offset=1, limit=100, task_id="off"))
r2 = json.loads(read_file_tool(self._tmpfile, offset=50, limit=100, task_id="off"))
self.assertNotEqual(r1.get("dedup"), True,
"offset=1 should not dedup after write")
self.assertNotEqual(r2.get("dedup"), True,
"offset=50 should not dedup after write")
@patch("tools.file_tools._get_file_ops")
def test_write_does_not_invalidate_other_files(self, mock_ops):
"""Writing file A should not invalidate dedup for file B."""
other = os.path.join(self._tmpdir, "other.txt")
with open(other, "w") as f:
f.write("other content\n")
fake = MagicMock()
fake.read_file = lambda path, offset=1, limit=500: _FakeReadResult(
content="other content\n", total_lines=1, file_size=15,
)
fake.write_file = lambda path, content: MagicMock(
to_dict=lambda: {"success": True, "path": path}
)
mock_ops.return_value = fake
# Read file B.
read_file_tool(other, task_id="iso")
# Write file A.
write_file_tool(self._tmpfile, "changed A\n", task_id="iso")
# File B should still dedup (untouched).
r2 = json.loads(read_file_tool(other, task_id="iso"))
self.assertTrue(r2.get("dedup"),
"Unrelated file should still dedup after writing another file")
try:
os.unlink(other)
except OSError:
pass
@patch("tools.file_tools._get_file_ops")
def test_write_does_not_invalidate_other_tasks(self, mock_ops):
"""Writing in task A should not invalidate dedup for task B."""
fake = MagicMock()
fake.read_file = lambda path, offset=1, limit=500: _FakeReadResult(
content="original content\n", total_lines=1, file_size=18,
)
fake.write_file = lambda path, content: MagicMock(
to_dict=lambda: {"success": True, "path": path}
)
mock_ops.return_value = fake
# Both tasks read the file.
read_file_tool(self._tmpfile, task_id="taskA")
read_file_tool(self._tmpfile, task_id="taskB")
# Task A writes.
write_file_tool(self._tmpfile, "new\n", task_id="taskA")
# Task A's dedup should be invalidated.
rA = json.loads(read_file_tool(self._tmpfile, task_id="taskA"))
self.assertNotEqual(rA.get("dedup"), True,
"Writing task's dedup should be invalidated")
# Task B still sees dedup (its cache is separate — the file
# *may* have changed on disk, but mtime comparison handles that;
# here we test that invalidation is scoped to the writing task).
# Note: on real FS, task B's dedup might or might not hit depending
# on mtime. The point is that _invalidate_dedup_for_path is
# correctly scoped to task_id.
def test_invalidate_dedup_for_path_noop_on_missing_task(self):
"""_invalidate_dedup_for_path is safe when task_id doesn't exist."""
_read_tracker.clear()
# Should not raise.
_invalidate_dedup_for_path("/nonexistent/path", "no_such_task")
def test_invalidate_dedup_for_path_noop_on_empty_dedup(self):
"""_invalidate_dedup_for_path is safe when dedup dict is empty."""
_read_tracker.clear()
_read_tracker["t"] = {
"last_key": None, "consecutive": 0,
"read_history": set(), "dedup": {},
}
_invalidate_dedup_for_path("/some/path", "t")
self.assertEqual(_read_tracker["t"]["dedup"], {})
if __name__ == "__main__":
unittest.main()

View File

@@ -81,37 +81,51 @@ class TestStdioPidTracking:
def test_kill_orphaned_noop_when_empty(self):
"""_kill_orphaned_mcp_children does nothing when no PIDs tracked."""
from tools.mcp_tool import _kill_orphaned_mcp_children, _stdio_pids, _lock
from tools.mcp_tool import (
_kill_orphaned_mcp_children,
_orphan_stdio_pids,
_stdio_pids,
_lock,
)
with _lock:
_stdio_pids.clear()
_orphan_stdio_pids.clear()
# Should not raise
_kill_orphaned_mcp_children()
def test_kill_orphaned_handles_dead_pids(self):
"""_kill_orphaned_mcp_children gracefully handles already-dead PIDs."""
from tools.mcp_tool import _kill_orphaned_mcp_children, _stdio_pids, _lock
from tools.mcp_tool import (
_kill_orphaned_mcp_children,
_orphan_stdio_pids,
_lock,
)
# Use a PID that definitely doesn't exist
fake_pid = 999999999
with _lock:
_stdio_pids[fake_pid] = "test"
_orphan_stdio_pids.add(fake_pid)
# Should not raise (ProcessLookupError is caught)
_kill_orphaned_mcp_children()
with _lock:
assert fake_pid not in _stdio_pids
assert fake_pid not in _orphan_stdio_pids
def test_kill_orphaned_uses_sigkill_when_available(self, monkeypatch):
"""SIGTERM-first then SIGKILL after 2s for orphan cleanup."""
from tools.mcp_tool import _kill_orphaned_mcp_children, _stdio_pids, _lock
from tools.mcp_tool import (
_kill_orphaned_mcp_children,
_orphan_stdio_pids,
_lock,
)
fake_pid = 424242
with _lock:
_stdio_pids.clear()
_stdio_pids[fake_pid] = "test"
_orphan_stdio_pids.clear()
_orphan_stdio_pids.add(fake_pid)
fake_sigkill = 9
monkeypatch.setattr(signal, "SIGKILL", fake_sigkill, raising=False)
@@ -128,16 +142,20 @@ class TestStdioPidTracking:
mock_sleep.assert_called_once_with(2)
with _lock:
assert fake_pid not in _stdio_pids
assert fake_pid not in _orphan_stdio_pids
def test_kill_orphaned_falls_back_without_sigkill(self, monkeypatch):
"""Without SIGKILL, SIGTERM is used for both phases."""
from tools.mcp_tool import _kill_orphaned_mcp_children, _stdio_pids, _lock
from tools.mcp_tool import (
_kill_orphaned_mcp_children,
_orphan_stdio_pids,
_lock,
)
fake_pid = 434343
with _lock:
_stdio_pids.clear()
_stdio_pids[fake_pid] = "test"
_orphan_stdio_pids.clear()
_orphan_stdio_pids.add(fake_pid)
monkeypatch.delattr(signal, "SIGKILL", raising=False)
@@ -150,7 +168,7 @@ class TestStdioPidTracking:
assert mock_sleep.called
with _lock:
assert fake_pid not in _stdio_pids
assert fake_pid not in _orphan_stdio_pids
# ---------------------------------------------------------------------------

View File

@@ -317,6 +317,7 @@ class TestBuiltinDiscovery:
"tools.tts_tool",
"tools.vision_tools",
"tools.web_tools",
"tools.yuanbao_tools",
}
with patch("tools.registry.importlib.import_module"):

View File

@@ -167,6 +167,39 @@ class TestSendMessageTool:
media_files=[],
)
def test_mirror_receives_current_session_user_id(self):
config, _telegram_cfg = _make_config()
with patch("gateway.config.load_gateway_config", return_value=config), \
patch("tools.interrupt.is_interrupted", return_value=False), \
patch("model_tools._run_async", side_effect=_run_async_immediately), \
patch("tools.send_message_tool._send_to_platform", new=AsyncMock(return_value={"success": True})), \
patch("gateway.session_context.get_session_env") as get_session_env_mock, \
patch("gateway.mirror.mirror_to_session", return_value=True) as mirror_mock:
get_session_env_mock.side_effect = lambda name, default="": {
"HERMES_SESSION_PLATFORM": "telegram",
"HERMES_SESSION_USER_ID": "user-123",
}.get(name, default)
result = json.loads(
send_message_tool(
{
"action": "send",
"target": "telegram:12345",
"message": "hello",
}
)
)
assert result["success"] is True
mirror_mock.assert_called_once_with(
"telegram",
"12345",
"hello",
source_label="telegram",
thread_id=None,
user_id="user-123",
)
def test_top_level_send_failure_redacts_query_token(self):
config, _telegram_cfg = _make_config()
leaked = "very-secret-query-token-123456"
@@ -810,6 +843,44 @@ class TestParseTargetRefE164:
assert _parse_target_ref("matrix", "+15551234567")[2] is False
class TestParseTargetRefSlack:
"""_parse_target_ref recognizes Slack channel/user IDs as explicit."""
def test_public_channel_id_is_explicit(self):
chat_id, thread_id, is_explicit = _parse_target_ref("slack", "C0B0QV5434G")
assert chat_id == "C0B0QV5434G"
assert thread_id is None
assert is_explicit is True
def test_private_channel_id_is_explicit(self):
assert _parse_target_ref("slack", "G123ABCDEF")[2] is True
def test_dm_id_is_explicit(self):
assert _parse_target_ref("slack", "D123ABCDEF")[2] is True
def test_user_id_is_not_explicit(self):
"""Slack user IDs (U...) and workspace IDs (W...) are NOT explicit send
targets. chat.postMessage rejects them — a DM must be opened first via
conversations.open to obtain a D... conversation ID.
"""
assert _parse_target_ref("slack", "U123ABCDEF")[2] is False
assert _parse_target_ref("slack", "W123ABCDEF")[2] is False
def test_whitespace_is_stripped(self):
chat_id, _, is_explicit = _parse_target_ref("slack", " C0B0QV5434G ")
assert chat_id == "C0B0QV5434G"
assert is_explicit is True
def test_lowercase_or_short_id_is_not_explicit(self):
assert _parse_target_ref("slack", "c0b0qv5434g")[2] is False
assert _parse_target_ref("slack", "C123")[2] is False
assert _parse_target_ref("slack", "X0B0QV5434G")[2] is False
def test_slack_id_not_explicit_for_other_platforms(self):
assert _parse_target_ref("discord", "C0B0QV5434G")[2] is False
assert _parse_target_ref("telegram", "C0B0QV5434G")[2] is False
class TestSendDiscordThreadId:
"""_send_discord uses thread_id when provided."""

View File

@@ -10,6 +10,7 @@ from tools.session_search_tool import (
_format_conversation,
_truncate_around_matches,
_get_session_search_max_concurrency,
_list_recent_sessions,
_HIDDEN_SESSION_SOURCES,
MAX_SESSION_CHARS,
SESSION_SEARCH_SCHEMA,
@@ -240,6 +241,54 @@ class TestSessionSearchConcurrency:
assert max_seen["value"] == 1
class TestRecentSessionListing:
def test_current_child_session_excludes_root_lineage_even_when_child_id_is_longer(self):
from unittest.mock import MagicMock
mock_db = MagicMock()
mock_db.list_sessions_rich.return_value = [
{
"id": "root",
"title": "Current conversation",
"source": "cli",
"started_at": 1709500000,
"last_active": 1709500100,
"message_count": 4,
"preview": "current root",
"parent_session_id": None,
},
{
"id": "other_session",
"title": "Other conversation",
"source": "cli",
"started_at": 1709400000,
"last_active": 1709400100,
"message_count": 3,
"preview": "other root",
"parent_session_id": None,
},
]
def _get_session(session_id):
if session_id == "child_session_id_that_is_definitely_longer":
return {"parent_session_id": "root"}
if session_id == "root":
return {"parent_session_id": None}
return None
mock_db.get_session.side_effect = _get_session
result = json.loads(_list_recent_sessions(
mock_db,
limit=5,
current_session_id="child_session_id_that_is_definitely_longer",
))
assert result["success"] is True
assert [item["session_id"] for item in result["results"]] == ["other_session"]
assert all(item["session_id"] != "root" for item in result["results"])
# =========================================================================
# session_search (dispatcher)
# =========================================================================

View File

@@ -0,0 +1,107 @@
"""
Regression tests for the shared-container task_id mapping.
The top-level agent and all delegate_task subagents share a single
terminal sandbox keyed by ``"default"``. ``_resolve_container_task_id``
is the sole gatekeeper for which tool-call task_ids go to the shared
container vs. get their own isolated sandbox. RL / benchmark
environments opt in to isolation by calling
``register_task_env_overrides(task_id, {...})`` before the agent loop;
every other task_id collapses back to ``"default"``.
If you change the collapse logic, update both the helper and these
tests -- see `hermes-agent-dev` skill, "Why do subagents get their own
containers?" section, and the Container lifecycle paragraph under
Docker Backend in ``website/docs/user-guide/configuration.md``.
"""
import pytest
from tools import terminal_tool
@pytest.fixture(autouse=True)
def _clean_overrides():
"""Ensure no stray overrides from other tests leak in."""
before = dict(terminal_tool._task_env_overrides)
terminal_tool._task_env_overrides.clear()
yield
terminal_tool._task_env_overrides.clear()
terminal_tool._task_env_overrides.update(before)
def test_none_task_id_maps_to_default():
assert terminal_tool._resolve_container_task_id(None) == "default"
def test_empty_task_id_maps_to_default():
assert terminal_tool._resolve_container_task_id("") == "default"
def test_literal_default_stays_default():
assert terminal_tool._resolve_container_task_id("default") == "default"
def test_subagent_task_id_collapses_to_default():
# delegate_task constructs IDs like "subagent-<N>-<uuid_hex>"; these
# should share the parent's container, not spin up their own.
assert terminal_tool._resolve_container_task_id("subagent-0-deadbeef") == "default"
assert terminal_tool._resolve_container_task_id("subagent-42-cafef00d") == "default"
def test_arbitrary_session_id_collapses_to_default():
# Session UUIDs or anything else without an override still collapse.
assert terminal_tool._resolve_container_task_id("sess-123e4567-e89b-12d3") == "default"
def test_rl_task_with_override_keeps_its_own_id():
# RL / benchmark pattern: register a per-task image, then the task_id
# must survive ``_resolve_container_task_id`` so the rollout lands in
# its own sandbox.
terminal_tool.register_task_env_overrides(
"tb2-task-fix-git", {"docker_image": "tb2:fix-git", "cwd": "/app"}
)
try:
assert (
terminal_tool._resolve_container_task_id("tb2-task-fix-git")
== "tb2-task-fix-git"
)
finally:
terminal_tool.clear_task_env_overrides("tb2-task-fix-git")
def test_cleared_override_collapses_again():
terminal_tool.register_task_env_overrides("tb2-x", {"docker_image": "x:y"})
assert terminal_tool._resolve_container_task_id("tb2-x") == "tb2-x"
terminal_tool.clear_task_env_overrides("tb2-x")
assert terminal_tool._resolve_container_task_id("tb2-x") == "default"
def test_get_active_env_reads_shared_container_from_subagent_id():
"""``get_active_env`` must see the shared ``"default"`` sandbox when
called with a subagent's task_id, so the agent loop's turn-budget
enforcement reads the real env (not None) during delegation."""
sentinel = object()
terminal_tool._active_environments["default"] = sentinel
try:
assert terminal_tool.get_active_env("subagent-7-cafe") is sentinel
assert terminal_tool.get_active_env(None) is sentinel
assert terminal_tool.get_active_env("default") is sentinel
finally:
terminal_tool._active_environments.pop("default", None)
def test_get_active_env_honours_rl_override():
rl_env = object()
default_env = object()
terminal_tool._active_environments["default"] = default_env
terminal_tool._active_environments["rl-42"] = rl_env
terminal_tool.register_task_env_overrides("rl-42", {"docker_image": "x"})
try:
# With an override registered, lookup returns the task's own env,
# not the shared "default" one.
assert terminal_tool.get_active_env("rl-42") is rl_env
finally:
terminal_tool.clear_task_env_overrides("rl-42")
terminal_tool._active_environments.pop("default", None)
terminal_tool._active_environments.pop("rl-42", None)

View File

@@ -22,6 +22,7 @@ from tools.tool_backend_helpers import (
managed_nous_tools_enabled,
normalize_browser_cloud_provider,
normalize_modal_mode,
prefers_gateway,
resolve_modal_backend_state,
resolve_openai_audio_api_key,
)
@@ -189,6 +190,27 @@ class TestHasDirectModalCredentials:
assert has_direct_modal_credentials() is True
# ---------------------------------------------------------------------------
# prefers_gateway
# ---------------------------------------------------------------------------
class TestPrefersGateway:
"""Honor bool-ish config values for tool gateway routing."""
def test_returns_false_for_quoted_false(self, monkeypatch):
monkeypatch.setattr(
"hermes_cli.config.load_config",
lambda: {"web": {"use_gateway": "false"}},
)
assert prefers_gateway("web") is False
def test_returns_true_for_quoted_true(self, monkeypatch):
monkeypatch.setattr(
"hermes_cli.config.load_config",
lambda: {"web": {"use_gateway": "true"}},
)
assert prefers_gateway("web") is True
# ---------------------------------------------------------------------------
# resolve_modal_backend_state
# ---------------------------------------------------------------------------

View File

@@ -259,6 +259,20 @@ class TestGlobalAllowPrivateUrls:
with patch("hermes_cli.config.read_raw_config", return_value=cfg):
assert _global_allow_private_urls() is True
def test_config_security_string_false_stays_disabled(self, monkeypatch):
"""Quoted false must not opt out of SSRF protection."""
monkeypatch.delenv("HERMES_ALLOW_PRIVATE_URLS", raising=False)
cfg = {"security": {"allow_private_urls": "false"}}
with patch("hermes_cli.config.read_raw_config", return_value=cfg):
assert _global_allow_private_urls() is False
def test_config_browser_string_false_stays_disabled(self, monkeypatch):
"""Legacy browser.allow_private_urls also normalises quoted false."""
monkeypatch.delenv("HERMES_ALLOW_PRIVATE_URLS", raising=False)
cfg = {"browser": {"allow_private_urls": "false"}}
with patch("hermes_cli.config.read_raw_config", return_value=cfg):
assert _global_allow_private_urls() is False
def test_config_security_takes_precedence_over_browser(self, monkeypatch):
"""security section is checked before browser section."""
monkeypatch.delenv("HERMES_ALLOW_PRIVATE_URLS", raising=False)