From a8b7db35b2173f9adef77fd46a0e06a080b2149a Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Tue, 14 Apr 2026 22:07:28 -0700
Subject: [PATCH 01/25] fix: interrupt agent immediately when user messages
 during active run (#10068)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When a user sends a message while the agent is executing a task on the
gateway, the agent is now interrupted immediately — not silently queued.
Previously, messages were stored in _pending_messages with zero feedback
to the user, potentially leaving them waiting 1+ hours.

Root cause: Level 1 guard (base.py) intercepted all messages for active
sessions and returned with no response. Level 2 (gateway/run.py) which
calls agent.interrupt() was never reached.

Fix: Expand _handle_active_session_busy_message to handle the normal
(non-draining) case:
  1. Call running_agent.interrupt(text) to abort in-flight tool calls
     and exit the agent loop at the next check point
  2. Store the message as pending so it becomes the next turn once the
     interrupted run returns
  3. Send a brief ack: 'Interrupting current task (10 min elapsed,
     iteration 21/60, running: terminal). I'll respond shortly.'
  4. Debounce acks to once per 30s to avoid spam on rapid messages

Reported by @Lonely__MH.
---
 gateway/run.py                         | 106 +++++++--
 tests/gateway/test_busy_session_ack.py | 293 +++++++++++++++++++++++++
 2 files changed, 385 insertions(+), 14 deletions(-)
 create mode 100644 tests/gateway/test_busy_session_ack.py

diff --git a/gateway/run.py b/gateway/run.py
index 1bef295c3..cfb4af82e 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -573,6 +573,7 @@ class GatewayRunner:
         self._running_agents: Dict[str, Any] = {}
         self._running_agents_ts: Dict[str, float] = {}  # start timestamp per session
         self._pending_messages: Dict[str, str] = {}  # Queued messages during interrupt
+        self._busy_ack_ts: Dict[str, float] = {}  # last busy-ack timestamp per session (debounce)
 
         # Cache AIAgent instances per session to preserve prompt caching.
         # Without this, a new AIAgent is created per message, rebuilding the
@@ -1329,26 +1330,100 @@ class GatewayRunner:
         merge_pending_message_event(adapter._pending_messages, session_key, event)
 
     async def _handle_active_session_busy_message(self, event: MessageEvent, session_key: str) -> bool:
-        if not self._draining:
-            return False
+        # --- Draining case (gateway restarting/stopping) ---
+        if self._draining:
+            adapter = self.adapters.get(event.source.platform)
+            if not adapter:
+                return True
+
+            thread_meta = {"thread_id": event.source.thread_id} if event.source.thread_id else None
+            if self._queue_during_drain_enabled():
+                self._queue_or_replace_pending_event(session_key, event)
+                message = f"⏳ Gateway {self._status_action_gerund()} — queued for the next turn after it comes back."
+            else:
+                message = f"⏳ Gateway is {self._status_action_gerund()} and is not accepting another turn right now."
+
+            await adapter._send_with_retry(
+                chat_id=event.source.chat_id,
+                content=message,
+                reply_to=event.message_id,
+                metadata=thread_meta,
+            )
+            return True
+
+        # --- Normal busy case (agent actively running a task) ---
+        # The user sent a message while the agent is working.  Interrupt the
+        # agent immediately so it stops the current tool-calling loop and
+        # processes the new message.  The pending message is stored in the
+        # adapter so the base adapter picks it up once the interrupted run
+        # returns.  A brief ack tells the user what's happening (debounced
+        # to avoid spam when they fire multiple messages quickly).
 
         adapter = self.adapters.get(event.source.platform)
         if not adapter:
-            return True
+            return False  # let default path handle it
+
+        # Store the message so it's processed as the next turn after the
+        # interrupt causes the current run to exit.
+        from gateway.platforms.base import merge_pending_message_event
+        merge_pending_message_event(adapter._pending_messages, session_key, event)
+
+        # Interrupt the running agent — this aborts in-flight tool calls and
+        # causes the agent loop to exit at the next check point.
+        running_agent = self._running_agents.get(session_key)
+        if running_agent and running_agent is not _AGENT_PENDING_SENTINEL:
+            try:
+                running_agent.interrupt(event.text)
+            except Exception:
+                pass  # don't let interrupt failure block the ack
+
+        # Debounce: only send an acknowledgment once every 30 seconds per session
+        # to avoid spamming the user when they send multiple messages quickly
+        _BUSY_ACK_COOLDOWN = 30
+        now = time.time()
+        last_ack = self._busy_ack_ts.get(session_key, 0)
+        if now - last_ack < _BUSY_ACK_COOLDOWN:
+            return True  # interrupt sent, ack already delivered recently
+
+        self._busy_ack_ts[session_key] = now
+
+        # Build a status-rich acknowledgment
+        status_parts = []
+        if running_agent and running_agent is not _AGENT_PENDING_SENTINEL:
+            try:
+                summary = running_agent.get_activity_summary()
+                iteration = summary.get("api_call_count", 0)
+                max_iter = summary.get("max_iterations", 0)
+                current_tool = summary.get("current_tool")
+                start_ts = self._running_agents_ts.get(session_key, 0)
+                if start_ts:
+                    elapsed_min = int((now - start_ts) / 60)
+                    if elapsed_min > 0:
+                        status_parts.append(f"{elapsed_min} min elapsed")
+                if max_iter:
+                    status_parts.append(f"iteration {iteration}/{max_iter}")
+                if current_tool:
+                    status_parts.append(f"running: {current_tool}")
+            except Exception:
+                pass
+
+        status_detail = f" ({', '.join(status_parts)})" if status_parts else ""
+        message = (
+            f"⚡ Interrupting current task{status_detail}. "
+            f"I'll respond to your message shortly."
+        )
 
         thread_meta = {"thread_id": event.source.thread_id} if event.source.thread_id else None
-        if self._queue_during_drain_enabled():
-            self._queue_or_replace_pending_event(session_key, event)
-            message = f"⏳ Gateway {self._status_action_gerund()} — queued for the next turn after it comes back."
-        else:
-            message = f"⏳ Gateway is {self._status_action_gerund()} and is not accepting another turn right now."
+        try:
+            await adapter._send_with_retry(
+                chat_id=event.source.chat_id,
+                content=message,
+                reply_to=event.message_id,
+                metadata=thread_meta,
+            )
+        except Exception as e:
+            logger.debug("Failed to send busy-ack: %s", e)
 
-        await adapter._send_with_retry(
-            chat_id=event.source.chat_id,
-            content=message,
-            reply_to=event.message_id,
-            metadata=thread_meta,
-        )
         return True
 
     async def _drain_active_agents(self, timeout: float) -> tuple[Dict[str, Any], bool]:
@@ -2237,6 +2312,8 @@ class GatewayRunner:
             self._running_agents.clear()
             self._pending_messages.clear()
             self._pending_approvals.clear()
+            if hasattr(self, '_busy_ack_ts'):
+                self._busy_ack_ts.clear()
             self._shutdown_event.set()
 
             # Global cleanup: kill any remaining tool subprocesses not tied
@@ -2721,6 +2798,7 @@ class GatewayRunner:
                 )
                 del self._running_agents[_quick_key]
                 self._running_agents_ts.pop(_quick_key, None)
+                self._busy_ack_ts.pop(_quick_key, None)
 
         if _quick_key in self._running_agents:
             if event.get_command() == "status":
diff --git a/tests/gateway/test_busy_session_ack.py b/tests/gateway/test_busy_session_ack.py
new file mode 100644
index 000000000..07fe5fa27
--- /dev/null
+++ b/tests/gateway/test_busy_session_ack.py
@@ -0,0 +1,293 @@
+"""Tests for busy-session acknowledgment when user sends messages during active agent runs.
+
+Verifies that users get an immediate status response instead of total silence
+when the agent is working on a task. See PR fix for the @Lonely__MH report.
+"""
+import asyncio
+import time
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+# ---------------------------------------------------------------------------
+# Minimal stubs so we can import gateway code without heavy deps
+# ---------------------------------------------------------------------------
+import sys, types
+
+_tg = types.ModuleType("telegram")
+_tg.constants = types.ModuleType("telegram.constants")
+_ct = MagicMock()
+_ct.SUPERGROUP = "supergroup"
+_ct.GROUP = "group"
+_ct.PRIVATE = "private"
+_tg.constants.ChatType = _ct
+sys.modules.setdefault("telegram", _tg)
+sys.modules.setdefault("telegram.constants", _tg.constants)
+sys.modules.setdefault("telegram.ext", types.ModuleType("telegram.ext"))
+
+from gateway.platforms.base import (
+    BasePlatformAdapter,
+    MessageEvent,
+    MessageType,
+    SessionSource,
+    build_session_key,
+)
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _make_event(text="hello", chat_id="123", platform_val="telegram"):
+    """Build a minimal MessageEvent."""
+    source = SessionSource(
+        platform=MagicMock(value=platform_val),
+        chat_id=chat_id,
+        chat_type="private",
+        user_id="user1",
+    )
+    evt = MessageEvent(
+        text=text,
+        message_type=MessageType.TEXT,
+        source=source,
+        message_id="msg1",
+    )
+    return evt
+
+
+def _make_runner():
+    """Build a minimal GatewayRunner-like object for testing."""
+    from gateway.run import GatewayRunner, _AGENT_PENDING_SENTINEL
+
+    runner = object.__new__(GatewayRunner)
+    runner._running_agents = {}
+    runner._running_agents_ts = {}
+    runner._pending_messages = {}
+    runner._busy_ack_ts = {}
+    runner._draining = False
+    runner.adapters = {}
+    runner.config = MagicMock()
+    runner.session_store = None
+    runner.hooks = MagicMock()
+    runner.hooks.emit = AsyncMock()
+    return runner, _AGENT_PENDING_SENTINEL
+
+
+def _make_adapter(platform_val="telegram"):
+    """Build a minimal adapter mock."""
+    adapter = MagicMock()
+    adapter._pending_messages = {}
+    adapter._send_with_retry = AsyncMock()
+    adapter.config = MagicMock()
+    adapter.config.extra = {}
+    adapter.platform = MagicMock(value=platform_val)
+    return adapter
+
+
+# ---------------------------------------------------------------------------
+# Tests
+# ---------------------------------------------------------------------------
+
+class TestBusySessionAck:
+    """User sends a message while agent is running — should get acknowledgment."""
+
+    @pytest.mark.asyncio
+    async def test_sends_ack_when_agent_running(self):
+        """First message during busy session should get a status ack."""
+        runner, sentinel = _make_runner()
+        adapter = _make_adapter()
+
+        event = _make_event(text="Are you working?")
+        sk = build_session_key(event.source)
+
+        # Simulate running agent
+        agent = MagicMock()
+        agent.get_activity_summary.return_value = {
+            "api_call_count": 21,
+            "max_iterations": 60,
+            "current_tool": "terminal",
+            "last_activity_ts": time.time(),
+            "last_activity_desc": "terminal",
+            "seconds_since_activity": 1.0,
+        }
+        runner._running_agents[sk] = agent
+        runner._running_agents_ts[sk] = time.time() - 600  # 10 min ago
+        runner.adapters[event.source.platform] = adapter
+
+        result = await runner._handle_active_session_busy_message(event, sk)
+
+        assert result is True  # handled
+        # Verify ack was sent
+        adapter._send_with_retry.assert_called_once()
+        call_kwargs = adapter._send_with_retry.call_args
+        content = call_kwargs.kwargs.get("content") or call_kwargs[1].get("content", "")
+        if not content and call_kwargs.args:
+            # positional args
+            content = str(call_kwargs)
+        assert "Interrupting" in content or "respond" in content
+        assert "/stop" not in content  # no need — we ARE interrupting
+
+        # Verify message was queued in adapter pending
+        assert sk in adapter._pending_messages
+
+        # Verify agent interrupt was called
+        agent.interrupt.assert_called_once_with("Are you working?")
+
+    @pytest.mark.asyncio
+    async def test_debounce_suppresses_rapid_acks(self):
+        """Second message within 30s should NOT send another ack."""
+        runner, sentinel = _make_runner()
+        adapter = _make_adapter()
+
+        event1 = _make_event(text="hello?")
+        # Reuse the same source so platform mock matches
+        event2 = MessageEvent(
+            text="still there?",
+            message_type=MessageType.TEXT,
+            source=event1.source,
+            message_id="msg2",
+        )
+        sk = build_session_key(event1.source)
+
+        agent = MagicMock()
+        agent.get_activity_summary.return_value = {
+            "api_call_count": 5,
+            "max_iterations": 60,
+            "current_tool": None,
+            "last_activity_ts": time.time(),
+            "last_activity_desc": "api_call",
+            "seconds_since_activity": 0.5,
+        }
+        runner._running_agents[sk] = agent
+        runner._running_agents_ts[sk] = time.time() - 60
+        runner.adapters[event1.source.platform] = adapter
+
+        # First message — should get ack
+        result1 = await runner._handle_active_session_busy_message(event1, sk)
+        assert result1 is True
+        assert adapter._send_with_retry.call_count == 1
+
+        # Second message within cooldown — should be queued but no ack
+        result2 = await runner._handle_active_session_busy_message(event2, sk)
+        assert result2 is True
+        assert adapter._send_with_retry.call_count == 1  # still 1, no new ack
+
+        # But interrupt should still be called for both
+        assert agent.interrupt.call_count == 2
+
+    @pytest.mark.asyncio
+    async def test_ack_after_cooldown_expires(self):
+        """After 30s cooldown, a new message should send a fresh ack."""
+        runner, sentinel = _make_runner()
+        adapter = _make_adapter()
+
+        event = _make_event(text="hello?")
+        sk = build_session_key(event.source)
+
+        agent = MagicMock()
+        agent.get_activity_summary.return_value = {
+            "api_call_count": 10,
+            "max_iterations": 60,
+            "current_tool": "web_search",
+            "last_activity_ts": time.time(),
+            "last_activity_desc": "tool",
+            "seconds_since_activity": 0.5,
+        }
+        runner._running_agents[sk] = agent
+        runner._running_agents_ts[sk] = time.time() - 120
+        runner.adapters[event.source.platform] = adapter
+
+        # First ack
+        await runner._handle_active_session_busy_message(event, sk)
+        assert adapter._send_with_retry.call_count == 1
+
+        # Fake that cooldown expired
+        runner._busy_ack_ts[sk] = time.time() - 31
+
+        # Second ack should go through
+        await runner._handle_active_session_busy_message(event, sk)
+        assert adapter._send_with_retry.call_count == 2
+
+    @pytest.mark.asyncio
+    async def test_includes_status_detail(self):
+        """Ack message should include iteration and tool info when available."""
+        runner, sentinel = _make_runner()
+        adapter = _make_adapter()
+
+        event = _make_event(text="yo")
+        sk = build_session_key(event.source)
+
+        agent = MagicMock()
+        agent.get_activity_summary.return_value = {
+            "api_call_count": 21,
+            "max_iterations": 60,
+            "current_tool": "terminal",
+            "last_activity_ts": time.time(),
+            "last_activity_desc": "terminal",
+            "seconds_since_activity": 0.5,
+        }
+        runner._running_agents[sk] = agent
+        runner._running_agents_ts[sk] = time.time() - 600  # 10 min
+        runner.adapters[event.source.platform] = adapter
+
+        await runner._handle_active_session_busy_message(event, sk)
+
+        call_kwargs = adapter._send_with_retry.call_args
+        content = call_kwargs.kwargs.get("content", "")
+        assert "21/60" in content  # iteration
+        assert "terminal" in content  # current tool
+        assert "10 min" in content  # elapsed
+
+    @pytest.mark.asyncio
+    async def test_draining_still_works(self):
+        """Draining case should still produce the drain-specific message."""
+        runner, sentinel = _make_runner()
+        runner._draining = True
+        adapter = _make_adapter()
+
+        event = _make_event(text="hello")
+        sk = build_session_key(event.source)
+        runner.adapters[event.source.platform] = adapter
+
+        # Mock the drain-specific methods
+        runner._queue_during_drain_enabled = lambda: False
+        runner._status_action_gerund = lambda: "restarting"
+
+        result = await runner._handle_active_session_busy_message(event, sk)
+        assert result is True
+
+        call_kwargs = adapter._send_with_retry.call_args
+        content = call_kwargs.kwargs.get("content", "")
+        assert "restarting" in content
+
+    @pytest.mark.asyncio
+    async def test_pending_sentinel_no_interrupt(self):
+        """When agent is PENDING_SENTINEL, don't call interrupt (it has no method)."""
+        runner, sentinel = _make_runner()
+        adapter = _make_adapter()
+
+        event = _make_event(text="hey")
+        sk = build_session_key(event.source)
+
+        runner._running_agents[sk] = sentinel
+        runner._running_agents_ts[sk] = time.time()
+        runner.adapters[event.source.platform] = adapter
+
+        result = await runner._handle_active_session_busy_message(event, sk)
+        assert result is True
+        # Should still send ack
+        adapter._send_with_retry.assert_called_once()
+
+    @pytest.mark.asyncio
+    async def test_no_adapter_falls_through(self):
+        """If adapter is missing, return False so default path handles it."""
+        runner, sentinel = _make_runner()
+
+        event = _make_event(text="hello")
+        sk = build_session_key(event.source)
+
+        # No adapter registered
+        runner._running_agents[sk] = MagicMock()
+
+        result = await runner._handle_active_session_busy_message(event, sk)
+        assert result is False  # not handled, let default path try

From 93fe4ead83bb72586118a91c34076ca615ae3368 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Tue, 14 Apr 2026 22:14:27 -0700
Subject: [PATCH 02/25] fix: warn on invalid context_length format in
 config.yaml (#10067)

Previously, non-integer context_length values (e.g. '256K') in
config.yaml were silently ignored, causing the agent to fall back
to 128K auto-detection with no user feedback. This was confusing
for users with custom LiteLLM endpoints expecting larger context.

Now prints a clear stderr warning and logs at WARNING level when
model.context_length or custom_providers[].models.<model>.context_length
cannot be parsed as an integer, telling users to use plain integers
(e.g. 256000 instead of '256K').

Reported by community user ChFarhan via Discord.
---
 run_agent.py                                  |  28 ++++-
 .../test_invalid_context_length_warning.py    | 111 ++++++++++++++++++
 2 files changed, 138 insertions(+), 1 deletion(-)
 create mode 100644 tests/run_agent/test_invalid_context_length_warning.py

diff --git a/run_agent.py b/run_agent.py
index 080156051..e98a6e798 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -1268,6 +1268,19 @@ class AIAgent:
             try:
                 _config_context_length = int(_config_context_length)
             except (TypeError, ValueError):
+                logger.warning(
+                    "Invalid model.context_length in config.yaml: %r — "
+                    "must be a plain integer (e.g. 256000, not '256K'). "
+                    "Falling back to auto-detection.",
+                    _config_context_length,
+                )
+                import sys
+                print(
+                    f"\n⚠ Invalid model.context_length in config.yaml: {_config_context_length!r}\n"
+                    f"  Must be a plain integer (e.g. 256000, not '256K').\n"
+                    f"  Falling back to auto-detected context window.\n",
+                    file=sys.stderr,
+                )
                 _config_context_length = None
 
         # Store for reuse in switch_model (so config override persists across model switches)
@@ -1296,7 +1309,20 @@ class AIAgent:
                                 try:
                                     _config_context_length = int(_cp_ctx)
                                 except (TypeError, ValueError):
-                                    pass
+                                    logger.warning(
+                                        "Invalid context_length for model %r in "
+                                        "custom_providers: %r — must be a plain "
+                                        "integer (e.g. 256000, not '256K'). "
+                                        "Falling back to auto-detection.",
+                                        self.model, _cp_ctx,
+                                    )
+                                    import sys
+                                    print(
+                                        f"\n⚠ Invalid context_length for model {self.model!r} in custom_providers: {_cp_ctx!r}\n"
+                                        f"  Must be a plain integer (e.g. 256000, not '256K').\n"
+                                        f"  Falling back to auto-detected context window.\n",
+                                        file=sys.stderr,
+                                    )
                     break
         
         # Select context engine: config-driven (like memory providers).
diff --git a/tests/run_agent/test_invalid_context_length_warning.py b/tests/run_agent/test_invalid_context_length_warning.py
new file mode 100644
index 000000000..1ed72c951
--- /dev/null
+++ b/tests/run_agent/test_invalid_context_length_warning.py
@@ -0,0 +1,111 @@
+"""Tests that invalid context_length values in config produce visible warnings."""
+
+from unittest.mock import patch, MagicMock, call
+
+
+def _build_agent(model_cfg, custom_providers=None, model="anthropic/claude-opus-4.6"):
+    """Build an AIAgent with the given model config."""
+    cfg = {"model": model_cfg}
+    if custom_providers is not None:
+        cfg["custom_providers"] = custom_providers
+
+    with (
+        patch("hermes_cli.config.load_config", return_value=cfg),
+        patch("agent.model_metadata.get_model_context_length", return_value=128_000),
+        patch("run_agent.get_tool_definitions", return_value=[]),
+        patch("run_agent.check_toolset_requirements", return_value={}),
+        patch("run_agent.OpenAI"),
+    ):
+        from run_agent import AIAgent
+
+        agent = AIAgent(
+            model=model,
+            api_key="test-key-1234567890",
+            quiet_mode=True,
+            skip_context_files=True,
+            skip_memory=True,
+        )
+    return agent
+
+
+def test_valid_integer_context_length_no_warning():
+    """Plain integer context_length should work silently."""
+    with patch("run_agent.logger") as mock_logger:
+        agent = _build_agent({"default": "gpt5.4", "provider": "custom",
+                              "base_url": "http://localhost:4000/v1",
+                              "context_length": 256000})
+    assert agent._config_context_length == 256000
+    # No warning about invalid context_length
+    for c in mock_logger.warning.call_args_list:
+        assert "Invalid" not in str(c)
+
+
+def test_string_k_suffix_context_length_warns():
+    """context_length: '256K' should warn the user clearly."""
+    with patch("run_agent.logger") as mock_logger:
+        agent = _build_agent({"default": "gpt5.4", "provider": "custom",
+                              "base_url": "http://localhost:4000/v1",
+                              "context_length": "256K"})
+    assert agent._config_context_length is None
+    # Should have warned
+    warning_calls = [c for c in mock_logger.warning.call_args_list
+                     if "Invalid" in str(c) and "256K" in str(c)]
+    assert len(warning_calls) == 1
+    assert "plain integer" in str(warning_calls[0])
+
+
+def test_string_numeric_context_length_works():
+    """context_length: '256000' (string) should parse fine via int()."""
+    with patch("run_agent.logger") as mock_logger:
+        agent = _build_agent({"default": "gpt5.4", "provider": "custom",
+                              "base_url": "http://localhost:4000/v1",
+                              "context_length": "256000"})
+    assert agent._config_context_length == 256000
+    for c in mock_logger.warning.call_args_list:
+        assert "Invalid" not in str(c)
+
+
+def test_custom_providers_invalid_context_length_warns():
+    """Invalid context_length in custom_providers should warn."""
+    custom_providers = [
+        {
+            "name": "LiteLLM",
+            "base_url": "http://localhost:4000/v1",
+            "models": {
+                "gpt5.4": {"context_length": "256K"}
+            },
+        }
+    ]
+    with patch("run_agent.logger") as mock_logger:
+        agent = _build_agent(
+            {"default": "gpt5.4", "provider": "custom",
+             "base_url": "http://localhost:4000/v1"},
+            custom_providers=custom_providers,
+            model="gpt5.4",
+        )
+    warning_calls = [c for c in mock_logger.warning.call_args_list
+                     if "Invalid" in str(c) and "256K" in str(c)]
+    assert len(warning_calls) == 1
+    assert "custom_providers" in str(warning_calls[0])
+
+
+def test_custom_providers_valid_context_length():
+    """Valid integer in custom_providers should work silently."""
+    custom_providers = [
+        {
+            "name": "LiteLLM",
+            "base_url": "http://localhost:4000/v1",
+            "models": {
+                "gpt5.4": {"context_length": 256000}
+            },
+        }
+    ]
+    with patch("run_agent.logger") as mock_logger:
+        agent = _build_agent(
+            {"default": "gpt5.4", "provider": "custom",
+             "base_url": "http://localhost:4000/v1"},
+            custom_providers=custom_providers,
+            model="gpt5.4",
+        )
+    for c in mock_logger.warning.call_args_list:
+        assert "Invalid" not in str(c)

From 50c35dcabe9f6f909630a44cf007ae39d2ddbaf3 Mon Sep 17 00:00:00 2001
From: Teknium <teknium1@gmail.com>
Date: Tue, 14 Apr 2026 21:15:53 -0700
Subject: [PATCH 03/25] fix: stale agent timeout, uv venv detection, empty
 response after tools (#9051, #8620, #9400)

Three independent fixes:

1. Reset activity timestamp on cached agent reuse (#9051)
   When the gateway reuses a cached AIAgent for a new turn, the
   _last_activity_ts from the previous turn (possibly hours ago)
   carried over. The inactivity timeout handler immediately saw
   the agent as idle for hours and killed it.

   Fix: reset _last_activity_ts, _last_activity_desc, and
   _api_call_count when retrieving an agent from the cache.

2. Detect uv-managed virtual environments (#8620 sub-issue 1)
   The systemd unit generator fell back to sys.executable (uv's
   standalone Python) when running under 'uv run', because
   sys.prefix == sys.base_prefix (uv doesn't set up traditional
   venv activation). The generated ExecStart pointed to a Python
   binary without site-packages, crashing the service on startup.

   Fix: check VIRTUAL_ENV env var before falling back to
   sys.executable. uv sets VIRTUAL_ENV even when sys.prefix
   doesn't reflect the venv.

3. Nudge model to continue after empty post-tool response (#9400)
   Weaker models (GLM-5, mimo-v2-pro) sometimes return empty
   responses after tool calls instead of continuing to the next
   step. The agent silently abandoned the remaining work with
   '(empty)' or used prior-turn fallback text.

   Fix: when the model returns empty after tool calls AND there's
   no prior-turn content to fall back on, inject a one-time user
   nudge message telling the model to process the tool results and
   continue. The flag resets after each successful tool round so it
   can fire again on later rounds.

Test plan: 97 gateway + CLI tests pass, 9 venv detection tests pass
---
 gateway/run.py        |  6 ++++++
 hermes_cli/gateway.py | 13 +++++++++++-
 run_agent.py          | 47 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 65 insertions(+), 1 deletion(-)

diff --git a/gateway/run.py b/gateway/run.py
index cfb4af82e..d360d453f 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -8458,6 +8458,12 @@ class GatewayRunner:
                     cached = _cache.get(session_key)
                     if cached and cached[1] == _sig:
                         agent = cached[0]
+                        # Reset activity timestamp so the inactivity timeout
+                        # handler doesn't see stale idle time from the previous
+                        # turn and immediately kill this agent.  (#9051)
+                        agent._last_activity_ts = time.time()
+                        agent._last_activity_desc = "starting new turn (cached)"
+                        agent._api_call_count = 0
                         logger.debug("Reusing cached agent for session %s", session_key)
 
             if agent is None:
diff --git a/hermes_cli/gateway.py b/hermes_cli/gateway.py
index 58d9f92ed..6d46bdde6 100644
--- a/hermes_cli/gateway.py
+++ b/hermes_cli/gateway.py
@@ -715,7 +715,9 @@ def _detect_venv_dir() -> Path | None:
     """Detect the active virtualenv directory.
 
     Checks ``sys.prefix`` first (works regardless of the directory name),
-    then falls back to probing common directory names under PROJECT_ROOT.
+    then ``VIRTUAL_ENV`` env var (covers uv-managed environments where
+    sys.prefix == sys.base_prefix), then falls back to probing common
+    directory names under PROJECT_ROOT.
     Returns ``None`` when no virtualenv can be found.
     """
     # If we're running inside a virtualenv, sys.prefix points to it.
@@ -724,6 +726,15 @@ def _detect_venv_dir() -> Path | None:
         if venv.is_dir():
             return venv
 
+    # uv and some other tools set VIRTUAL_ENV without changing sys.prefix.
+    # This catches `uv run` where sys.prefix == sys.base_prefix but the
+    # environment IS a venv.  (#8620)
+    _virtual_env = os.environ.get("VIRTUAL_ENV")
+    if _virtual_env:
+        venv = Path(_virtual_env)
+        if venv.is_dir():
+            return venv
+
     # Fallback: check common virtualenv directory names under the project root.
     for candidate in (".venv", "venv"):
         venv = PROJECT_ROOT / candidate
diff --git a/run_agent.py b/run_agent.py
index e98a6e798..55b4efaa6 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -7858,6 +7858,7 @@ class AIAgent:
         self._incomplete_scratchpad_retries = 0
         self._codex_incomplete_retries = 0
         self._thinking_prefill_retries = 0
+        self._post_tool_empty_retried = False
         self._last_content_with_tools = None
         self._mute_post_response = False
         self._unicode_sanitization_passes = 0
@@ -10131,6 +10132,10 @@ class AIAgent:
                     if _had_prefill:
                         self._thinking_prefill_retries = 0
                         self._empty_content_retries = 0
+                    # Successful tool execution — reset the post-tool nudge
+                    # flag so it can fire again if the model goes empty on
+                    # a LATER tool round.
+                    self._post_tool_empty_retried = False
 
                     messages.append(assistant_msg)
                     self._emit_interim_assistant_message(assistant_msg)
@@ -10299,6 +10304,48 @@ class AIAgent:
                             self._response_was_previewed = True
                             break
 
+                        # ── Post-tool-call empty response nudge ───────────
+                        # The model returned empty after executing tool calls
+                        # but there's no prior-turn content to fall back on.
+                        # Instead of giving up, nudge the model to continue by
+                        # appending a user-level hint.  This is the #9400 case:
+                        # weaker models (GLM-5, etc.) sometimes return empty
+                        # after tool results instead of continuing to the next
+                        # step.  One retry with a nudge usually fixes it.
+                        _prior_was_tool = any(
+                            m.get("role") == "tool"
+                            for m in messages[-5:]  # check recent messages
+                        )
+                        if (
+                            _prior_was_tool
+                            and not getattr(self, "_post_tool_empty_retried", False)
+                        ):
+                            self._post_tool_empty_retried = True
+                            logger.info(
+                                "Empty response after tool calls — nudging model "
+                                "to continue processing"
+                            )
+                            self._emit_status(
+                                "⚠️ Model returned empty after tool calls — "
+                                "nudging to continue"
+                            )
+                            # Append the empty assistant message first so the
+                            # message sequence stays valid:
+                            #   tool(result) → assistant("(empty)") → user(nudge)
+                            # Without this, we'd have tool → user which most
+                            # APIs reject as an invalid sequence.
+                            assistant_msg["content"] = "(empty)"
+                            messages.append(assistant_msg)
+                            messages.append({
+                                "role": "user",
+                                "content": (
+                                    "You just executed tool calls but returned an "
+                                    "empty response. Please process the tool "
+                                    "results above and continue with the task."
+                                ),
+                            })
+                            continue
+
                         # ── Thinking-only prefill continuation ──────────
                         # The model produced structured reasoning (via API
                         # fields) but no visible text content.  Rather than

From 9855190f23a2354b1c796b83bd58582946485c7d Mon Sep 17 00:00:00 2001
From: kshitijk4poor <kshitijk4poor@users.noreply.github.com>
Date: Tue, 14 Apr 2026 22:21:04 -0700
Subject: [PATCH 04/25] feat(compressor): smart collapse, dedup,
 anti-thrashing, template upgrade, hardening
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Combined salvage of PRs #9661, #9663, #9674, #9677, #9678 by kshitijk4poor.

- Smart tool output collapse: informative 1-line summaries replace generic placeholder
- Dedup identical tool results via MD5 hash, truncate large tool_call arguments
- Anti-thrashing: skip compression after 2 consecutive <10% savings passes
- Structured action-log summary template with numbered actions and Active State
- Hardening: max_tokens 1.3x cap, multimodal safety, note idempotency, adaptive cooldown

Follow-up fixes applied during salvage:
- web_extract: reads 'urls' (list) not 'url' (original PR bug)
- Multimodal list content guards in dedup and prune passes
- Kept 'Relevant Files' section in template (original PR removed it)

Skipped PRs #9665 (user msg preservation — duplication risk) and #9675 (dead code).
---
 agent/context_compressor.py | 314 +++++++++++++++++++++++++++++++-----
 1 file changed, 278 insertions(+), 36 deletions(-)

diff --git a/agent/context_compressor.py b/agent/context_compressor.py
index 4163966aa..74bbbd5d0 100644
--- a/agent/context_compressor.py
+++ b/agent/context_compressor.py
@@ -17,7 +17,10 @@ Improvements over v2:
   - Richer tool call/result detail in summarizer input
 """
 
+import hashlib
+import json
 import logging
+import re
 import time
 from typing import Any, Dict, List, Optional
 
@@ -57,6 +60,128 @@ _CHARS_PER_TOKEN = 4
 _SUMMARY_FAILURE_COOLDOWN_SECONDS = 600
 
 
+def _summarize_tool_result(tool_name: str, tool_args: str, tool_content: str) -> str:
+    """Create an informative 1-line summary of a tool call + result.
+
+    Used during the pre-compression pruning pass to replace large tool
+    outputs with a short but useful description of what the tool did,
+    rather than a generic placeholder that carries zero information.
+
+    Returns strings like::
+
+        [terminal] ran `npm test` -> exit 0, 47 lines output
+        [read_file] read config.py from line 1 (1,200 chars)
+        [search_files] content search for 'compress' in agent/ -> 12 matches
+    """
+    try:
+        args = json.loads(tool_args) if tool_args else {}
+    except (json.JSONDecodeError, TypeError):
+        args = {}
+
+    content = tool_content or ""
+    content_len = len(content)
+    line_count = content.count("\n") + 1 if content.strip() else 0
+
+    if tool_name == "terminal":
+        cmd = args.get("command", "")
+        if len(cmd) > 80:
+            cmd = cmd[:77] + "..."
+        exit_match = re.search(r'"exit_code"\s*:\s*(-?\d+)', content)
+        exit_code = exit_match.group(1) if exit_match else "?"
+        return f"[terminal] ran `{cmd}` -> exit {exit_code}, {line_count} lines output"
+
+    if tool_name == "read_file":
+        path = args.get("path", "?")
+        offset = args.get("offset", 1)
+        return f"[read_file] read {path} from line {offset} ({content_len:,} chars)"
+
+    if tool_name == "write_file":
+        path = args.get("path", "?")
+        written_lines = args.get("content", "").count("\n") + 1 if args.get("content") else "?"
+        return f"[write_file] wrote to {path} ({written_lines} lines)"
+
+    if tool_name == "search_files":
+        pattern = args.get("pattern", "?")
+        path = args.get("path", ".")
+        target = args.get("target", "content")
+        match_count = re.search(r'"total_count"\s*:\s*(\d+)', content)
+        count = match_count.group(1) if match_count else "?"
+        return f"[search_files] {target} search for '{pattern}' in {path} -> {count} matches"
+
+    if tool_name == "patch":
+        path = args.get("path", "?")
+        mode = args.get("mode", "replace")
+        return f"[patch] {mode} in {path} ({content_len:,} chars result)"
+
+    if tool_name in ("browser_navigate", "browser_click", "browser_snapshot",
+                     "browser_type", "browser_scroll", "browser_vision"):
+        url = args.get("url", "")
+        ref = args.get("ref", "")
+        detail = f" {url}" if url else (f" ref={ref}" if ref else "")
+        return f"[{tool_name}]{detail} ({content_len:,} chars)"
+
+    if tool_name == "web_search":
+        query = args.get("query", "?")
+        return f"[web_search] query='{query}' ({content_len:,} chars result)"
+
+    if tool_name == "web_extract":
+        urls = args.get("urls", [])
+        url_desc = urls[0] if isinstance(urls, list) and urls else "?"
+        if isinstance(urls, list) and len(urls) > 1:
+            url_desc += f" (+{len(urls) - 1} more)"
+        return f"[web_extract] {url_desc} ({content_len:,} chars)"
+
+    if tool_name == "delegate_task":
+        goal = args.get("goal", "")
+        if len(goal) > 60:
+            goal = goal[:57] + "..."
+        return f"[delegate_task] '{goal}' ({content_len:,} chars result)"
+
+    if tool_name == "execute_code":
+        code_preview = (args.get("code") or "")[:60].replace("\n", " ")
+        if len(args.get("code", "")) > 60:
+            code_preview += "..."
+        return f"[execute_code] `{code_preview}` ({line_count} lines output)"
+
+    if tool_name in ("skill_view", "skills_list", "skill_manage"):
+        name = args.get("name", "?")
+        return f"[{tool_name}] name={name} ({content_len:,} chars)"
+
+    if tool_name == "vision_analyze":
+        question = args.get("question", "")[:50]
+        return f"[vision_analyze] '{question}' ({content_len:,} chars)"
+
+    if tool_name == "memory":
+        action = args.get("action", "?")
+        target = args.get("target", "?")
+        return f"[memory] {action} on {target}"
+
+    if tool_name == "todo":
+        return "[todo] updated task list"
+
+    if tool_name == "clarify":
+        return "[clarify] asked user a question"
+
+    if tool_name == "text_to_speech":
+        return f"[text_to_speech] generated audio ({content_len:,} chars)"
+
+    if tool_name == "cronjob":
+        action = args.get("action", "?")
+        return f"[cronjob] {action}"
+
+    if tool_name == "process":
+        action = args.get("action", "?")
+        sid = args.get("session_id", "?")
+        return f"[process] {action} session={sid}"
+
+    # Generic fallback
+    first_arg = ""
+    for k, v in list(args.items())[:2]:
+        sv = str(v)[:40]
+        first_arg += f" {k}={sv}"
+    return f"[{tool_name}]{first_arg} ({content_len:,} chars result)"
+
+
 class ContextCompressor(ContextEngine):
     """Default context engine — compresses conversation context via lossy summarization.
 
@@ -78,6 +203,8 @@ class ContextCompressor(ContextEngine):
         self._context_probed = False
         self._context_probe_persistable = False
         self._previous_summary = None
+        self._last_compression_savings_pct = 100.0
+        self._ineffective_compression_count = 0
 
     def update_model(
         self,
@@ -167,6 +294,9 @@ class ContextCompressor(ContextEngine):
 
         # Stores the previous compaction summary for iterative updates
         self._previous_summary: Optional[str] = None
+        # Anti-thrashing: track whether last compression was effective
+        self._last_compression_savings_pct: float = 100.0
+        self._ineffective_compression_count: int = 0
         self._summary_failure_cooldown_until: float = 0.0
 
     def update_from_response(self, usage: Dict[str, Any]):
@@ -175,9 +305,26 @@ class ContextCompressor(ContextEngine):
         self.last_completion_tokens = usage.get("completion_tokens", 0)
 
     def should_compress(self, prompt_tokens: int = None) -> bool:
-        """Check if context exceeds the compression threshold."""
+        """Check if context exceeds the compression threshold.
+
+        Includes anti-thrashing protection: if the last two compressions
+        each saved less than 10%, skip compression to avoid infinite loops
+        where each pass removes only 1-2 messages.
+        """
         tokens = prompt_tokens if prompt_tokens is not None else self.last_prompt_tokens
-        return tokens >= self.threshold_tokens
+        if tokens < self.threshold_tokens:
+            return False
+        # Anti-thrashing: back off if recent compressions were ineffective
+        if self._ineffective_compression_count >= 2:
+            if not self.quiet_mode:
+                logger.warning(
+                    "Compression skipped — last %d compressions saved <10%% each. "
+                    "Consider /new to start a fresh session, or /compress <topic> "
+                    "for focused compression.",
+                    self._ineffective_compression_count,
+                )
+            return False
+        return True
 
     # ------------------------------------------------------------------
     # Tool output pruning (cheap pre-pass, no LLM call)
@@ -187,7 +334,16 @@ class ContextCompressor(ContextEngine):
         self, messages: List[Dict[str, Any]], protect_tail_count: int,
         protect_tail_tokens: int | None = None,
     ) -> tuple[List[Dict[str, Any]], int]:
-        """Replace old tool result contents with a short placeholder.
+        """Replace old tool result contents with informative 1-line summaries.
+
+        Instead of a generic placeholder, generates a summary like::
+
+            [terminal] ran `npm test` -> exit 0, 47 lines output
+            [read_file] read config.py from line 1 (3,400 chars)
+
+        Also deduplicates identical tool results (e.g. reading the same file
+        5x keeps only the newest full copy) and truncates large tool_call
+        arguments in assistant messages outside the protected tail.
 
         Walks backward from the end, protecting the most recent messages that
         fall within ``protect_tail_tokens`` (when provided) OR the last
@@ -203,6 +359,22 @@ class ContextCompressor(ContextEngine):
         result = [m.copy() for m in messages]
         pruned = 0
 
+        # Build index: tool_call_id -> (tool_name, arguments_json)
+        call_id_to_tool: Dict[str, tuple] = {}
+        for msg in result:
+            if msg.get("role") == "assistant":
+                for tc in msg.get("tool_calls") or []:
+                    if isinstance(tc, dict):
+                        cid = tc.get("id", "")
+                        fn = tc.get("function", {})
+                        call_id_to_tool[cid] = (fn.get("name", "unknown"), fn.get("arguments", ""))
+                    else:
+                        cid = getattr(tc, "id", "") or ""
+                        fn = getattr(tc, "function", None)
+                        name = getattr(fn, "name", "unknown") if fn else "unknown"
+                        args_str = getattr(fn, "arguments", "") if fn else ""
+                        call_id_to_tool[cid] = (name, args_str)
+
         # Determine the prune boundary
         if protect_tail_tokens is not None and protect_tail_tokens > 0:
             # Token-budget approach: walk backward accumulating tokens
@@ -211,7 +383,8 @@ class ContextCompressor(ContextEngine):
             min_protect = min(protect_tail_count, len(result) - 1)
             for i in range(len(result) - 1, -1, -1):
                 msg = result[i]
-                content_len = len(msg.get("content") or "")
+                raw_content = msg.get("content") or ""
+                content_len = sum(len(p.get("text", "")) for p in raw_content) if isinstance(raw_content, list) else len(raw_content)
                 msg_tokens = content_len // _CHARS_PER_TOKEN + 10
                 for tc in msg.get("tool_calls") or []:
                     if isinstance(tc, dict):
@@ -226,18 +399,69 @@ class ContextCompressor(ContextEngine):
         else:
             prune_boundary = len(result) - protect_tail_count
 
+        # Pass 1: Deduplicate identical tool results.
+        # When the same file is read multiple times, keep only the most recent
+        # full copy and replace older duplicates with a back-reference.
+        content_hashes: dict = {}  # hash -> (index, tool_call_id)
+        for i in range(len(result) - 1, -1, -1):
+            msg = result[i]
+            if msg.get("role") != "tool":
+                continue
+            content = msg.get("content") or ""
+            # Skip multimodal content (list of content blocks)
+            if isinstance(content, list):
+                continue
+            if len(content) < 200:
+                continue
+            h = hashlib.md5(content.encode("utf-8", errors="replace")).hexdigest()[:12]
+            if h in content_hashes:
+                # This is an older duplicate — replace with back-reference
+                result[i] = {**msg, "content": "[Duplicate tool output — same content as a more recent call]"}
+                pruned += 1
+            else:
+                content_hashes[h] = (i, msg.get("tool_call_id", "?"))
+
+        # Pass 2: Replace old tool results with informative summaries
         for i in range(prune_boundary):
             msg = result[i]
             if msg.get("role") != "tool":
                 continue
             content = msg.get("content", "")
+            # Skip multimodal content (list of content blocks)
+            if isinstance(content, list):
+                continue
             if not content or content == _PRUNED_TOOL_PLACEHOLDER:
                 continue
+            # Skip already-deduplicated or previously-summarized results
+            if content.startswith("[Duplicate tool output"):
+                continue
             # Only prune if the content is substantial (>200 chars)
             if len(content) > 200:
-                result[i] = {**msg, "content": _PRUNED_TOOL_PLACEHOLDER}
+                call_id = msg.get("tool_call_id", "")
+                tool_name, tool_args = call_id_to_tool.get(call_id, ("unknown", ""))
+                summary = _summarize_tool_result(tool_name, tool_args, content)
+                result[i] = {**msg, "content": summary}
                 pruned += 1
 
+        # Pass 3: Truncate large tool_call arguments in assistant messages
+        # outside the protected tail. write_file with 50KB content, for
+        # example, survives pruning entirely without this.
+        for i in range(prune_boundary):
+            msg = result[i]
+            if msg.get("role") != "assistant" or not msg.get("tool_calls"):
+                continue
+            new_tcs = []
+            modified = False
+            for tc in msg["tool_calls"]:
+                if isinstance(tc, dict):
+                    args = tc.get("function", {}).get("arguments", "")
+                    if len(args) > 500:
+                        tc = {**tc, "function": {**tc["function"], "arguments": args[:200] + "...[truncated]"}}
+                        modified = True
+                new_tcs.append(tc)
+            if modified:
+                result[i] = {**msg, "tool_calls": new_tcs}
+
         return result, pruned
 
     # ------------------------------------------------------------------
@@ -357,29 +581,37 @@ class ContextCompressor(ContextEngine):
         )
 
         # Shared structured template (used by both paths).
-        # Key changes vs v1:
-        #   - "Pending User Asks" section (from Claude Code) explicitly tracks
-        #     unanswered questions so the model knows what's resolved vs open
-        #   - "Remaining Work" replaces "Next Steps" to avoid reading as active
-        #     instructions
-        #   - "Resolved Questions" makes it clear which questions were already
-        #     answered (prevents model from re-answering them)
         _template_sections = f"""## Goal
 [What the user is trying to accomplish]
 
 ## Constraints & Preferences
 [User preferences, coding style, constraints, important decisions]
 
-## Progress
-### Done
-[Completed work — include specific file paths, commands run, results obtained]
-### In Progress
-[Work currently underway]
-### Blocked
-[Any blockers or issues encountered]
+## Completed Actions
+[Numbered list of concrete actions taken — include tool used, target, and outcome.
+Format each as: N. ACTION target — outcome [tool: name]
+Example:
+1. READ config.py:45 — found `==` should be `!=` [tool: read_file]
+2. PATCH config.py:45 — changed `==` to `!=` [tool: patch]
+3. TEST `pytest tests/` — 3/50 failed: test_parse, test_validate, test_edge [tool: terminal]
+Be specific with file paths, commands, line numbers, and results.]
+
+## Active State
+[Current working state — include:
+- Working directory and branch (if applicable)
+- Modified/created files with brief note on each
+- Test status (X/Y passing)
+- Any running processes or servers
+- Environment details that matter]
+
+## In Progress
+[Work currently underway — what was being done when compaction fired]
+
+## Blocked
+[Any blockers, errors, or issues not yet resolved. Include exact error messages.]
 
 ## Key Decisions
-[Important technical decisions and why they were made]
+[Important technical decisions and WHY they were made]
 
 ## Resolved Questions
 [Questions the user asked that were ALREADY answered — include the answer so the next assistant does not re-answer them]
@@ -396,10 +628,7 @@ class ContextCompressor(ContextEngine):
 ## Critical Context
 [Any specific values, error messages, configuration details, or data that would be lost without explicit preservation]
 
-## Tools & Patterns
-[Which tools were used, how they were used effectively, and any tool-specific discoveries]
-
-Target ~{summary_budget} tokens. Be specific — include file paths, command outputs, error messages, and concrete values rather than vague descriptions.
+Target ~{summary_budget} tokens. Be CONCRETE — include file paths, command outputs, error messages, line numbers, and specific values. Avoid vague descriptions like "made some changes" — say exactly what changed.
 
 Write only the summary body. Do not include any preamble or prefix."""
 
@@ -415,7 +644,7 @@ PREVIOUS SUMMARY:
 NEW TURNS TO INCORPORATE:
 {content_to_summarize}
 
-Update the summary using this exact structure. PRESERVE all existing information that is still relevant. ADD new progress. Move items from "In Progress" to "Done" when completed. Move answered questions to "Resolved Questions". Remove information only if it is clearly obsolete.
+Update the summary using this exact structure. PRESERVE all existing information that is still relevant. ADD new completed actions to the numbered list (continue numbering). Move items from "In Progress" to "Completed Actions" when done. Move answered questions to "Resolved Questions". Update "Active State" to reflect current state. Remove information only if it is clearly obsolete.
 
 {_template_sections}"""
         else:
@@ -450,7 +679,7 @@ The user has requested that this compaction PRIORITISE preserving all informatio
                     "api_mode": self.api_mode,
                 },
                 "messages": [{"role": "user", "content": prompt}],
-                "max_tokens": summary_budget * 2,
+                "max_tokens": int(summary_budget * 1.3),
                 # timeout resolved from auxiliary.compression.timeout config by call_llm
             }
             if self.summary_model:
@@ -466,6 +695,7 @@ The user has requested that this compaction PRIORITISE preserving all informatio
             self._summary_failure_cooldown_until = 0.0
             return self._with_summary_prefix(summary)
         except RuntimeError:
+            # No provider configured — long cooldown, unlikely to self-resolve
             self._summary_failure_cooldown_until = time.monotonic() + _SUMMARY_FAILURE_COOLDOWN_SECONDS
             logging.warning("Context compression: no provider available for "
                             "summary. Middle turns will be dropped without summary "
@@ -473,12 +703,14 @@ The user has requested that this compaction PRIORITISE preserving all informatio
                             _SUMMARY_FAILURE_COOLDOWN_SECONDS)
             return None
         except Exception as e:
-            self._summary_failure_cooldown_until = time.monotonic() + _SUMMARY_FAILURE_COOLDOWN_SECONDS
+            # Transient errors (timeout, rate limit, network) — shorter cooldown
+            _transient_cooldown = 60
+            self._summary_failure_cooldown_until = time.monotonic() + _transient_cooldown
             logging.warning(
                 "Failed to generate context summary: %s. "
                 "Further summary attempts paused for %d seconds.",
                 e,
-                _SUMMARY_FAILURE_COOLDOWN_SECONDS,
+                _transient_cooldown,
             )
             return None
 
@@ -744,11 +976,11 @@ The user has requested that this compaction PRIORITISE preserving all informatio
         compressed = []
         for i in range(compress_start):
             msg = messages[i].copy()
-            if i == 0 and msg.get("role") == "system" and self.compression_count == 0:
-                msg["content"] = (
-                    (msg.get("content") or "")
-                    + "\n\n[Note: Some earlier conversation turns have been compacted into a handoff summary to preserve context space. The current session state may still reflect earlier work, so build on that summary and state rather than re-doing work.]"
-                )
+            if i == 0 and msg.get("role") == "system":
+                existing = msg.get("content") or ""
+                _compression_note = "[Note: Some earlier conversation turns have been compacted into a handoff summary to preserve context space. The current session state may still reflect earlier work, so build on that summary and state rather than re-doing work.]"
+                if _compression_note not in existing:
+                    msg["content"] = existing + "\n\n" + _compression_note
             compressed.append(msg)
 
         # If LLM summary failed, insert a static fallback so the model
@@ -806,14 +1038,24 @@ The user has requested that this compaction PRIORITISE preserving all informatio
 
         compressed = self._sanitize_tool_pairs(compressed)
 
+        new_estimate = estimate_messages_tokens_rough(compressed)
+        saved_estimate = display_tokens - new_estimate
+
+        # Anti-thrashing: track compression effectiveness
+        savings_pct = (saved_estimate / display_tokens * 100) if display_tokens > 0 else 0
+        self._last_compression_savings_pct = savings_pct
+        if savings_pct < 10:
+            self._ineffective_compression_count += 1
+        else:
+            self._ineffective_compression_count = 0
+
         if not self.quiet_mode:
-            new_estimate = estimate_messages_tokens_rough(compressed)
-            saved_estimate = display_tokens - new_estimate
             logger.info(
-                "Compressed: %d -> %d messages (~%d tokens saved)",
+                "Compressed: %d -> %d messages (~%d tokens saved, %.0f%%)",
                 n_messages,
                 len(compressed),
                 saved_estimate,
+                savings_pct,
             )
             logger.info("Compression #%d complete", self.compression_count)
 

From 5d5d21556e129ecab93b38bfe0a5b776249cf5f0 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Tue, 14 Apr 2026 22:37:45 -0700
Subject: [PATCH 05/25] fix: sync client.api_key during UnicodeEncodeError
 ASCII recovery (#10090)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The existing recovery block sanitized self.api_key and
self._client_kwargs['api_key'] but did not update self.client.api_key.
The OpenAI SDK stores its own copy of api_key and reads it dynamically
via the auth_headers property on every request. Without this fix, the
retry after sanitization would still send the corrupted key in the
Authorization header, causing the same UnicodeEncodeError.

The bug manifests when an API key contains Unicode lookalike characters
(e.g. ʋ U+028B instead of v) from copy-pasting out of PDFs, rich-text
editors, or web pages with decorative fonts. httpx hard-encodes all
HTTP headers as ASCII, so the non-ASCII char in the Authorization
header triggers the error.

Adds TestApiKeyClientSync with two tests verifying:
- All three key locations are synced after sanitization
- Recovery handles client=None (pre-init) without crashing
---
 run_agent.py                                |  5 ++
 tests/run_agent/test_unicode_ascii_codec.py | 64 +++++++++++++++++++++
 2 files changed, 69 insertions(+)

diff --git a/run_agent.py b/run_agent.py
index 55b4efaa6..48382389e 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -9037,6 +9037,11 @@ class AIAgent:
                                     self.api_key = _clean_key
                                     if isinstance(getattr(self, "_client_kwargs", None), dict):
                                         self._client_kwargs["api_key"] = _clean_key
+                                    # Also update the live client — it holds its
+                                    # own copy of api_key which auth_headers reads
+                                    # dynamically on every request.
+                                    if getattr(self, "client", None) is not None and hasattr(self.client, "api_key"):
+                                        self.client.api_key = _clean_key
                                     _credential_sanitized = True
                                     self._vprint(
                                         f"{self.log_prefix}⚠️  API key contained non-ASCII characters "
diff --git a/tests/run_agent/test_unicode_ascii_codec.py b/tests/run_agent/test_unicode_ascii_codec.py
index ef4f3f339..a8a52c34a 100644
--- a/tests/run_agent/test_unicode_ascii_codec.py
+++ b/tests/run_agent/test_unicode_ascii_codec.py
@@ -230,3 +230,67 @@ class TestSanitizeStructureNonAscii:
         assert _sanitize_structure_non_ascii(payload) is True
         assert payload["default_headers"]["X-Title"] == "Hermes  Agent"
         assert payload["default_headers"]["User-Agent"] == "Hermes/1.0 "
+
+
+class TestApiKeyClientSync:
+    """Verify that ASCII recovery updates the live OpenAI client's api_key.
+
+    The OpenAI SDK stores its own copy of api_key which auth_headers reads
+    dynamically.  If only self.api_key is updated but self.client.api_key
+    is not, the next request still sends the corrupted key in the
+    Authorization header.
+    """
+
+    def test_client_api_key_updated_on_sanitize(self):
+        """Simulate the recovery path and verify client.api_key is synced."""
+        from unittest.mock import MagicMock
+        from run_agent import AIAgent
+
+        agent = AIAgent.__new__(AIAgent)
+        bad_key = "sk-proj-abc\u028bdef"  # ʋ lookalike at position 11
+        agent.api_key = bad_key
+        agent._client_kwargs = {"api_key": bad_key}
+        agent.quiet_mode = True
+
+        # Mock client with its own api_key attribute (like the real OpenAI client)
+        mock_client = MagicMock()
+        mock_client.api_key = bad_key
+        agent.client = mock_client
+
+        # --- replicate the recovery logic from run_agent.py ---
+        _raw_key = agent.api_key
+        _clean_key = _strip_non_ascii(_raw_key)
+        assert _clean_key != _raw_key, "test precondition: key should have non-ASCII"
+
+        agent.api_key = _clean_key
+        agent._client_kwargs["api_key"] = _clean_key
+        if getattr(agent, "client", None) is not None and hasattr(agent.client, "api_key"):
+            agent.client.api_key = _clean_key
+
+        # All three locations should now hold the clean key
+        assert agent.api_key == "sk-proj-abcdef"
+        assert agent._client_kwargs["api_key"] == "sk-proj-abcdef"
+        assert agent.client.api_key == "sk-proj-abcdef"
+        # The bad char should be gone from all of them
+        assert "\u028b" not in agent.api_key
+        assert "\u028b" not in agent._client_kwargs["api_key"]
+        assert "\u028b" not in agent.client.api_key
+
+    def test_client_none_does_not_crash(self):
+        """Recovery should not crash when client is None (pre-init)."""
+        from run_agent import AIAgent
+
+        agent = AIAgent.__new__(AIAgent)
+        bad_key = "sk-proj-\u028b"
+        agent.api_key = bad_key
+        agent._client_kwargs = {"api_key": bad_key}
+        agent.client = None
+
+        _clean_key = _strip_non_ascii(bad_key)
+        agent.api_key = _clean_key
+        agent._client_kwargs["api_key"] = _clean_key
+        if getattr(agent, "client", None) is not None and hasattr(agent.client, "api_key"):
+            agent.client.api_key = _clean_key
+
+        assert agent.api_key == "sk-proj-"
+        assert agent.client is None  # should not have been touched

From 772cfb6c4ec7770759b4e0c8552b934fe9ff897d Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Tue, 14 Apr 2026 22:38:17 -0700
Subject: [PATCH 06/25] fix: stale agent timeout, uv venv detection, empty
 response after tools, compression model fallback (#9051, #8620, #9400)
 (#10093)

Four independent fixes:

1. Reset activity timestamp on cached agent reuse (#9051)
   When the gateway reuses a cached AIAgent for a new turn, the
   _last_activity_ts from the previous turn (possibly hours ago)
   carried over. The inactivity timeout handler immediately saw
   the agent as idle for hours and killed it.

   Fix: reset _last_activity_ts, _last_activity_desc, and
   _api_call_count when retrieving an agent from the cache.

2. Detect uv-managed virtual environments (#8620 sub-issue 1)
   The systemd unit generator fell back to sys.executable (uv's
   standalone Python) when running under 'uv run', because
   sys.prefix == sys.base_prefix. The generated ExecStart pointed
   to a Python binary without site-packages.

   Fix: check VIRTUAL_ENV env var before falling back to
   sys.executable. uv sets VIRTUAL_ENV even when sys.prefix
   doesn't reflect the venv.

3. Nudge model to continue after empty post-tool response (#9400)
   Weaker models sometimes return empty after tool calls. The agent
   silently abandoned the remaining work.

   Fix: append assistant('(empty)') + user nudge message and retry
   once. Resets after each successful tool round.

4. Compression model fallback on permanent errors (#8620 sub-issue 4)
   When the default summary model (gemini-3-flash) returns 503
   'model_not_found' on custom proxies, the compressor entered a
   600s cooldown, leaving context growing unbounded.

   Fix: detect permanent model-not-found errors (503, 404,
   'model_not_found', 'no available channel') and fall back to
   using the main model for compression instead of entering
   cooldown. One-time fallback with immediate retry.

Test plan: 40 compressor tests + 97 gateway/CLI tests + 9 venv tests pass
---
 agent/context_compressor.py | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/agent/context_compressor.py b/agent/context_compressor.py
index 74bbbd5d0..ac5db7762 100644
--- a/agent/context_compressor.py
+++ b/agent/context_compressor.py
@@ -693,6 +693,7 @@ The user has requested that this compaction PRIORITISE preserving all informatio
             # Store for iterative updates on next compaction
             self._previous_summary = summary
             self._summary_failure_cooldown_until = 0.0
+            self._summary_model_fallen_back = False
             return self._with_summary_prefix(summary)
         except RuntimeError:
             # No provider configured — long cooldown, unlikely to self-resolve
@@ -703,6 +704,34 @@ The user has requested that this compaction PRIORITISE preserving all informatio
                             _SUMMARY_FAILURE_COOLDOWN_SECONDS)
             return None
         except Exception as e:
+            # If the summary model is different from the main model and the
+            # error looks permanent (model not found, 503, 404), fall back to
+            # using the main model instead of entering cooldown that leaves
+            # context growing unbounded.  (#8620 sub-issue 4)
+            _status = getattr(e, "status_code", None) or getattr(getattr(e, "response", None), "status_code", None)
+            _err_str = str(e).lower()
+            _is_model_not_found = (
+                _status in (404, 503)
+                or "model_not_found" in _err_str
+                or "does not exist" in _err_str
+                or "no available channel" in _err_str
+            )
+            if (
+                _is_model_not_found
+                and self.summary_model
+                and self.summary_model != self.model
+                and not getattr(self, "_summary_model_fallen_back", False)
+            ):
+                self._summary_model_fallen_back = True
+                logging.warning(
+                    "Summary model '%s' not available (%s). "
+                    "Falling back to main model '%s' for compression.",
+                    self.summary_model, e, self.model,
+                )
+                self.summary_model = ""  # empty = use main model
+                self._summary_failure_cooldown_until = 0.0  # no cooldown
+                return self._generate_summary(messages, summary_budget)  # retry immediately
+
             # Transient errors (timeout, rate limit, network) — shorter cooldown
             _transient_cooldown = 60
             self._summary_failure_cooldown_until = time.monotonic() + _transient_cooldown

From 029938fbed2e74ea818cd65df454219c91370c60 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Tue, 14 Apr 2026 23:13:02 -0700
Subject: [PATCH 07/25] fix(cli): defensive subparser routing for argparse
 bpo-9338 (#10113)

On some Python versions, argparse fails to route subcommand tokens when
the parent parser has nargs='?' optional arguments (--continue).  The
symptom: 'hermes model' produces 'unrecognized arguments: model' even
though 'model' is a registered subcommand.

Fix: when argv contains a token matching a known subcommand, set
subparsers.required=True to force deterministic routing.  If that fails
(e.g. 'hermes -c model' where 'model' is consumed as the session name
for --continue), fall back to the default optional-subparsers behaviour.

Adds 13 tests covering all key argument combinations.

Reported via user screenshot showing the exact error on an installed
version with the model subcommand listed in usage but rejected at parse
time.
---
 hermes_cli/main.py                            |  32 +++-
 .../test_subparser_routing_fallback.py        | 148 ++++++++++++++++++
 2 files changed, 179 insertions(+), 1 deletion(-)
 create mode 100644 tests/hermes_cli/test_subparser_routing_fallback.py

diff --git a/hermes_cli/main.py b/hermes_cli/main.py
index 017280184..b45c9abb8 100644
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -6046,7 +6046,37 @@ Examples:
         sys.exit(1)
 
     _processed_argv = _coalesce_session_name_args(sys.argv[1:])
-    args = parser.parse_args(_processed_argv)
+
+    # ── Defensive subparser routing (bpo-9338 workaround) ───────────
+    # On some Python versions (notably <3.11), argparse fails to route
+    # subcommand tokens when the parent parser has nargs='?' optional
+    # arguments (--continue).  The symptom: "unrecognized arguments: model"
+    # even though 'model' is a registered subcommand.
+    #
+    # Fix: when argv contains a token matching a known subcommand, set
+    # subparsers.required=True to force deterministic routing.  If that
+    # fails (e.g. 'hermes -c model' where 'model' is consumed as the
+    # session name for --continue), fall back to the default behaviour.
+    import io as _io
+    _known_cmds = set(subparsers.choices.keys()) if hasattr(subparsers, "choices") else set()
+    _has_cmd_token = any(t in _known_cmds for t in _processed_argv if not t.startswith("-"))
+
+    if _has_cmd_token:
+        subparsers.required = True
+        _saved_stderr = sys.stderr
+        try:
+            sys.stderr = _io.StringIO()
+            args = parser.parse_args(_processed_argv)
+            sys.stderr = _saved_stderr
+        except SystemExit:
+            sys.stderr = _saved_stderr
+            # Subcommand name was consumed as a flag value (e.g. -c model).
+            # Fall back to optional subparsers so argparse handles it normally.
+            subparsers.required = False
+            args = parser.parse_args(_processed_argv)
+    else:
+        subparsers.required = False
+        args = parser.parse_args(_processed_argv)
 
     # Handle --version flag
     if args.version:
diff --git a/tests/hermes_cli/test_subparser_routing_fallback.py b/tests/hermes_cli/test_subparser_routing_fallback.py
new file mode 100644
index 000000000..ba907ca12
--- /dev/null
+++ b/tests/hermes_cli/test_subparser_routing_fallback.py
@@ -0,0 +1,148 @@
+"""Tests for the defensive subparser routing workaround (bpo-9338).
+
+The main() function in hermes_cli/main.py sets subparsers.required=True
+when argv contains a known subcommand name.  This forces deterministic
+routing on Python versions where argparse fails to match subcommand tokens
+when the parent parser has nargs='?' optional arguments (--continue).
+
+If the subcommand token is consumed as a flag value (e.g. `hermes -c model`
+to resume a session named 'model'), the required=True parse raises
+SystemExit and the code falls back to the default required=False behaviour.
+"""
+import argparse
+import io
+import sys
+
+import pytest
+
+
+def _build_parser():
+    """Build a minimal replica of the hermes top-level parser."""
+    parser = argparse.ArgumentParser(prog="hermes")
+    parser.add_argument("--version", "-V", action="store_true")
+    parser.add_argument("--resume", "-r", metavar="SESSION", default=None)
+    parser.add_argument(
+        "--continue", "-c",
+        dest="continue_last",
+        nargs="?",
+        const=True,
+        default=None,
+        metavar="SESSION_NAME",
+    )
+    parser.add_argument("--worktree", "-w", action="store_true", default=False)
+    parser.add_argument("--skills", "-s", action="append", default=None)
+    parser.add_argument("--yolo", action="store_true", default=False)
+    parser.add_argument("--pass-session-id", action="store_true", default=False)
+
+    subparsers = parser.add_subparsers(dest="command", help="Command to run")
+    chat_p = subparsers.add_parser("chat")
+    chat_p.add_argument("-q", "--query", default=None)
+    subparsers.add_parser("model")
+    subparsers.add_parser("gateway")
+    subparsers.add_parser("setup")
+    return parser, subparsers
+
+
+def _safe_parse(parser, subparsers, argv):
+    """Replica of the defensive parsing logic from main()."""
+    known_cmds = set(subparsers.choices.keys()) if hasattr(subparsers, "choices") else set()
+    has_cmd_token = any(t in known_cmds for t in argv if not t.startswith("-"))
+
+    if has_cmd_token:
+        subparsers.required = True
+        saved_stderr = sys.stderr
+        try:
+            sys.stderr = io.StringIO()
+            args = parser.parse_args(argv)
+            sys.stderr = saved_stderr
+            return args
+        except SystemExit:
+            sys.stderr = saved_stderr
+            subparsers.required = False
+            return parser.parse_args(argv)
+    else:
+        subparsers.required = False
+        return parser.parse_args(argv)
+
+
+class TestSubparserRoutingFallback:
+    """Verify the bpo-9338 defensive routing works for all key cases."""
+
+    def test_direct_subcommand(self):
+        parser, sub = _build_parser()
+        args = _safe_parse(parser, sub, ["model"])
+        assert args.command == "model"
+
+    def test_subcommand_with_flags(self):
+        parser, sub = _build_parser()
+        args = _safe_parse(parser, sub, ["--yolo", "model"])
+        assert args.command == "model"
+        assert args.yolo is True
+
+    def test_bare_hermes_defaults_to_none(self):
+        parser, sub = _build_parser()
+        args = _safe_parse(parser, sub, [])
+        assert args.command is None
+
+    def test_flags_only_defaults_to_none(self):
+        parser, sub = _build_parser()
+        args = _safe_parse(parser, sub, ["--yolo"])
+        assert args.command is None
+        assert args.yolo is True
+
+    def test_continue_flag_alone(self):
+        parser, sub = _build_parser()
+        args = _safe_parse(parser, sub, ["-c"])
+        assert args.command is None
+        assert args.continue_last is True
+
+    def test_continue_with_session_name(self):
+        parser, sub = _build_parser()
+        args = _safe_parse(parser, sub, ["-c", "myproject"])
+        assert args.command is None
+        assert args.continue_last == "myproject"
+
+    def test_continue_with_subcommand_name_as_session(self):
+        """Edge case: session named 'model' — should be treated as session name, not subcommand."""
+        parser, sub = _build_parser()
+        args = _safe_parse(parser, sub, ["-c", "model"])
+        assert args.command is None
+        assert args.continue_last == "model"
+
+    def test_continue_with_session_then_subcommand(self):
+        parser, sub = _build_parser()
+        args = _safe_parse(parser, sub, ["-c", "myproject", "model"])
+        assert args.command == "model"
+        assert args.continue_last == "myproject"
+
+    def test_chat_with_query(self):
+        parser, sub = _build_parser()
+        args = _safe_parse(parser, sub, ["chat", "-q", "hello"])
+        assert args.command == "chat"
+        assert args.query == "hello"
+
+    def test_resume_flag(self):
+        parser, sub = _build_parser()
+        args = _safe_parse(parser, sub, ["-r", "abc123"])
+        assert args.command is None
+        assert args.resume == "abc123"
+
+    def test_resume_with_subcommand(self):
+        parser, sub = _build_parser()
+        args = _safe_parse(parser, sub, ["-r", "abc123", "chat"])
+        assert args.command == "chat"
+        assert args.resume == "abc123"
+
+    def test_skills_flag_with_subcommand(self):
+        parser, sub = _build_parser()
+        args = _safe_parse(parser, sub, ["-s", "myskill", "chat"])
+        assert args.command == "chat"
+        assert args.skills == ["myskill"]
+
+    def test_all_flags_with_subcommand(self):
+        parser, sub = _build_parser()
+        args = _safe_parse(parser, sub, ["--yolo", "-w", "-s", "myskill", "model"])
+        assert args.command == "model"
+        assert args.yolo is True
+        assert args.worktree is True
+        assert args.skills == ["myskill"]

From 9932366f3cac1b85eb1dd8a70ca32fffdc973512 Mon Sep 17 00:00:00 2001
From: Teknium <teknium1@gmail.com>
Date: Tue, 14 Apr 2026 23:09:44 -0700
Subject: [PATCH 08/25] feat(doctor): add Command Installation check for hermes
 bin symlink

hermes doctor now checks whether the ~/.local/bin/hermes symlink exists
and points to the correct venv entry point. With --fix, it creates or
repairs the symlink automatically.

Covers:
- Missing symlink at ~/.local/bin/hermes (or $PREFIX/bin on Termux)
- Symlink pointing to wrong target
- Missing venv entry point (venv/bin/hermes or .venv/bin/hermes)
- PATH warning when ~/.local/bin is not on PATH
- Skipped on Windows (different mechanism)

Addresses user report: 'python -m hermes_cli.main doesn't have an option
to fix the local bin/install'

10 new tests covering all scenarios.
---
 hermes_cli/doctor.py                          |  83 +++++-
 .../hermes_cli/test_doctor_command_install.py | 275 ++++++++++++++++++
 2 files changed, 357 insertions(+), 1 deletion(-)
 create mode 100644 tests/hermes_cli/test_doctor_command_install.py

diff --git a/hermes_cli/doctor.py b/hermes_cli/doctor.py
index 892ff0021..b89a80409 100644
--- a/hermes_cli/doctor.py
+++ b/hermes_cli/doctor.py
@@ -8,6 +8,7 @@ import os
 import sys
 import subprocess
 import shutil
+from pathlib import Path
 
 from hermes_cli.config import get_project_root, get_hermes_home, get_env_path
 from hermes_constants import display_hermes_home
@@ -513,7 +514,87 @@ def run_doctor(args):
             pass
 
     _check_gateway_service_linger(issues)
-    
+
+    # =========================================================================
+    # Check: Command installation (hermes bin symlink)
+    # =========================================================================
+    if sys.platform != "win32":
+        print()
+        print(color("◆ Command Installation", Colors.CYAN, Colors.BOLD))
+
+        # Determine the venv entry point location
+        _venv_bin = None
+        for _venv_name in ("venv", ".venv"):
+            _candidate = PROJECT_ROOT / _venv_name / "bin" / "hermes"
+            if _candidate.exists():
+                _venv_bin = _candidate
+                break
+
+        # Determine the expected command link directory (mirrors install.sh logic)
+        _prefix = os.environ.get("PREFIX", "")
+        _is_termux_env = bool(os.environ.get("TERMUX_VERSION")) or "com.termux/files/usr" in _prefix
+        if _is_termux_env and _prefix:
+            _cmd_link_dir = Path(_prefix) / "bin"
+            _cmd_link_display = "$PREFIX/bin"
+        else:
+            _cmd_link_dir = Path.home() / ".local" / "bin"
+            _cmd_link_display = "~/.local/bin"
+        _cmd_link = _cmd_link_dir / "hermes"
+
+        if _venv_bin is None:
+            check_warn(
+                "Venv entry point not found",
+                "(hermes not in venv/bin/ or .venv/bin/ — reinstall with pip install -e '.[all]')"
+            )
+            manual_issues.append(
+                f"Reinstall entry point: cd {PROJECT_ROOT} && source venv/bin/activate && pip install -e '.[all]'"
+            )
+        else:
+            check_ok(f"Venv entry point exists ({_venv_bin.relative_to(PROJECT_ROOT)})")
+
+            # Check the symlink at the command link location
+            if _cmd_link.is_symlink():
+                _target = _cmd_link.resolve()
+                _expected = _venv_bin.resolve()
+                if _target == _expected:
+                    check_ok(f"{_cmd_link_display}/hermes → correct target")
+                else:
+                    check_warn(
+                        f"{_cmd_link_display}/hermes points to wrong target",
+                        f"(→ {_target}, expected → {_expected})"
+                    )
+                    if should_fix:
+                        _cmd_link.unlink()
+                        _cmd_link.symlink_to(_venv_bin)
+                        check_ok(f"Fixed symlink: {_cmd_link_display}/hermes → {_venv_bin}")
+                        fixed_count += 1
+                    else:
+                        issues.append(f"Broken symlink at {_cmd_link_display}/hermes — run 'hermes doctor --fix'")
+            elif _cmd_link.exists():
+                # It's a regular file, not a symlink — possibly a wrapper script
+                check_ok(f"{_cmd_link_display}/hermes exists (non-symlink)")
+            else:
+                check_fail(
+                    f"{_cmd_link_display}/hermes not found",
+                    "(hermes command may not work outside the venv)"
+                )
+                if should_fix:
+                    _cmd_link_dir.mkdir(parents=True, exist_ok=True)
+                    _cmd_link.symlink_to(_venv_bin)
+                    check_ok(f"Created symlink: {_cmd_link_display}/hermes → {_venv_bin}")
+                    fixed_count += 1
+
+                    # Check if the link dir is on PATH
+                    _path_dirs = os.environ.get("PATH", "").split(os.pathsep)
+                    if str(_cmd_link_dir) not in _path_dirs:
+                        check_warn(
+                            f"{_cmd_link_display} is not on your PATH",
+                            "(add it to your shell config: export PATH=\"$HOME/.local/bin:$PATH\")"
+                        )
+                        manual_issues.append(f"Add {_cmd_link_display} to your PATH")
+                else:
+                    issues.append(f"Missing {_cmd_link_display}/hermes symlink — run 'hermes doctor --fix'")
+
     # =========================================================================
     # Check: External tools
     # =========================================================================
diff --git a/tests/hermes_cli/test_doctor_command_install.py b/tests/hermes_cli/test_doctor_command_install.py
new file mode 100644
index 000000000..8b046b9c2
--- /dev/null
+++ b/tests/hermes_cli/test_doctor_command_install.py
@@ -0,0 +1,275 @@
+"""Tests for the Command Installation check in hermes doctor."""
+
+import os
+import sys
+import types
+from argparse import Namespace
+from pathlib import Path
+
+import pytest
+
+import hermes_cli.doctor as doctor_mod
+
+
+def _setup_doctor_env(monkeypatch, tmp_path, venv_name="venv"):
+    """Create a minimal HERMES_HOME + PROJECT_ROOT for doctor tests."""
+    home = tmp_path / ".hermes"
+    home.mkdir(parents=True, exist_ok=True)
+    (home / "config.yaml").write_text("memory: {}\n", encoding="utf-8")
+
+    project = tmp_path / "project"
+    project.mkdir(exist_ok=True)
+
+    # Create a fake venv entry point
+    venv_bin_dir = project / venv_name / "bin"
+    venv_bin_dir.mkdir(parents=True, exist_ok=True)
+    hermes_bin = venv_bin_dir / "hermes"
+    hermes_bin.write_text("#!/usr/bin/env python\n# entry point\n")
+    hermes_bin.chmod(0o755)
+
+    monkeypatch.setattr(doctor_mod, "HERMES_HOME", home)
+    monkeypatch.setattr(doctor_mod, "PROJECT_ROOT", project)
+    monkeypatch.setattr(doctor_mod, "_DHH", str(home))
+
+    # Stub model_tools so doctor doesn't fail on import
+    fake_model_tools = types.SimpleNamespace(
+        check_tool_availability=lambda *a, **kw: ([], []),
+        TOOLSET_REQUIREMENTS={},
+    )
+    monkeypatch.setitem(sys.modules, "model_tools", fake_model_tools)
+
+    # Stub auth checks
+    try:
+        from hermes_cli import auth as _auth_mod
+        monkeypatch.setattr(_auth_mod, "get_nous_auth_status", lambda: {})
+        monkeypatch.setattr(_auth_mod, "get_codex_auth_status", lambda: {})
+    except Exception:
+        pass
+
+    # Stub httpx.get to avoid network calls
+    try:
+        import httpx
+        monkeypatch.setattr(httpx, "get", lambda *a, **kw: types.SimpleNamespace(status_code=200))
+    except Exception:
+        pass
+
+    return home, project, hermes_bin
+
+
+def _run_doctor(fix=False):
+    """Run doctor and capture stdout."""
+    import io
+    import contextlib
+
+    buf = io.StringIO()
+    with contextlib.redirect_stdout(buf):
+        doctor_mod.run_doctor(Namespace(fix=fix))
+    return buf.getvalue()
+
+
+class TestDoctorCommandInstallation:
+    """Tests for the ◆ Command Installation section."""
+
+    @pytest.mark.skipif(sys.platform == "win32", reason="Symlink check is Unix-only")
+    def test_correct_symlink_shows_ok(self, monkeypatch, tmp_path):
+        home, project, hermes_bin = _setup_doctor_env(monkeypatch, tmp_path)
+
+        # Create the command link dir with correct symlink
+        cmd_link_dir = tmp_path / ".local" / "bin"
+        cmd_link_dir.mkdir(parents=True)
+        cmd_link = cmd_link_dir / "hermes"
+        cmd_link.symlink_to(hermes_bin)
+
+        monkeypatch.setattr(Path, "home", lambda: tmp_path)
+
+        out = _run_doctor(fix=False)
+        assert "Command Installation" in out
+        assert "Venv entry point exists" in out
+        assert "correct target" in out
+
+    @pytest.mark.skipif(sys.platform == "win32", reason="Symlink check is Unix-only")
+    def test_missing_symlink_shows_fail(self, monkeypatch, tmp_path):
+        home, project, hermes_bin = _setup_doctor_env(monkeypatch, tmp_path)
+
+        monkeypatch.setattr(Path, "home", lambda: tmp_path)
+        # Don't create the symlink — it should be missing
+
+        out = _run_doctor(fix=False)
+        assert "Command Installation" in out
+        assert "Venv entry point exists" in out
+        assert "not found" in out
+        assert "hermes doctor --fix" in out
+
+    @pytest.mark.skipif(sys.platform == "win32", reason="Symlink check is Unix-only")
+    def test_fix_creates_missing_symlink(self, monkeypatch, tmp_path):
+        home, project, hermes_bin = _setup_doctor_env(monkeypatch, tmp_path)
+
+        monkeypatch.setattr(Path, "home", lambda: tmp_path)
+
+        out = _run_doctor(fix=True)
+        assert "Command Installation" in out
+        assert "Created symlink" in out
+
+        # Verify the symlink was actually created
+        cmd_link = tmp_path / ".local" / "bin" / "hermes"
+        assert cmd_link.is_symlink()
+        assert cmd_link.resolve() == hermes_bin.resolve()
+
+    @pytest.mark.skipif(sys.platform == "win32", reason="Symlink check is Unix-only")
+    def test_wrong_target_symlink_shows_warn(self, monkeypatch, tmp_path):
+        home, project, hermes_bin = _setup_doctor_env(monkeypatch, tmp_path)
+
+        # Create a symlink pointing to the wrong target
+        cmd_link_dir = tmp_path / ".local" / "bin"
+        cmd_link_dir.mkdir(parents=True)
+        cmd_link = cmd_link_dir / "hermes"
+        wrong_target = tmp_path / "wrong_hermes"
+        wrong_target.write_text("#!/usr/bin/env python\n")
+        cmd_link.symlink_to(wrong_target)
+
+        monkeypatch.setattr(Path, "home", lambda: tmp_path)
+
+        out = _run_doctor(fix=False)
+        assert "Command Installation" in out
+        assert "wrong target" in out
+
+    @pytest.mark.skipif(sys.platform == "win32", reason="Symlink check is Unix-only")
+    def test_fix_repairs_wrong_symlink(self, monkeypatch, tmp_path):
+        home, project, hermes_bin = _setup_doctor_env(monkeypatch, tmp_path)
+
+        # Create a symlink pointing to wrong target
+        cmd_link_dir = tmp_path / ".local" / "bin"
+        cmd_link_dir.mkdir(parents=True)
+        cmd_link = cmd_link_dir / "hermes"
+        wrong_target = tmp_path / "wrong_hermes"
+        wrong_target.write_text("#!/usr/bin/env python\n")
+        cmd_link.symlink_to(wrong_target)
+
+        monkeypatch.setattr(Path, "home", lambda: tmp_path)
+
+        out = _run_doctor(fix=True)
+        assert "Fixed symlink" in out
+
+        # Verify the symlink now points to the correct target
+        assert cmd_link.is_symlink()
+        assert cmd_link.resolve() == hermes_bin.resolve()
+
+    @pytest.mark.skipif(sys.platform == "win32", reason="Symlink check is Unix-only")
+    def test_missing_venv_entry_point_shows_warn(self, monkeypatch, tmp_path):
+        home = tmp_path / ".hermes"
+        home.mkdir(parents=True, exist_ok=True)
+        (home / "config.yaml").write_text("memory: {}\n", encoding="utf-8")
+
+        project = tmp_path / "project"
+        project.mkdir(exist_ok=True)
+        # Do NOT create any venv entry point
+
+        monkeypatch.setattr(doctor_mod, "HERMES_HOME", home)
+        monkeypatch.setattr(doctor_mod, "PROJECT_ROOT", project)
+        monkeypatch.setattr(doctor_mod, "_DHH", str(home))
+        monkeypatch.setattr(Path, "home", lambda: tmp_path)
+
+        fake_model_tools = types.SimpleNamespace(
+            check_tool_availability=lambda *a, **kw: ([], []),
+            TOOLSET_REQUIREMENTS={},
+        )
+        monkeypatch.setitem(sys.modules, "model_tools", fake_model_tools)
+        try:
+            from hermes_cli import auth as _auth_mod
+            monkeypatch.setattr(_auth_mod, "get_nous_auth_status", lambda: {})
+            monkeypatch.setattr(_auth_mod, "get_codex_auth_status", lambda: {})
+        except Exception:
+            pass
+        try:
+            import httpx
+            monkeypatch.setattr(httpx, "get", lambda *a, **kw: types.SimpleNamespace(status_code=200))
+        except Exception:
+            pass
+
+        out = _run_doctor(fix=False)
+        assert "Command Installation" in out
+        assert "Venv entry point not found" in out
+
+    @pytest.mark.skipif(sys.platform == "win32", reason="Symlink check is Unix-only")
+    def test_dot_venv_dir_is_found(self, monkeypatch, tmp_path):
+        """The check finds entry points in .venv/ as well as venv/."""
+        home, project, _ = _setup_doctor_env(monkeypatch, tmp_path, venv_name=".venv")
+
+        # Create the command link with correct symlink
+        hermes_bin = project / ".venv" / "bin" / "hermes"
+        cmd_link_dir = tmp_path / ".local" / "bin"
+        cmd_link_dir.mkdir(parents=True)
+        cmd_link = cmd_link_dir / "hermes"
+        cmd_link.symlink_to(hermes_bin)
+
+        monkeypatch.setattr(Path, "home", lambda: tmp_path)
+
+        out = _run_doctor(fix=False)
+        assert "Venv entry point exists" in out
+        assert ".venv/bin/hermes" in out
+
+    @pytest.mark.skipif(sys.platform == "win32", reason="Symlink check is Unix-only")
+    def test_non_symlink_regular_file_shows_ok(self, monkeypatch, tmp_path):
+        """If ~/.local/bin/hermes is a regular file (not symlink), accept it."""
+        home, project, hermes_bin = _setup_doctor_env(monkeypatch, tmp_path)
+
+        cmd_link_dir = tmp_path / ".local" / "bin"
+        cmd_link_dir.mkdir(parents=True)
+        cmd_link = cmd_link_dir / "hermes"
+        cmd_link.write_text("#!/bin/sh\nexec python -m hermes_cli.main \"$@\"\n")
+
+        monkeypatch.setattr(Path, "home", lambda: tmp_path)
+
+        out = _run_doctor(fix=False)
+        assert "non-symlink" in out
+
+    @pytest.mark.skipif(sys.platform == "win32", reason="Symlink check is Unix-only")
+    def test_termux_uses_prefix_bin(self, monkeypatch, tmp_path):
+        """On Termux, the command link dir is $PREFIX/bin."""
+        prefix_dir = tmp_path / "termux_prefix"
+        prefix_bin = prefix_dir / "bin"
+        prefix_bin.mkdir(parents=True)
+
+        home, project, hermes_bin = _setup_doctor_env(monkeypatch, tmp_path)
+
+        monkeypatch.setenv("TERMUX_VERSION", "0.118.3")
+        monkeypatch.setenv("PREFIX", str(prefix_dir))
+        monkeypatch.setattr(Path, "home", lambda: tmp_path)
+
+        out = _run_doctor(fix=False)
+        assert "Command Installation" in out
+        assert "$PREFIX/bin" in out
+
+    def test_windows_skips_check(self, monkeypatch, tmp_path):
+        """On Windows, the Command Installation section is skipped."""
+        home = tmp_path / ".hermes"
+        home.mkdir(parents=True, exist_ok=True)
+        (home / "config.yaml").write_text("memory: {}\n", encoding="utf-8")
+
+        project = tmp_path / "project"
+        project.mkdir(exist_ok=True)
+
+        monkeypatch.setattr(doctor_mod, "HERMES_HOME", home)
+        monkeypatch.setattr(doctor_mod, "PROJECT_ROOT", project)
+        monkeypatch.setattr(doctor_mod, "_DHH", str(home))
+        monkeypatch.setattr(sys, "platform", "win32")
+
+        fake_model_tools = types.SimpleNamespace(
+            check_tool_availability=lambda *a, **kw: ([], []),
+            TOOLSET_REQUIREMENTS={},
+        )
+        monkeypatch.setitem(sys.modules, "model_tools", fake_model_tools)
+        try:
+            from hermes_cli import auth as _auth_mod
+            monkeypatch.setattr(_auth_mod, "get_nous_auth_status", lambda: {})
+            monkeypatch.setattr(_auth_mod, "get_codex_auth_status", lambda: {})
+        except Exception:
+            pass
+        try:
+            import httpx
+            monkeypatch.setattr(httpx, "get", lambda *a, **kw: types.SimpleNamespace(status_code=200))
+        except Exception:
+            pass
+
+        out = _run_doctor(fix=False)
+        assert "Command Installation" not in out

From da8bab77fb762bc554f45da3cc2eb3a823983d4b Mon Sep 17 00:00:00 2001
From: Ubuntu
 <davida@boombox.kh1kbv5xmjqelbtffzsvgoyjie.cx.internal.cloudapp.net>
Date: Mon, 13 Apr 2026 10:32:31 +0000
Subject: [PATCH 09/25] fix(cli): restore messaging toolset for gateway
 platforms

---
 hermes_cli/tools_config.py            |  1 +
 tests/hermes_cli/test_tools_config.py | 10 ++++++++++
 2 files changed, 11 insertions(+)

diff --git a/hermes_cli/tools_config.py b/hermes_cli/tools_config.py
index b518c001e..5fe8cdc79 100644
--- a/hermes_cli/tools_config.py
+++ b/hermes_cli/tools_config.py
@@ -63,6 +63,7 @@ CONFIGURABLE_TOOLSETS = [
     ("clarify",         "❓ Clarifying Questions",      "clarify"),
     ("delegation",      "👥 Task Delegation",           "delegate_task"),
     ("cronjob",         "⏰ Cron Jobs",                 "create/list/update/pause/resume/run, with optional attached skills"),
+    ("messaging",       "📨 Cross-Platform Messaging",  "send_message"),
     ("rl",              "🧪 RL Training",               "Tinker-Atropos training tools"),
     ("homeassistant",    "🏠 Home Assistant",           "smart home device control"),
 ]
diff --git a/tests/hermes_cli/test_tools_config.py b/tests/hermes_cli/test_tools_config.py
index ed79559d2..3ad0be886 100644
--- a/tests/hermes_cli/test_tools_config.py
+++ b/tests/hermes_cli/test_tools_config.py
@@ -8,6 +8,7 @@ from hermes_cli.tools_config import (
     _platform_toolset_summary,
     _save_platform_tools,
     _toolset_has_keys,
+    CONFIGURABLE_TOOLSETS,
     TOOL_CATEGORIES,
     _visible_providers,
     tools_command,
@@ -22,6 +23,15 @@ def test_get_platform_tools_uses_default_when_platform_not_configured():
     assert enabled
 
 
+def test_configurable_toolsets_include_messaging():
+    assert any(ts_key == "messaging" for ts_key, _, _ in CONFIGURABLE_TOOLSETS)
+
+def test_get_platform_tools_default_telegram_includes_messaging():
+    enabled = _get_platform_tools({}, "telegram")
+
+    assert "messaging" in enabled
+
+
 def test_get_platform_tools_preserves_explicit_empty_selection():
     config = {"platform_toolsets": {"cli": []}}
 

From df7be3d8aef682e1cd03e028548fbad7a2d132a2 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Wed, 15 Apr 2026 00:07:50 -0700
Subject: [PATCH 10/25] fix(cli): /model picker shows curated models instead of
 full catalog (#10146)

The /model picker called provider_model_ids() which fetches the FULL
live API catalog (hundreds of models for Anthropic, Copilot, etc.) and
only fell back to the curated list when the live fetch failed.

This flips the priority: use the curated model list from
list_authenticated_providers() (same lists as `hermes model` and
gateway pickers), falling back to provider_model_ids() only when the
curated list is empty (e.g. user-defined endpoints).
---
 cli.py | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/cli.py b/cli.py
index b8f34c9bb..0ed2d3232 100644
--- a/cli.py
+++ b/cli.py
@@ -4588,16 +4588,19 @@ class HermesCLI:
                 self._close_model_picker()
                 return
             provider_data = providers[selected]
-            model_list = []
-            try:
-                from hermes_cli.models import provider_model_ids
-                live = provider_model_ids(provider_data["slug"])
-                if live:
-                    model_list = live
-            except Exception:
-                pass
+            # Use the curated model list from list_authenticated_providers()
+            # (same lists as `hermes model` and gateway pickers).
+            # Only fall back to the live provider catalog when the curated
+            # list is empty (e.g. user-defined endpoints with no curated list).
+            model_list = provider_data.get("models", [])
             if not model_list:
-                model_list = provider_data.get("models", [])
+                try:
+                    from hermes_cli.models import provider_model_ids
+                    live = provider_model_ids(provider_data["slug"])
+                    if live:
+                        model_list = live
+                except Exception:
+                    pass
             state["stage"] = "model"
             state["provider_data"] = provider_data
             state["model_list"] = model_list

From 03446e06bbfe60bff074d1bbb5336bdd6849ddc3 Mon Sep 17 00:00:00 2001
From: bkadish <146666892+bkadish@users.noreply.github.com>
Date: Wed, 8 Apr 2026 05:15:15 -0700
Subject: [PATCH 11/25] fix(send_message): accept Matrix room IDs and user
 MXIDs as explicit targets
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`_parse_target_ref` has explicit-reference branches for Telegram, Feishu,
and numeric IDs, but none for Matrix. As a result, callers of
`send_message(target="matrix:!roomid:server")` or
`send_message(target="matrix:@user:server")` fall through to
`(None, None, False)` and the tool errors out with a resolution failure —
even though a raw Matrix room ID or MXID is the most unambiguous possible
target.

Three-line fix: recognize `!…` as a room ID and `@…` as a user MXID when
platform is `matrix`, and return them as explicit targets. Alias-based
targets (`#…`) continue to go through the normal resolve path.
---
 tools/send_message_tool.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tools/send_message_tool.py b/tools/send_message_tool.py
index 391e03baa..f99bcdaf4 100644
--- a/tools/send_message_tool.py
+++ b/tools/send_message_tool.py
@@ -248,6 +248,9 @@ def _parse_target_ref(platform_name: str, target_ref: str):
             return match.group(1), None, True
     if target_ref.lstrip("-").isdigit():
         return target_ref, None, True
+    # Matrix room IDs (start with !) and user IDs (start with @) are explicit
+    if platform_name == "matrix" and (target_ref.startswith("!") or target_ref.startswith("@")):
+        return target_ref, None, True
     return None, None, False
 
 

From 180b14442f88003cabb11f7183e2bb57e81f5f1e Mon Sep 17 00:00:00 2001
From: Teknium <teknium1@gmail.com>
Date: Tue, 14 Apr 2026 23:15:19 -0700
Subject: [PATCH 12/25] test: add _parse_target_ref Matrix coverage for
 salvaged PR #6144

---
 tests/tools/test_send_message_tool.py | 32 +++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/tests/tools/test_send_message_tool.py b/tests/tools/test_send_message_tool.py
index d6f07e2e6..b9d99e2b3 100644
--- a/tests/tools/test_send_message_tool.py
+++ b/tests/tools/test_send_message_tool.py
@@ -752,6 +752,38 @@ class TestParseTargetRefDiscord:
         assert is_explicit is True
 
 
+class TestParseTargetRefMatrix:
+    """_parse_target_ref correctly handles Matrix room IDs and user MXIDs."""
+
+    def test_matrix_room_id_is_explicit(self):
+        """Matrix room IDs (!) are recognized as explicit targets."""
+        chat_id, thread_id, is_explicit = _parse_target_ref("matrix", "!HLOQwxYGgFPMPJUSNR:matrix.org")
+        assert chat_id == "!HLOQwxYGgFPMPJUSNR:matrix.org"
+        assert thread_id is None
+        assert is_explicit is True
+
+    def test_matrix_user_mxid_is_explicit(self):
+        """Matrix user MXIDs (@) are recognized as explicit targets."""
+        chat_id, thread_id, is_explicit = _parse_target_ref("matrix", "@hermes:matrix.org")
+        assert chat_id == "@hermes:matrix.org"
+        assert thread_id is None
+        assert is_explicit is True
+
+    def test_matrix_alias_is_not_explicit(self):
+        """Matrix room aliases (#) are NOT explicit — they need resolution."""
+        chat_id, thread_id, is_explicit = _parse_target_ref("matrix", "#general:matrix.org")
+        assert chat_id is None
+        assert is_explicit is False
+
+    def test_matrix_prefix_only_matches_matrix_platform(self):
+        """! and @ prefixes are only treated as explicit for the matrix platform."""
+        chat_id, _, is_explicit = _parse_target_ref("telegram", "!something")
+        assert is_explicit is False
+
+        chat_id, _, is_explicit = _parse_target_ref("discord", "@someone")
+        assert is_explicit is False
+
+
 class TestSendDiscordThreadId:
     """_send_discord uses thread_id when provided."""
 

From e69526be799edcfa02c25bf8966af2d8cce65ee3 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Wed, 15 Apr 2026 00:10:59 -0700
Subject: [PATCH 13/25] fix(send_message): URL-encode Matrix room IDs and add
 Matrix to schema examples (#10151)

Matrix room IDs contain ! and : which must be percent-encoded in URI
path segments per the Matrix C-S spec. Without encoding, some
homeservers reject the PUT request.

Also adds 'matrix:!roomid:server.org' and 'matrix:@user:server.org'
to the tool schema examples so models know the correct target format.
---
 tests/tools/test_send_message_tool.py | 36 +++++++++++++++++++++++++++
 tools/send_message_tool.py            |  6 +++--
 2 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/tests/tools/test_send_message_tool.py b/tests/tools/test_send_message_tool.py
index b9d99e2b3..9a277d598 100644
--- a/tests/tools/test_send_message_tool.py
+++ b/tests/tools/test_send_message_tool.py
@@ -886,3 +886,39 @@ class TestSendToPlatformDiscordThread:
         send_mock.assert_awaited_once()
         _, call_kwargs = send_mock.await_args
         assert call_kwargs["thread_id"] is None
+
+
+class TestSendMatrixUrlEncoding:
+    """_send_matrix URL-encodes Matrix room IDs in the API path."""
+
+    def test_room_id_is_percent_encoded_in_url(self):
+        """Matrix room IDs with ! and : are percent-encoded in the PUT URL."""
+        import aiohttp
+
+        mock_resp = MagicMock()
+        mock_resp.status = 200
+        mock_resp.json = AsyncMock(return_value={"event_id": "$evt123"})
+        mock_resp.__aenter__ = AsyncMock(return_value=mock_resp)
+        mock_resp.__aexit__ = AsyncMock(return_value=None)
+
+        mock_session = MagicMock()
+        mock_session.put = MagicMock(return_value=mock_resp)
+        mock_session.__aenter__ = AsyncMock(return_value=mock_session)
+        mock_session.__aexit__ = AsyncMock(return_value=None)
+
+        with patch("aiohttp.ClientSession", return_value=mock_session):
+            from tools.send_message_tool import _send_matrix
+            result = asyncio.get_event_loop().run_until_complete(
+                _send_matrix(
+                    "test_token",
+                    {"homeserver": "https://matrix.example.org"},
+                    "!HLOQwxYGgFPMPJUSNR:matrix.org",
+                    "hello",
+                )
+            )
+
+        assert result["success"] is True
+        # Verify the URL was called with percent-encoded room ID
+        put_url = mock_session.put.call_args[0][0]
+        assert "%21HLOQwxYGgFPMPJUSNR%3Amatrix.org" in put_url
+        assert "!HLOQwxYGgFPMPJUSNR:matrix.org" not in put_url
diff --git a/tools/send_message_tool.py b/tools/send_message_tool.py
index f99bcdaf4..6c9632b2c 100644
--- a/tools/send_message_tool.py
+++ b/tools/send_message_tool.py
@@ -68,7 +68,7 @@ SEND_MESSAGE_SCHEMA = {
             },
             "target": {
                 "type": "string",
-                "description": "Delivery target. Format: 'platform' (uses home channel), 'platform:#channel-name', 'platform:chat_id', or 'platform:chat_id:thread_id' for Telegram topics and Discord threads. Examples: 'telegram', 'telegram:-1001234567890:17585', 'discord:999888777:555444333', 'discord:#bot-home', 'slack:#engineering', 'signal:+155****4567'"
+                "description": "Delivery target. Format: 'platform' (uses home channel), 'platform:#channel-name', 'platform:chat_id', or 'platform:chat_id:thread_id' for Telegram topics and Discord threads. Examples: 'telegram', 'telegram:-1001234567890:17585', 'discord:999888777:555444333', 'discord:#bot-home', 'slack:#engineering', 'signal:+155****4567', 'matrix:!roomid:server.org', 'matrix:@user:server.org'"
             },
             "message": {
                 "type": "string",
@@ -819,7 +819,9 @@ async def _send_matrix(token, extra, chat_id, message):
         if not homeserver or not token:
             return {"error": "Matrix not configured (MATRIX_HOMESERVER, MATRIX_ACCESS_TOKEN required)"}
         txn_id = f"hermes_{int(time.time() * 1000)}_{os.urandom(4).hex()}"
-        url = f"{homeserver}/_matrix/client/v3/rooms/{chat_id}/send/m.room.message/{txn_id}"
+        from urllib.parse import quote
+        encoded_room = quote(chat_id, safe="")
+        url = f"{homeserver}/_matrix/client/v3/rooms/{encoded_room}/send/m.room.message/{txn_id}"
         headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
 
         # Build message payload with optional HTML formatted_body.

From a4e1842f1217983f05fd40f544f79b8a785324b4 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Wed, 15 Apr 2026 03:19:43 -0700
Subject: [PATCH 14/25] fix: strip reasoning item IDs from Responses API input
 when store=False (#10217)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

With store=False (our default for the Responses API), the API does not
persist response items.  When reasoning items with 'id' fields were
replayed on subsequent turns, the API attempted a server-side lookup
for those IDs and returned 404:

  Item with id 'rs_...' not found. Items are not persisted when store
  is set to false.

The encrypted_content blob is self-contained for reasoning chain
continuity — the id field is unnecessary and triggers the failed lookup.

Fix: strip 'id' from reasoning items in both _chat_messages_to_responses_input
(message conversion) and _preflight_codex_input_items (normalization layer).
The id is still used for local deduplication but never sent to the API.

Reported by @zuogl448 on GPT-5.4.
---
 run_agent.py                                  | 13 +++++++--
 .../test_run_agent_codex_responses.py         | 28 ++++++++++++-------
 2 files changed, 28 insertions(+), 13 deletions(-)

diff --git a/run_agent.py b/run_agent.py
index 48382389e..efaeba829 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -3589,7 +3589,12 @@ class AIAgent:
                                 item_id = ri.get("id")
                                 if item_id and item_id in seen_item_ids:
                                     continue
-                                items.append(ri)
+                                # Strip the "id" field — with store=False the
+                                # Responses API cannot look up items by ID and
+                                # returns 404.  The encrypted_content blob is
+                                # self-contained for reasoning chain continuity.
+                                replay_item = {k: v for k, v in ri.items() if k != "id"}
+                                items.append(replay_item)
                                 if item_id:
                                     seen_item_ids.add(item_id)
                                 has_codex_reasoning = True
@@ -3730,8 +3735,10 @@ class AIAgent:
                             continue
                         seen_ids.add(item_id)
                     reasoning_item = {"type": "reasoning", "encrypted_content": encrypted}
-                    if isinstance(item_id, str) and item_id:
-                        reasoning_item["id"] = item_id
+                    # Do NOT include the "id" in the outgoing item — with
+                    # store=False (our default) the API tries to resolve the
+                    # id server-side and returns 404.  The id is still used
+                    # above for local deduplication via seen_ids.
                     summary = item.get("summary")
                     if isinstance(summary, list):
                         reasoning_item["summary"] = summary
diff --git a/tests/run_agent/test_run_agent_codex_responses.py b/tests/run_agent/test_run_agent_codex_responses.py
index 785d85886..2b2295565 100644
--- a/tests/run_agent/test_run_agent_codex_responses.py
+++ b/tests/run_agent/test_run_agent_codex_responses.py
@@ -1249,13 +1249,17 @@ def test_chat_messages_to_responses_input_deduplicates_reasoning_ids(monkeypatch
     ]
     items = agent._chat_messages_to_responses_input(messages)
 
-    reasoning_ids = [it["id"] for it in items if it.get("type") == "reasoning"]
-    # rs_aaa should appear only once (first occurrence kept)
-    assert reasoning_ids.count("rs_aaa") == 1
-    # rs_bbb and rs_ccc should each appear once
-    assert reasoning_ids.count("rs_bbb") == 1
-    assert reasoning_ids.count("rs_ccc") == 1
-    assert len(reasoning_ids) == 3
+    reasoning_items = [it for it in items if it.get("type") == "reasoning"]
+    # Dedup: rs_aaa appears in both turns but should only be emitted once.
+    # 3 unique items total: enc_1 (from rs_aaa), enc_2 (rs_bbb), enc_3 (rs_ccc).
+    assert len(reasoning_items) == 3
+    encrypted = [it["encrypted_content"] for it in reasoning_items]
+    assert encrypted.count("enc_1") == 1
+    assert "enc_2" in encrypted
+    assert "enc_3" in encrypted
+    # IDs must be stripped — with store=False the API 404s on id lookups.
+    for it in reasoning_items:
+        assert "id" not in it
 
 
 def test_preflight_codex_input_deduplicates_reasoning_ids(monkeypatch):
@@ -1272,7 +1276,11 @@ def test_preflight_codex_input_deduplicates_reasoning_ids(monkeypatch):
     normalized = agent._preflight_codex_input_items(raw_input)
 
     reasoning_items = [it for it in normalized if it.get("type") == "reasoning"]
-    reasoning_ids = [it["id"] for it in reasoning_items]
-    assert reasoning_ids.count("rs_xyz") == 1
-    assert reasoning_ids.count("rs_zzz") == 1
+    # rs_xyz duplicate should be collapsed to one item; rs_zzz kept.
     assert len(reasoning_items) == 2
+    encrypted = [it["encrypted_content"] for it in reasoning_items]
+    assert encrypted.count("enc_a") == 1
+    assert "enc_b" in encrypted
+    # IDs must be stripped — with store=False the API 404s on id lookups.
+    for it in reasoning_items:
+        assert "id" not in it

From 7b2700c9afca19f3653a0af98d5ee35dd39bc9c6 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Wed, 15 Apr 2026 03:29:37 -0700
Subject: [PATCH 15/25] fix(browser): use 127.0.0.1 instead of localhost for
 CDP default (#10231)

/browser connect set BROWSER_CDP_URL to http://localhost:9222, but
Chrome's --remote-debugging-port only binds to 127.0.0.1 (IPv4).
On macOS, 'localhost' can resolve to ::1 (IPv6) first, causing both
_resolve_cdp_override's /json/version fetch and agent-browser's
--cdp connection to fail when Chrome isn't listening on IPv6.

The socket check in the connect handler already used 127.0.0.1
explicitly and succeeded, masking the mismatch.

Use 127.0.0.1 in the default CDP URL to match what Chrome actually
binds to.
---
 cli.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cli.py b/cli.py
index 0ed2d3232..97698f133 100644
--- a/cli.py
+++ b/cli.py
@@ -5956,7 +5956,7 @@ class HermesCLI:
         parts = cmd.strip().split(None, 1)
         sub = parts[1].lower().strip() if len(parts) > 1 else "status"
 
-        _DEFAULT_CDP = "http://localhost:9222"
+        _DEFAULT_CDP = "http://127.0.0.1:9222"
         current = os.environ.get("BROWSER_CDP_URL", "").strip()
 
         if sub.startswith("connect"):

From 2546b7acea9b294429396e9196374127acd71024 Mon Sep 17 00:00:00 2001
From: Teknium <teknium1@gmail.com>
Date: Wed, 15 Apr 2026 03:31:08 -0700
Subject: [PATCH 16/25] fix(gateway): suppress duplicate replies on interrupt
 and streaming flood control
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three fixes for the duplicate reply bug affecting all gateway platforms:

1. base.py: Suppress stale response when the session was interrupted by a
   new message that hasn't been consumed yet. Checks both interrupt_event
   and _pending_messages to avoid false positives. (#8221, #2483)

2. run.py (return path): Remove response_previewed guard from already_sent
   check. Stream consumer's already_sent alone is authoritative — if
   content was delivered via streaming, the duplicate send must be
   suppressed regardless of the agent's response_previewed flag. (#8375)

3. run.py (queued-message path): Same fix — already_sent without
   response_previewed now correctly marks the first response as already
   streamed, preventing re-send before processing the queued message.

The response_previewed field is still produced by the agent (run_agent.py)
but is no longer required as a gate for duplicate suppression. The stream
consumer's already_sent flag is the delivery-level truth about what the
user actually saw.

Concepts from PR #8380 (konsisumer). Closes #8375, #8221, #2483.
---
 gateway/platforms/base.py                     |  15 +
 gateway/run.py                                |  12 +-
 .../test_duplicate_reply_suppression.py       | 291 ++++++++++++++++++
 3 files changed, 308 insertions(+), 10 deletions(-)
 create mode 100644 tests/gateway/test_duplicate_reply_suppression.py

diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py
index f7943da47..1561cd526 100644
--- a/gateway/platforms/base.py
+++ b/gateway/platforms/base.py
@@ -1624,6 +1624,21 @@ class BasePlatformAdapter(ABC):
             # streaming already delivered the text (already_sent=True) or
             # when the message was queued behind an active agent.  Log at
             # DEBUG to avoid noisy warnings for expected behavior.
+            #
+            # Suppress stale response when the session was interrupted by a
+            # new message that hasn't been consumed yet.  The pending message
+            # is processed by the pending-message handler below (#8221/#2483).
+            if (
+                response
+                and interrupt_event.is_set()
+                and session_key in self._pending_messages
+            ):
+                logger.info(
+                    "[%s] Suppressing stale response for interrupted session %s",
+                    self.name,
+                    session_key,
+                )
+                response = None
             if not response:
                 logger.debug("[%s] Handler returned empty/None response for %s", self.name, event.source.chat_id)
             if response:
diff --git a/gateway/run.py b/gateway/run.py
index d360d453f..2eb745f92 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -9231,15 +9231,11 @@ class GatewayRunner:
                                 pass
                         except Exception as e:
                             logger.debug("Stream consumer wait before queued message failed: %s", e)
-                    _response_previewed = bool(result.get("response_previewed"))
                     _already_streamed = bool(
                         _sc
                         and (
                             getattr(_sc, "final_response_sent", False)
-                            or (
-                                _response_previewed
-                                and getattr(_sc, "already_sent", False)
-                            )
+                            or getattr(_sc, "already_sent", False)
                         )
                     )
                     first_response = result.get("final_response", "")
@@ -9323,13 +9319,9 @@ class GatewayRunner:
         # them even if streaming had sent earlier partial output.
         _sc = stream_consumer_holder[0]
         if _sc and isinstance(response, dict) and not response.get("failed"):
-            _response_previewed = bool(response.get("response_previewed"))
             if (
                 getattr(_sc, "final_response_sent", False)
-                or (
-                    _response_previewed
-                    and getattr(_sc, "already_sent", False)
-                )
+                or getattr(_sc, "already_sent", False)
             ):
                 response["already_sent"] = True
         
diff --git a/tests/gateway/test_duplicate_reply_suppression.py b/tests/gateway/test_duplicate_reply_suppression.py
new file mode 100644
index 000000000..5a0ea02f3
--- /dev/null
+++ b/tests/gateway/test_duplicate_reply_suppression.py
@@ -0,0 +1,291 @@
+"""Tests for duplicate reply suppression across the gateway stack.
+
+Covers three fix paths:
+  1. base.py: stale response suppressed when interrupt_event is set and a
+     pending message exists (#8221 / #2483)
+  2. run.py return path: already_sent propagated from stream consumer's
+     already_sent flag without requiring response_previewed (#8375)
+  3. run.py queued-message path: first response correctly detected as
+     already-streamed when already_sent is True without response_previewed
+"""
+
+import asyncio
+from types import SimpleNamespace
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from gateway.config import Platform, PlatformConfig
+from gateway.platforms.base import (
+    BasePlatformAdapter,
+    MessageEvent,
+    MessageType,
+    ProcessingOutcome,
+    SendResult,
+)
+from gateway.session import SessionSource, build_session_key
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+class StubAdapter(BasePlatformAdapter):
+    """Minimal concrete adapter for testing."""
+
+    def __init__(self):
+        super().__init__(PlatformConfig(enabled=True, token="fake"), Platform.DISCORD)
+        self.sent = []
+
+    async def connect(self):
+        return True
+
+    async def disconnect(self):
+        pass
+
+    async def send(self, chat_id, content, reply_to=None, metadata=None):
+        self.sent.append({"chat_id": chat_id, "content": content})
+        return SendResult(success=True, message_id="msg1")
+
+    async def send_typing(self, chat_id, metadata=None):
+        pass
+
+    async def get_chat_info(self, chat_id):
+        return {"id": chat_id}
+
+
+def _make_event(text="hello", chat_id="c1", user_id="u1"):
+    return MessageEvent(
+        text=text,
+        source=SessionSource(
+            platform=Platform.DISCORD,
+            chat_id=chat_id,
+            chat_type="dm",
+            user_id=user_id,
+        ),
+        message_id="m1",
+    )
+
+
+# ===================================================================
+# Test 1: base.py — stale response suppressed on interrupt (#8221)
+# ===================================================================
+
+class TestBaseInterruptSuppression:
+    @pytest.mark.asyncio
+    async def test_stale_response_suppressed_when_interrupted(self):
+        """When interrupt_event is set AND a pending message exists,
+        base.py should suppress the stale response instead of sending it."""
+        adapter = StubAdapter()
+
+        stale_response = "This is the stale answer to the first question."
+        pending_response = "This is the answer to the second question."
+        call_count = 0
+
+        async def fake_handler(event):
+            nonlocal call_count
+            call_count += 1
+            if call_count == 1:
+                return stale_response
+            return pending_response
+
+        adapter.set_message_handler(fake_handler)
+
+        event_a = _make_event(text="first question")
+        session_key = build_session_key(event_a.source)
+
+        # Simulate: message A is being processed, message B arrives
+        # The interrupt event is set and B is in pending_messages
+        interrupt_event = asyncio.Event()
+        interrupt_event.set()
+        adapter._active_sessions[session_key] = interrupt_event
+
+        event_b = _make_event(text="second question")
+        adapter._pending_messages[session_key] = event_b
+
+        await adapter._process_message_background(event_a, session_key)
+
+        # The stale response should NOT have been sent.
+        stale_sends = [s for s in adapter.sent if s["content"] == stale_response]
+        assert len(stale_sends) == 0, (
+            f"Stale response was sent {len(stale_sends)} time(s) — should be suppressed"
+        )
+        # The pending message's response SHOULD have been sent.
+        pending_sends = [s for s in adapter.sent if s["content"] == pending_response]
+        assert len(pending_sends) == 1, "Pending message response should be sent"
+
+    @pytest.mark.asyncio
+    async def test_response_not_suppressed_without_interrupt(self):
+        """Normal case: no interrupt, response should be sent."""
+        adapter = StubAdapter()
+
+        async def fake_handler(event):
+            return "Normal response"
+
+        adapter.set_message_handler(fake_handler)
+        event = _make_event()
+        session_key = build_session_key(event.source)
+
+        await adapter._process_message_background(event, session_key)
+
+        assert any(s["content"] == "Normal response" for s in adapter.sent)
+
+    @pytest.mark.asyncio
+    async def test_response_not_suppressed_with_interrupt_but_no_pending(self):
+        """Interrupt event set but no pending message (race already resolved) —
+        response should still be sent."""
+        adapter = StubAdapter()
+
+        async def fake_handler(event):
+            return "Valid response"
+
+        adapter.set_message_handler(fake_handler)
+        event = _make_event()
+        session_key = build_session_key(event.source)
+
+        # Set interrupt but no pending message
+        interrupt_event = asyncio.Event()
+        interrupt_event.set()
+        adapter._active_sessions[session_key] = interrupt_event
+
+        await adapter._process_message_background(event, session_key)
+
+        assert any(s["content"] == "Valid response" for s in adapter.sent)
+
+
+# ===================================================================
+# Test 2: run.py — already_sent without response_previewed (#8375)
+# ===================================================================
+
+class TestAlreadySentWithoutResponsePreviewed:
+    """The already_sent flag on the response dict should be set when the
+    stream consumer's already_sent is True, even if response_previewed is
+    False.  This prevents duplicate sends when streaming was interrupted
+    by flood control."""
+
+    def _make_mock_stream_consumer(self, already_sent=False, final_response_sent=False):
+        sc = SimpleNamespace(
+            already_sent=already_sent,
+            final_response_sent=final_response_sent,
+        )
+        return sc
+
+    def test_already_sent_set_without_response_previewed(self):
+        """Stream consumer already_sent=True should propagate to response
+        dict even when response_previewed is False."""
+        sc = self._make_mock_stream_consumer(already_sent=True, final_response_sent=False)
+        response = {"final_response": "text", "response_previewed": False}
+
+        # Reproduce the logic from run.py return path (post-fix)
+        if sc and isinstance(response, dict) and not response.get("failed"):
+            if (
+                getattr(sc, "final_response_sent", False)
+                or getattr(sc, "already_sent", False)
+            ):
+                response["already_sent"] = True
+
+        assert response.get("already_sent") is True
+
+    def test_already_sent_not_set_when_nothing_sent(self):
+        """When stream consumer hasn't sent anything, already_sent should
+        not be set on the response."""
+        sc = self._make_mock_stream_consumer(already_sent=False, final_response_sent=False)
+        response = {"final_response": "text", "response_previewed": False}
+
+        if sc and isinstance(response, dict) and not response.get("failed"):
+            if (
+                getattr(sc, "final_response_sent", False)
+                or getattr(sc, "already_sent", False)
+            ):
+                response["already_sent"] = True
+
+        assert "already_sent" not in response
+
+    def test_already_sent_set_on_final_response_sent(self):
+        """final_response_sent=True should still work as before."""
+        sc = self._make_mock_stream_consumer(already_sent=False, final_response_sent=True)
+        response = {"final_response": "text"}
+
+        if sc and isinstance(response, dict) and not response.get("failed"):
+            if (
+                getattr(sc, "final_response_sent", False)
+                or getattr(sc, "already_sent", False)
+            ):
+                response["already_sent"] = True
+
+        assert response.get("already_sent") is True
+
+    def test_already_sent_not_set_on_failed_response(self):
+        """Failed responses should never be suppressed — user needs to see
+        the error message even if streaming sent earlier partial output."""
+        sc = self._make_mock_stream_consumer(already_sent=True, final_response_sent=False)
+        response = {"final_response": "Error: something broke", "failed": True}
+
+        if sc and isinstance(response, dict) and not response.get("failed"):
+            if (
+                getattr(sc, "final_response_sent", False)
+                or getattr(sc, "already_sent", False)
+            ):
+                response["already_sent"] = True
+
+        assert "already_sent" not in response
+
+
+# ===================================================================
+# Test 3: run.py queued-message path — _already_streamed detection
+# ===================================================================
+
+class TestQueuedMessageAlreadyStreamed:
+    """The queued-message path should detect that the first response was
+    already streamed (already_sent=True) even without response_previewed."""
+
+    def _make_mock_sc(self, already_sent=False, final_response_sent=False):
+        return SimpleNamespace(
+            already_sent=already_sent,
+            final_response_sent=final_response_sent,
+        )
+
+    def test_queued_path_detects_already_streamed(self):
+        """already_sent=True on stream consumer means first response was
+        streamed — skip re-sending before processing queued message."""
+        _sc = self._make_mock_sc(already_sent=True)
+
+        # Reproduce the queued-message logic from run.py (post-fix)
+        _already_streamed = bool(
+            _sc
+            and (
+                getattr(_sc, "final_response_sent", False)
+                or getattr(_sc, "already_sent", False)
+            )
+        )
+
+        assert _already_streamed is True
+
+    def test_queued_path_sends_when_not_streamed(self):
+        """Nothing was streamed — first response should be sent before
+        processing the queued message."""
+        _sc = self._make_mock_sc(already_sent=False)
+
+        _already_streamed = bool(
+            _sc
+            and (
+                getattr(_sc, "final_response_sent", False)
+                or getattr(_sc, "already_sent", False)
+            )
+        )
+
+        assert _already_streamed is False
+
+    def test_queued_path_with_no_stream_consumer(self):
+        """No stream consumer at all (streaming disabled) — not streamed."""
+        _sc = None
+
+        _already_streamed = bool(
+            _sc
+            and (
+                getattr(_sc, "final_response_sent", False)
+                or getattr(_sc, "already_sent", False)
+            )
+        )
+
+        assert _already_streamed is False

From 8bc9b5a0b4c6ae8ceb33b8a8d4c8d813a280eb6e Mon Sep 17 00:00:00 2001
From: Misturi <haileymarshall005@gmail.com>
Date: Wed, 15 Apr 2026 08:54:33 +0100
Subject: [PATCH 17/25] fix(skills): use `is None` check for coordinates in
 find-nearby to avoid dropping valid 0.0 values

---
 skills/leisure/find-nearby/scripts/find_nearby.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/skills/leisure/find-nearby/scripts/find_nearby.py b/skills/leisure/find-nearby/scripts/find_nearby.py
index 543d35a0d..9d7fed78f 100644
--- a/skills/leisure/find-nearby/scripts/find_nearby.py
+++ b/skills/leisure/find-nearby/scripts/find_nearby.py
@@ -98,7 +98,7 @@ def find_nearby(lat: float, lon: float, types: list[str], radius: int = 1500, li
         # Get coordinates (nodes have lat/lon directly, ways/relations use center)
         plat = el.get("lat") or (el.get("center", {}) or {}).get("lat")
         plon = el.get("lon") or (el.get("center", {}) or {}).get("lon")
-        if not plat or not plon:
+        if plat is None or plon is None:
             continue
 
         dist = haversine(lat, lon, plat, plon)

From dedc4600dd31770612af213167f7b4808fafa3d8 Mon Sep 17 00:00:00 2001
From: Misturi <haileymarshall005@gmail.com>
Date: Wed, 15 Apr 2026 09:15:44 +0100
Subject: [PATCH 18/25] fix(skills): handle missing fields in Google Workspace
 token file gracefully instead of crashing with KeyError

---
 skills/productivity/google-workspace/scripts/gws_bridge.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/skills/productivity/google-workspace/scripts/gws_bridge.py b/skills/productivity/google-workspace/scripts/gws_bridge.py
index adecd33ad..7b5d351f8 100755
--- a/skills/productivity/google-workspace/scripts/gws_bridge.py
+++ b/skills/productivity/google-workspace/scripts/gws_bridge.py
@@ -25,6 +25,13 @@ def refresh_token(token_data: dict) -> dict:
     import urllib.parse
     import urllib.request
 
+    required_keys = ["client_id", "client_secret", "refresh_token", "token_uri"]
+    missing = [k for k in required_keys if k not in token_data]
+    if missing:
+        print(f"ERROR: google_token.json is missing required fields: {', '.join(missing)}", file=sys.stderr)
+        print("Please re-authenticate by running the Google Workspace setup script.", file=sys.stderr)
+        sys.exit(1)
+
     params = urllib.parse.urlencode({
         "client_id": token_data["client_id"],
         "client_secret": token_data["client_secret"],

From 1c4d3216d3855848a632847306c96f4c3090b259 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Wed, 15 Apr 2026 03:46:58 -0700
Subject: [PATCH 19/25] fix(cron): include job_id in delivery and guide models
 on removal workflow (#10242)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* fix(gateway): suppress duplicate replies on interrupt and streaming flood control

Three fixes for the duplicate reply bug affecting all gateway platforms:

1. base.py: Suppress stale response when the session was interrupted by a
   new message that hasn't been consumed yet. Checks both interrupt_event
   and _pending_messages to avoid false positives. (#8221, #2483)

2. run.py (return path): Remove response_previewed guard from already_sent
   check. Stream consumer's already_sent alone is authoritative — if
   content was delivered via streaming, the duplicate send must be
   suppressed regardless of the agent's response_previewed flag. (#8375)

3. run.py (queued-message path): Same fix — already_sent without
   response_previewed now correctly marks the first response as already
   streamed, preventing re-send before processing the queued message.

The response_previewed field is still produced by the agent (run_agent.py)
but is no longer required as a gate for duplicate suppression. The stream
consumer's already_sent flag is the delivery-level truth about what the
user actually saw.

Concepts from PR #8380 (konsisumer). Closes #8375, #8221, #2483.

* fix(cron): include job_id in delivery and guide models on removal workflow

Users reported cron reminders keep firing after asking the agent to stop.
Root cause: the conversational agent didn't know the job_id (not in delivery)
and models don't reliably do the list→remove two-step without guidance.

1. Include job_id in the cron delivery wrapper so users and agents can
   reference it when requesting removal.

2. Replace confusing footer ('The agent cannot see this message') with
   actionable guidance ('To stop or manage this job, send me a new
   message').

3. Add explicit list→remove guidance in the cronjob tool schema so models
   know to list first and never guess job IDs.
---
 cron/scheduler.py            | 4 +++-
 tests/cron/test_scheduler.py | 3 ++-
 tools/cronjob_tools.py       | 2 ++
 3 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/cron/scheduler.py b/cron/scheduler.py
index 83b7abb9b..cd4576c9f 100644
--- a/cron/scheduler.py
+++ b/cron/scheduler.py
@@ -288,11 +288,13 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
 
     if wrap_response:
         task_name = job.get("name", job["id"])
+        job_id = job.get("id", "")
         delivery_content = (
             f"Cronjob Response: {task_name}\n"
+            f"(job_id: {job_id})\n"
             f"-------------\n\n"
             f"{content}\n\n"
-            f"Note: The agent cannot see this message, and therefore cannot respond to it."
+            f"To stop or manage this job, send me a new message (e.g. \"stop reminder {task_name}\")."
         )
     else:
         delivery_content = content
diff --git a/tests/cron/test_scheduler.py b/tests/cron/test_scheduler.py
index 08b57cfa8..50d3cf14f 100644
--- a/tests/cron/test_scheduler.py
+++ b/tests/cron/test_scheduler.py
@@ -233,9 +233,10 @@ class TestDeliverResultWrapping:
         send_mock.assert_called_once()
         sent_content = send_mock.call_args.kwargs.get("content") or send_mock.call_args[0][-1]
         assert "Cronjob Response: daily-report" in sent_content
+        assert "(job_id: test-job)" in sent_content
         assert "-------------" in sent_content
         assert "Here is today's summary." in sent_content
-        assert "The agent cannot see this message" in sent_content
+        assert "To stop or manage this job" in sent_content
 
     def test_delivery_uses_job_id_when_no_name(self):
         """When a job has no name, the wrapper should fall back to job id."""
diff --git a/tools/cronjob_tools.py b/tools/cronjob_tools.py
index 75dd4c31f..25a153041 100644
--- a/tools/cronjob_tools.py
+++ b/tools/cronjob_tools.py
@@ -391,6 +391,8 @@ Use action='create' to schedule a new job from a prompt or one or more skills.
 Use action='list' to inspect jobs.
 Use action='update', 'pause', 'resume', 'remove', or 'run' to manage an existing job.
 
+To stop a job the user no longer wants: first action='list' to find the job_id, then action='remove' with that job_id. Never guess job IDs — always list first.
+
 Jobs run in a fresh session with no current-chat context, so prompts must be self-contained.
 If skills are provided on create, the future cron run loads those skills in order, then follows the prompt as the task instruction.
 On update, passing skills=[] clears attached skills.

From 4bcb2f2d2632011008b11d40fe2aca32c94585e0 Mon Sep 17 00:00:00 2001
From: sprmn24 <oncuevtv@gmail.com>
Date: Wed, 15 Apr 2026 11:46:25 +0300
Subject: [PATCH 20/25] feat(send_message): add native media attachment support
 for Discord

Previously send_message only supported media delivery for Telegram.
Discord users received a warning that media was omitted.

- Add media_files parameter to _send_discord()
- Upload media via Discord multipart/form-data API (files[0] field)
- Handle Discord in _send_to_platform() same way as Telegram block
- Remove Discord from generic chunk loop (now handled above)
- Update error/warning strings to mention telegram and discord
---
 tools/send_message_tool.py | 88 ++++++++++++++++++++++++++++++++------
 1 file changed, 74 insertions(+), 14 deletions(-)

diff --git a/tools/send_message_tool.py b/tools/send_message_tool.py
index 6c9632b2c..48468e103 100644
--- a/tools/send_message_tool.py
+++ b/tools/send_message_tool.py
@@ -387,11 +387,28 @@ async def _send_to_platform(platform, pconfig, chat_id, message, thread_id=None,
     if platform == Platform.WEIXIN:
         return await _send_weixin(pconfig, chat_id, message, media_files=media_files)
 
-    # --- Non-Telegram platforms ---
+    # --- Discord: special handling for media attachments ---
+    if platform == Platform.DISCORD:
+        last_result = None
+        for i, chunk in enumerate(chunks):
+            is_last = (i == len(chunks) - 1)
+            result = await _send_discord(
+                pconfig.token,
+                chat_id,
+                chunk,
+                media_files=media_files if is_last else [],
+                thread_id=thread_id,
+            )
+            if isinstance(result, dict) and result.get("error"):
+                return result
+            last_result = result
+        return last_result
+
+    # --- Non-Telegram/Discord platforms ---
     if media_files and not message.strip():
         return {
             "error": (
-                f"send_message MEDIA delivery is currently only supported for telegram; "
+                f"send_message MEDIA delivery is currently only supported for telegram and discord; "
                 f"target {platform.value} had only media attachments"
             )
         }
@@ -399,14 +416,12 @@ async def _send_to_platform(platform, pconfig, chat_id, message, thread_id=None,
     if media_files:
         warning = (
             f"MEDIA attachments were omitted for {platform.value}; "
-            "native send_message media delivery is currently only supported for telegram"
+            "native send_message media delivery is currently only supported for telegram and discord"
         )
 
     last_result = None
     for chunk in chunks:
-        if platform == Platform.DISCORD:
-            result = await _send_discord(pconfig.token, chat_id, chunk, thread_id=thread_id)
-        elif platform == Platform.SLACK:
+        if platform == Platform.SLACK:
             result = await _send_slack(pconfig.token, chat_id, chunk)
         elif platform == Platform.WHATSAPP:
             result = await _send_whatsapp(pconfig.extra, chat_id, chunk)
@@ -571,13 +586,16 @@ async def _send_telegram(token, chat_id, message, media_files=None, thread_id=No
         return _error(f"Telegram send failed: {e}")
 
 
-async def _send_discord(token, chat_id, message, thread_id=None):
+async def _send_discord(token, chat_id, message, thread_id=None, media_files=None):
     """Send a single message via Discord REST API (no websocket client needed).
 
     Chunking is handled by _send_to_platform() before this is called.
 
     When thread_id is provided, the message is sent directly to that thread
     via the /channels/{thread_id}/messages endpoint.
+
+    Media files are uploaded one-by-one via multipart/form-data after the
+    text message is sent (same pattern as Telegram).
     """
     try:
         import aiohttp
@@ -592,14 +610,56 @@ async def _send_discord(token, chat_id, message, thread_id=None):
             url = f"https://discord.com/api/v10/channels/{thread_id}/messages"
         else:
             url = f"https://discord.com/api/v10/channels/{chat_id}/messages"
-        headers = {"Authorization": f"Bot {token}", "Content-Type": "application/json"}
+        auth_headers = {"Authorization": f"Bot {token}"}
+        media_files = media_files or []
+        last_data = None
+        warnings = []
+
         async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=30), **_sess_kw) as session:
-            async with session.post(url, headers=headers, json={"content": message}, **_req_kw) as resp:
-                if resp.status not in (200, 201):
-                    body = await resp.text()
-                    return _error(f"Discord API error ({resp.status}): {body}")
-                data = await resp.json()
-        return {"success": True, "platform": "discord", "chat_id": chat_id, "message_id": data.get("id")}
+            # Send text message (skip if empty and media is present)
+            if message.strip() or not media_files:
+                headers = {**auth_headers, "Content-Type": "application/json"}
+                async with session.post(url, headers=headers, json={"content": message}, **_req_kw) as resp:
+                    if resp.status not in (200, 201):
+                        body = await resp.text()
+                        return _error(f"Discord API error ({resp.status}): {body}")
+                    last_data = await resp.json()
+
+            # Send each media file as a separate multipart upload
+            for media_path, _is_voice in media_files:
+                if not os.path.exists(media_path):
+                    warning = f"Media file not found, skipping: {media_path}"
+                    logger.warning(warning)
+                    warnings.append(warning)
+                    continue
+                try:
+                    form = aiohttp.FormData()
+                    filename = os.path.basename(media_path)
+                    with open(media_path, "rb") as f:
+                        form.add_field("files[0]", f, filename=filename)
+                    async with session.post(url, headers=auth_headers, data=form, **_req_kw) as resp:
+                        if resp.status not in (200, 201):
+                            body = await resp.text()
+                            warning = _sanitize_error_text(f"Failed to send media {media_path}: Discord API error ({resp.status}): {body}")
+                            logger.error(warning)
+                            warnings.append(warning)
+                            continue
+                        last_data = await resp.json()
+                except Exception as e:
+                    warning = _sanitize_error_text(f"Failed to send media {media_path}: {e}")
+                    logger.error(warning)
+                    warnings.append(warning)
+
+        if last_data is None:
+            error = "No deliverable text or media remained after processing"
+            if warnings:
+                return {"error": error, "warnings": warnings}
+            return {"error": error}
+
+        result = {"success": True, "platform": "discord", "chat_id": chat_id, "message_id": last_data.get("id")}
+        if warnings:
+            result["warnings"] = warnings
+        return result
     except Exception as e:
         return _error(f"Discord send failed: {e}")
 

From 47e6ea84bb362f951e5d603ec7672731c0539c4e Mon Sep 17 00:00:00 2001
From: Teknium <teknium1@gmail.com>
Date: Wed, 15 Apr 2026 03:53:14 -0700
Subject: [PATCH 21/25] fix: file handle bug, warning text, and tests for
 Discord media send

- Fix file handle closed before POST: nest session.post() inside
  the 'with open()' block so aiohttp can read the file during upload
- Update warning text to include weixin (also supports media delivery)
- Add 8 unit tests covering: text+media, media-only, missing files,
  upload failures, multiple files, and _send_to_platform routing
---
 tests/tools/test_send_message_tool.py | 186 ++++++++++++++++++++++++++
 tools/send_message_tool.py            |  20 +--
 2 files changed, 196 insertions(+), 10 deletions(-)

diff --git a/tests/tools/test_send_message_tool.py b/tests/tools/test_send_message_tool.py
index 9a277d598..07a1a9beb 100644
--- a/tests/tools/test_send_message_tool.py
+++ b/tests/tools/test_send_message_tool.py
@@ -888,6 +888,192 @@ class TestSendToPlatformDiscordThread:
         assert call_kwargs["thread_id"] is None
 
 
+# ---------------------------------------------------------------------------
+# Discord media attachment support
+# ---------------------------------------------------------------------------
+
+
+class TestSendDiscordMedia:
+    """_send_discord uploads media files via multipart/form-data."""
+
+    @staticmethod
+    def _build_mock(response_status, response_data=None, response_text="error body"):
+        """Build a properly-structured aiohttp mock chain."""
+        mock_resp = MagicMock()
+        mock_resp.status = response_status
+        mock_resp.json = AsyncMock(return_value=response_data or {"id": "msg123"})
+        mock_resp.text = AsyncMock(return_value=response_text)
+        mock_resp.__aenter__ = AsyncMock(return_value=mock_resp)
+        mock_resp.__aexit__ = AsyncMock(return_value=None)
+
+        mock_session = MagicMock()
+        mock_session.__aenter__ = AsyncMock(return_value=mock_session)
+        mock_session.__aexit__ = AsyncMock(return_value=None)
+        mock_session.post = MagicMock(return_value=mock_resp)
+
+        return mock_session, mock_resp
+
+    def test_text_and_media_sends_both(self, tmp_path):
+        """Text message is sent first, then each media file as multipart."""
+        img = tmp_path / "photo.png"
+        img.write_bytes(b"\x89PNG fake image data")
+
+        mock_session, _ = self._build_mock(200, {"id": "msg999"})
+        with patch("aiohttp.ClientSession", return_value=mock_session):
+            result = asyncio.run(
+                _send_discord("tok", "111", "hello", media_files=[(str(img), False)])
+            )
+
+        assert result["success"] is True
+        assert result["message_id"] == "msg999"
+        # Two POSTs: one text JSON, one multipart upload
+        assert mock_session.post.call_count == 2
+
+    def test_media_only_skips_text_post(self, tmp_path):
+        """When message is empty and media is present, text POST is skipped."""
+        img = tmp_path / "photo.png"
+        img.write_bytes(b"\x89PNG fake image data")
+
+        mock_session, _ = self._build_mock(200, {"id": "media_only"})
+        with patch("aiohttp.ClientSession", return_value=mock_session):
+            result = asyncio.run(
+                _send_discord("tok", "222", "  ", media_files=[(str(img), False)])
+            )
+
+        assert result["success"] is True
+        # Only one POST: the media upload (text was whitespace-only)
+        assert mock_session.post.call_count == 1
+
+    def test_missing_media_file_collected_as_warning(self):
+        """Non-existent media paths produce warnings but don't fail."""
+        mock_session, _ = self._build_mock(200, {"id": "txt_ok"})
+        with patch("aiohttp.ClientSession", return_value=mock_session):
+            result = asyncio.run(
+                _send_discord("tok", "333", "hello", media_files=[("/nonexistent/file.png", False)])
+            )
+
+        assert result["success"] is True
+        assert "warnings" in result
+        assert any("not found" in w for w in result["warnings"])
+        # Only the text POST was made, media was skipped
+        assert mock_session.post.call_count == 1
+
+    def test_media_upload_failure_collected_as_warning(self, tmp_path):
+        """Failed media upload becomes a warning, text still succeeds."""
+        img = tmp_path / "photo.png"
+        img.write_bytes(b"\x89PNG fake image data")
+
+        # First call (text) succeeds, second call (media) returns 413
+        text_resp = MagicMock()
+        text_resp.status = 200
+        text_resp.json = AsyncMock(return_value={"id": "txt_ok"})
+        text_resp.__aenter__ = AsyncMock(return_value=text_resp)
+        text_resp.__aexit__ = AsyncMock(return_value=None)
+
+        media_resp = MagicMock()
+        media_resp.status = 413
+        media_resp.text = AsyncMock(return_value="Request Entity Too Large")
+        media_resp.__aenter__ = AsyncMock(return_value=media_resp)
+        media_resp.__aexit__ = AsyncMock(return_value=None)
+
+        mock_session = MagicMock()
+        mock_session.__aenter__ = AsyncMock(return_value=mock_session)
+        mock_session.__aexit__ = AsyncMock(return_value=None)
+        mock_session.post = MagicMock(side_effect=[text_resp, media_resp])
+
+        with patch("aiohttp.ClientSession", return_value=mock_session):
+            result = asyncio.run(
+                _send_discord("tok", "444", "hello", media_files=[(str(img), False)])
+            )
+
+        assert result["success"] is True
+        assert result["message_id"] == "txt_ok"
+        assert "warnings" in result
+        assert any("413" in w for w in result["warnings"])
+
+    def test_no_text_no_media_returns_error(self):
+        """Empty text with no media returns error dict."""
+        mock_session, _ = self._build_mock(200)
+        with patch("aiohttp.ClientSession", return_value=mock_session):
+            result = asyncio.run(
+                _send_discord("tok", "555", "", media_files=[])
+            )
+
+        # Text is empty but media_files is empty, so text POST fires
+        # (the "skip text if media present" condition isn't met)
+        assert result["success"] is True
+
+    def test_multiple_media_files_uploaded_separately(self, tmp_path):
+        """Each media file gets its own multipart POST."""
+        img1 = tmp_path / "a.png"
+        img1.write_bytes(b"img1")
+        img2 = tmp_path / "b.jpg"
+        img2.write_bytes(b"img2")
+
+        mock_session, _ = self._build_mock(200, {"id": "last"})
+        with patch("aiohttp.ClientSession", return_value=mock_session):
+            result = asyncio.run(
+                _send_discord("tok", "666", "hi", media_files=[
+                    (str(img1), False), (str(img2), False)
+                ])
+            )
+
+        assert result["success"] is True
+        # 1 text POST + 2 media POSTs = 3
+        assert mock_session.post.call_count == 3
+
+
+class TestSendToPlatformDiscordMedia:
+    """_send_to_platform routes Discord media correctly."""
+
+    def test_media_files_passed_on_last_chunk_only(self):
+        """Discord media_files are only passed on the final chunk."""
+        call_log = []
+
+        async def mock_send_discord(token, chat_id, message, thread_id=None, media_files=None):
+            call_log.append({"message": message, "media_files": media_files or []})
+            return {"success": True, "platform": "discord", "chat_id": chat_id, "message_id": "1"}
+
+        # A message long enough to get chunked (Discord limit is 2000)
+        long_msg = "A" * 1900 + " " + "B" * 1900
+
+        with patch("tools.send_message_tool._send_discord", side_effect=mock_send_discord):
+            result = asyncio.run(
+                _send_to_platform(
+                    Platform.DISCORD,
+                    SimpleNamespace(enabled=True, token="tok", extra={}),
+                    "999",
+                    long_msg,
+                    media_files=[("/fake/img.png", False)],
+                )
+            )
+
+        assert result["success"] is True
+        assert len(call_log) == 2  # Message was chunked
+        assert call_log[0]["media_files"] == []  # First chunk: no media
+        assert call_log[1]["media_files"] == [("/fake/img.png", False)]  # Last chunk: media attached
+
+    def test_single_chunk_gets_media(self):
+        """Short message (single chunk) gets media_files directly."""
+        send_mock = AsyncMock(return_value={"success": True, "message_id": "1"})
+
+        with patch("tools.send_message_tool._send_discord", send_mock):
+            result = asyncio.run(
+                _send_to_platform(
+                    Platform.DISCORD,
+                    SimpleNamespace(enabled=True, token="tok", extra={}),
+                    "888",
+                    "short message",
+                    media_files=[("/fake/img.png", False)],
+                )
+            )
+
+        assert result["success"] is True
+        send_mock.assert_awaited_once()
+        call_kwargs = send_mock.await_args.kwargs
+        assert call_kwargs["media_files"] == [("/fake/img.png", False)]
+
+
 class TestSendMatrixUrlEncoding:
     """_send_matrix URL-encodes Matrix room IDs in the API path."""
 
diff --git a/tools/send_message_tool.py b/tools/send_message_tool.py
index 48468e103..1c6417105 100644
--- a/tools/send_message_tool.py
+++ b/tools/send_message_tool.py
@@ -408,7 +408,7 @@ async def _send_to_platform(platform, pconfig, chat_id, message, thread_id=None,
     if media_files and not message.strip():
         return {
             "error": (
-                f"send_message MEDIA delivery is currently only supported for telegram and discord; "
+                f"send_message MEDIA delivery is currently only supported for telegram, discord, and weixin; "
                 f"target {platform.value} had only media attachments"
             )
         }
@@ -416,7 +416,7 @@ async def _send_to_platform(platform, pconfig, chat_id, message, thread_id=None,
     if media_files:
         warning = (
             f"MEDIA attachments were omitted for {platform.value}; "
-            "native send_message media delivery is currently only supported for telegram and discord"
+            "native send_message media delivery is currently only supported for telegram, discord, and weixin"
         )
 
     last_result = None
@@ -637,14 +637,14 @@ async def _send_discord(token, chat_id, message, thread_id=None, media_files=Non
                     filename = os.path.basename(media_path)
                     with open(media_path, "rb") as f:
                         form.add_field("files[0]", f, filename=filename)
-                    async with session.post(url, headers=auth_headers, data=form, **_req_kw) as resp:
-                        if resp.status not in (200, 201):
-                            body = await resp.text()
-                            warning = _sanitize_error_text(f"Failed to send media {media_path}: Discord API error ({resp.status}): {body}")
-                            logger.error(warning)
-                            warnings.append(warning)
-                            continue
-                        last_data = await resp.json()
+                        async with session.post(url, headers=auth_headers, data=form, **_req_kw) as resp:
+                            if resp.status not in (200, 201):
+                                body = await resp.text()
+                                warning = _sanitize_error_text(f"Failed to send media {media_path}: Discord API error ({resp.status}): {body}")
+                                logger.error(warning)
+                                warnings.append(warning)
+                                continue
+                            last_data = await resp.json()
                 except Exception as e:
                     warning = _sanitize_error_text(f"Failed to send media {media_path}: {e}")
                     logger.error(warning)

From 33ae403890718a341a780e4a19001b1fb5bcdf76 Mon Sep 17 00:00:00 2001
From: asheriif <ahmedsherif95@gmail.com>
Date: Wed, 15 Apr 2026 10:24:57 +0000
Subject: [PATCH 22/25] fix(gateway): fix matrix lingering typing indicator

---
 gateway/platforms/matrix.py  |  8 ++++++++
 tests/gateway/test_matrix.py | 24 +++++++++++++++++++++++-
 2 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/gateway/platforms/matrix.py b/gateway/platforms/matrix.py
index 816d88b03..4aebd92b1 100644
--- a/gateway/platforms/matrix.py
+++ b/gateway/platforms/matrix.py
@@ -729,6 +729,14 @@ class MatrixAdapter(BasePlatformAdapter):
             except Exception:
                 pass
 
+    async def stop_typing(self, chat_id: str) -> None:
+        """Stop the Matrix typing indicator."""
+        if self._client:
+            try:
+                await self._client.set_typing(RoomID(chat_id), timeout=0)
+            except Exception:
+                pass
+
     async def edit_message(
         self, chat_id: str, message_id: str, content: str
     ) -> SendResult:
diff --git a/tests/gateway/test_matrix.py b/tests/gateway/test_matrix.py
index 5097ab633..90d820046 100644
--- a/tests/gateway/test_matrix.py
+++ b/tests/gateway/test_matrix.py
@@ -335,6 +335,29 @@ def _make_adapter():
     return adapter
 
 
+# ---------------------------------------------------------------------------
+# Typing indicator
+# ---------------------------------------------------------------------------
+
+class TestMatrixTypingIndicator:
+    def setup_method(self):
+        self.adapter = _make_adapter()
+        self.adapter._client = MagicMock()
+        self.adapter._client.set_typing = AsyncMock()
+
+    @pytest.mark.asyncio
+    async def test_stop_typing_clears_matrix_typing_state(self):
+        """stop_typing() should send typing=false instead of waiting for timeout expiry."""
+        from gateway.platforms.matrix import RoomID
+
+        await self.adapter.stop_typing("!room:example.org")
+
+        self.adapter._client.set_typing.assert_awaited_once_with(
+            RoomID("!room:example.org"),
+            timeout=0,
+        )
+
+
 # ---------------------------------------------------------------------------
 # mxc:// URL conversion
 # ---------------------------------------------------------------------------
@@ -1831,4 +1854,3 @@ class TestMatrixPresence:
         assert result is False
 
 
-

From 4da598b48ab5be2fee8d24c9d3f9cf9abb095b91 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Wed, 15 Apr 2026 04:39:34 -0700
Subject: [PATCH 23/25] =?UTF-8?q?docs:=20clarify=20hermes=20model=20vs=20/?=
 =?UTF-8?q?model=20=E2=80=94=20two=20commands,=20two=20purposes=20(#10276)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Users are confused about the difference between `hermes model` (terminal
command for full provider setup) and `/model` (session command for switching
between already-configured providers). This distinction was not documented
anywhere.

Changes across 4 doc pages:
- cli-commands.md: Added warning callout explaining the difference, added
  --global flag docs, added 'only see OpenRouter models?' info box
- slash-commands.md: Added notes on both TUI and messaging /model entries
  that /model only switches between configured providers
- providers.md: Added 'Two Commands for Model Management' comparison table
  near top of page, added warning callout in switching section
- faq.md: Added new FAQ entry '/model only shows one provider' with quick
  reference table

Prompted by user feedback in Discord — new users consistently hit this
confusion when trying to add providers from inside a session.
---
 website/docs/integrations/providers.md   | 21 +++++++++++++++++-
 website/docs/reference/cli-commands.md   | 27 ++++++++++++++++++++----
 website/docs/reference/faq.md            | 26 +++++++++++++++++++++++
 website/docs/reference/slash-commands.md |  4 ++--
 4 files changed, 71 insertions(+), 7 deletions(-)

diff --git a/website/docs/integrations/providers.md b/website/docs/integrations/providers.md
index a44483a00..22deca638 100644
--- a/website/docs/integrations/providers.md
+++ b/website/docs/integrations/providers.md
@@ -49,6 +49,17 @@ The OpenAI Codex provider authenticates via device code (open a URL, enter a cod
 Even when using Nous Portal, Codex, or a custom endpoint, some tools (vision, web summarization, MoA) use a separate "auxiliary" model — by default Gemini Flash via OpenRouter. An `OPENROUTER_API_KEY` enables these tools automatically. You can also configure which model and provider these tools use — see [Auxiliary Models](/docs/user-guide/configuration#auxiliary-models).
 :::
 
+### Two Commands for Model Management
+
+Hermes has **two** model commands that serve different purposes:
+
+| Command | Where to run | What it does |
+|---------|-------------|--------------|
+| **`hermes model`** | Your terminal (outside any session) | Full setup wizard — add providers, run OAuth, enter API keys, configure endpoints |
+| **`/model`** | Inside a Hermes chat session | Quick switch between **already-configured** providers and models |
+
+If you're trying to switch to a provider you haven't set up yet (e.g. you only have OpenRouter configured and want to use Anthropic), you need `hermes model`, not `/model`. Exit your session first (`Ctrl+C` or `/quit`), run `hermes model`, complete the provider setup, then start a new session.
+
 ### Anthropic (Native)
 
 Use Claude models directly through the Anthropic API — no OpenRouter proxy needed. Supports three auth methods:
@@ -252,7 +263,15 @@ Both approaches persist to `config.yaml`, which is the source of truth for model
 
 ### Switching Models with `/model`
 
-Once a custom endpoint is configured, you can switch models mid-session:
+:::warning hermes model vs /model
+**`hermes model`** (run from your terminal, outside any chat session) is the **full provider setup wizard**. Use it to add new providers, run OAuth flows, enter API keys, and configure custom endpoints.
+
+**`/model`** (typed inside an active Hermes chat session) can only **switch between providers and models you've already set up**. It cannot add new providers, run OAuth, or prompt for API keys. If you've only configured one provider (e.g. OpenRouter), `/model` will only show models for that provider.
+
+**To add a new provider:** Exit your session (`Ctrl+C` or `/quit`), run `hermes model`, set up the new provider, then start a new session.
+:::
+
+Once you have at least one custom endpoint configured, you can switch models mid-session:
 
 ```
 /model custom:qwen-2.5          # Switch to a model on your custom endpoint
diff --git a/website/docs/reference/cli-commands.md b/website/docs/reference/cli-commands.md
index 2e054482f..fb93cf648 100644
--- a/website/docs/reference/cli-commands.md
+++ b/website/docs/reference/cli-commands.md
@@ -109,22 +109,31 @@ hermes chat --worktree -q "Review this repo and open a PR"
 
 ## `hermes model`
 
-Interactive provider + model selector.
+Interactive provider + model selector. **This is the command for adding new providers, setting up API keys, and running OAuth flows.** Run it from your terminal — not from inside an active Hermes chat session.
 
 ```bash
 hermes model
 ```
 
 Use this when you want to:
-- switch default providers
-- log into OAuth-backed providers during model selection
+- **add a new provider** (OpenRouter, Anthropic, Copilot, DeepSeek, custom, etc.)
+- log into OAuth-backed providers (Anthropic, Copilot, Codex, Nous Portal)
+- enter or update API keys
 - pick from provider-specific model lists
 - configure a custom/self-hosted endpoint
 - save the new default into config
 
+:::warning hermes model vs /model — know the difference
+**`hermes model`** (run from your terminal, outside any Hermes session) is the **full provider setup wizard**. It can add new providers, run OAuth flows, prompt for API keys, and configure endpoints.
+
+**`/model`** (typed inside an active Hermes chat session) can only **switch between providers and models you've already set up**. It cannot add new providers, run OAuth, or prompt for API keys.
+
+**If you need to add a new provider:** Exit your Hermes session first (`Ctrl+C` or `/quit`), then run `hermes model` from your terminal prompt.
+:::
+
 ### `/model` slash command (mid-session)
 
-Switch models without leaving a session:
+Switch between already-configured models without leaving a session:
 
 ```
 /model                              # Show current model and available options
@@ -136,6 +145,16 @@ Switch models without leaving a session:
 /model openrouter:anthropic/claude-sonnet-4  # Switch back to cloud
 ```
 
+By default, `/model` changes apply **to the current session only**. Add `--global` to persist the change to `config.yaml`:
+
+```
+/model claude-sonnet-4 --global     # Switch and save as new default
+```
+
+:::info What if I only see OpenRouter models?
+If you've only configured OpenRouter, `/model` will only show OpenRouter models. To add another provider (Anthropic, DeepSeek, Copilot, etc.), exit your session and run `hermes model` from the terminal.
+:::
+
 Provider and base URL changes are persisted to `config.yaml` automatically. When switching away from a custom endpoint, the stale base URL is cleared to prevent it leaking into other providers.
 
 ## `hermes gateway`
diff --git a/website/docs/reference/faq.md b/website/docs/reference/faq.md
index 6950fb1e9..c39f510b1 100644
--- a/website/docs/reference/faq.md
+++ b/website/docs/reference/faq.md
@@ -187,6 +187,32 @@ curl -fsSL https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scri
 
 ### Provider & Model Issues
 
+#### `/model` only shows one provider / can't switch providers
+
+**Cause:** `/model` (inside a chat session) can only switch between providers you've **already configured**. If you've only set up OpenRouter, that's all `/model` will show.
+
+**Solution:** Exit your session and use `hermes model` from your terminal to add new providers:
+
+```bash
+# Exit the Hermes chat session first (Ctrl+C or /quit)
+
+# Run the full provider setup wizard
+hermes model
+
+# This lets you: add providers, run OAuth, enter API keys, configure endpoints
+```
+
+After adding a new provider via `hermes model`, start a new chat session — `/model` will now show all your configured providers.
+
+:::tip Quick reference
+| Want to... | Use |
+|-----------|-----|
+| Add a new provider | `hermes model` (from terminal) |
+| Enter/change API keys | `hermes model` (from terminal) |
+| Switch model mid-session | `/model <name>` (inside session) |
+| Switch to different configured provider | `/model provider:model` (inside session) |
+:::
+
 #### API key not working
 
 **Cause:** Key is missing, expired, incorrectly set, or for the wrong provider.
diff --git a/website/docs/reference/slash-commands.md b/website/docs/reference/slash-commands.md
index 8e65d81f7..2ad3c62d8 100644
--- a/website/docs/reference/slash-commands.md
+++ b/website/docs/reference/slash-commands.md
@@ -46,7 +46,7 @@ Type `/` in the CLI to open the autocomplete menu. Built-in commands are case-in
 | Command | Description |
 |---------|-------------|
 | `/config` | Show current configuration |
-| `/model [model-name]` | Show or change the current model. Supports: `/model claude-sonnet-4`, `/model provider:model` (switch providers), `/model custom:model` (custom endpoint), `/model custom:name:model` (named custom provider), `/model custom` (auto-detect from endpoint). Use `--global` to persist the change to config.yaml. |
+| `/model [model-name]` | Show or change the current model. Supports: `/model claude-sonnet-4`, `/model provider:model` (switch providers), `/model custom:model` (custom endpoint), `/model custom:name:model` (named custom provider), `/model custom` (auto-detect from endpoint). Use `--global` to persist the change to config.yaml. **Note:** `/model` can only switch between already-configured providers. To add a new provider, exit the session and run `hermes model` from your terminal. |
 | `/provider` | Show available providers and current provider |
 | `/personality` | Set a predefined personality |
 | `/verbose` | Cycle tool progress display: off → new → all → verbose. Can be [enabled for messaging](#notes) via config. |
@@ -124,7 +124,7 @@ The messaging gateway supports the following built-in commands inside Telegram,
 | `/reset` | Reset conversation history. |
 | `/status` | Show session info. |
 | `/stop` | Kill all running background processes and interrupt the running agent. |
-| `/model [provider:model]` | Show or change the model. Supports provider switches (`/model zai:glm-5`), custom endpoints (`/model custom:model`), named custom providers (`/model custom:local:qwen`), and auto-detect (`/model custom`). Use `--global` to persist the change to config.yaml. |
+| `/model [provider:model]` | Show or change the model. Supports provider switches (`/model zai:glm-5`), custom endpoints (`/model custom:model`), named custom providers (`/model custom:local:qwen`), and auto-detect (`/model custom`). Use `--global` to persist the change to config.yaml. **Note:** `/model` can only switch between already-configured providers. To add a new provider or set up API keys, use `hermes model` from your terminal (outside the chat session). |
 | `/provider` | Show provider availability and auth status. |
 | `/personality [name]` | Set a personality overlay for the session. |
 | `/fast [normal\|fast\|status]` | Toggle fast mode — OpenAI Priority Processing / Anthropic Fast Mode. |

From 41e2d61b3fccc7bd9c3c060d66d80ee21c2dcb8c Mon Sep 17 00:00:00 2001
From: sprmn24 <oncuevtv@gmail.com>
Date: Wed, 15 Apr 2026 14:34:32 +0300
Subject: [PATCH 24/25] feat(discord): add native send_animation for inline GIF
 playback

---
 gateway/platforms/discord.py | 62 ++++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)

diff --git a/gateway/platforms/discord.py b/gateway/platforms/discord.py
index a80790ed5..2d2ea93f9 100644
--- a/gateway/platforms/discord.py
+++ b/gateway/platforms/discord.py
@@ -1379,6 +1379,68 @@ class DiscordAdapter(BasePlatformAdapter):
             )
             return await super().send_image(chat_id, image_url, caption, reply_to)
 
+    async def send_animation(
+        self,
+        chat_id: str,
+        animation_url: str,
+        caption: Optional[str] = None,
+        reply_to: Optional[str] = None,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> SendResult:
+        """Send an animated GIF natively as a Discord file attachment."""
+        if not self._client:
+            return SendResult(success=False, error="Not connected")
+
+        if not is_safe_url(animation_url):
+            logger.warning("[%s] Blocked unsafe animation URL during Discord send_animation", self.name)
+            return await super().send_animation(chat_id, animation_url, caption, reply_to, metadata=metadata)
+
+        try:
+            import aiohttp
+
+            channel = self._client.get_channel(int(chat_id))
+            if not channel:
+                channel = await self._client.fetch_channel(int(chat_id))
+            if not channel:
+                return SendResult(success=False, error=f"Channel {chat_id} not found")
+
+            # Download the GIF and send as a Discord file attachment
+            # (Discord renders .gif attachments as auto-playing animations inline)
+            from gateway.platforms.base import resolve_proxy_url, proxy_kwargs_for_aiohttp
+            _proxy = resolve_proxy_url(platform_env_var="DISCORD_PROXY")
+            _sess_kw, _req_kw = proxy_kwargs_for_aiohttp(_proxy)
+            async with aiohttp.ClientSession(**_sess_kw) as session:
+                async with session.get(animation_url, timeout=aiohttp.ClientTimeout(total=30), **_req_kw) as resp:
+                    if resp.status != 200:
+                        raise Exception(f"Failed to download animation: HTTP {resp.status}")
+
+                    animation_data = await resp.read()
+
+                    import io
+                    file = discord.File(io.BytesIO(animation_data), filename="animation.gif")
+
+                    msg = await channel.send(
+                        content=caption if caption else None,
+                        file=file,
+                    )
+                    return SendResult(success=True, message_id=str(msg.id))
+
+        except ImportError:
+            logger.warning(
+                "[%s] aiohttp not installed, falling back to URL. Run: pip install aiohttp",
+                self.name,
+                exc_info=True,
+            )
+            return await super().send_animation(chat_id, animation_url, caption, reply_to, metadata=metadata)
+        except Exception as e:  # pragma: no cover - defensive logging
+            logger.error(
+                "[%s] Failed to send animation attachment, falling back to URL: %s",
+                self.name,
+                e,
+                exc_info=True,
+            )
+            return await super().send_animation(chat_id, animation_url, caption, reply_to, metadata=metadata)
+
     async def send_video(
         self,
         chat_id: str,

From 722331a57de9e18f134c896d733870b4a493dc84 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Wed, 15 Apr 2026 04:57:55 -0700
Subject: [PATCH 25/25] fix: replace hardcoded ~/.hermes with
 display_hermes_home() in agent-facing text (#10285)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Tool schema descriptions and tool return values contained hardcoded
~/.hermes paths that the model sees and uses. When HERMES_HOME is set
to a custom path (Docker containers, profiles), the agent would still
reference ~/.hermes — looking at the wrong directory.

Fixes 6 locations across 5 files:
- tools/tts_tool.py: output_path schema description
- tools/cronjob_tools.py: script path schema description
- tools/skill_manager_tool.py: skill_manage schema description
- tools/skills_tool.py: two tool return messages
- agent/skill_commands.py: skill config injection text

All now use display_hermes_home() which resolves to the actual
HERMES_HOME path (e.g. /opt/data for Docker, ~/.hermes/profiles/X
for profiles, ~/.hermes for default).

Reported by: Sandeep Narahari (PrithviDevs)
---
 agent/skill_commands.py     | 4 +++-
 tools/cronjob_tools.py      | 4 +++-
 tools/skill_manager_tool.py | 4 ++--
 tools/skills_tool.py        | 6 +++---
 tools/tts_tool.py           | 4 +++-
 5 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/agent/skill_commands.py b/agent/skill_commands.py
index 1f000eefe..149b4aaeb 100644
--- a/agent/skill_commands.py
+++ b/agent/skill_commands.py
@@ -12,6 +12,8 @@ from datetime import datetime
 from pathlib import Path
 from typing import Any, Dict, Optional
 
+from hermes_constants import display_hermes_home
+
 logger = logging.getLogger(__name__)
 
 _skill_commands: Dict[str, Dict[str, Any]] = {}
@@ -108,7 +110,7 @@ def _inject_skill_config(loaded_skill: dict[str, Any], parts: list[str]) -> None
         if not resolved:
             return
 
-        lines = ["", "[Skill config (from ~/.hermes/config.yaml):"]
+        lines = ["", f"[Skill config (from {display_hermes_home()}/config.yaml):"]
         for key, value in resolved.items():
             display_val = str(value) if value else "(not set)"
             lines.append(f"  {key} = {display_val}")
diff --git a/tools/cronjob_tools.py b/tools/cronjob_tools.py
index 25a153041..8a685a8cc 100644
--- a/tools/cronjob_tools.py
+++ b/tools/cronjob_tools.py
@@ -13,6 +13,8 @@ import sys
 from pathlib import Path
 from typing import Any, Dict, List, Optional
 
+from hermes_constants import display_hermes_home
+
 logger = logging.getLogger(__name__)
 
 # Import from cron module (will be available when properly installed)
@@ -455,7 +457,7 @@ Important safety rule: cron-run sessions should not recursively schedule more cr
             },
             "script": {
                 "type": "string",
-                "description": "Optional path to a Python script that runs before each cron job execution. Its stdout is injected into the prompt as context. Use for data collection and change detection. Relative paths resolve under ~/.hermes/scripts/. On update, pass empty string to clear."
+                "description": f"Optional path to a Python script that runs before each cron job execution. Its stdout is injected into the prompt as context. Use for data collection and change detection. Relative paths resolve under {display_hermes_home()}/scripts/. On update, pass empty string to clear."
             },
         },
         "required": ["action"]
diff --git a/tools/skill_manager_tool.py b/tools/skill_manager_tool.py
index 6c7307259..a3e585a58 100644
--- a/tools/skill_manager_tool.py
+++ b/tools/skill_manager_tool.py
@@ -39,7 +39,7 @@ import re
 import shutil
 import tempfile
 from pathlib import Path
-from hermes_constants import get_hermes_home
+from hermes_constants import get_hermes_home, display_hermes_home
 from typing import Dict, Any, Optional, Tuple
 
 logger = logging.getLogger(__name__)
@@ -655,7 +655,7 @@ SKILL_MANAGE_SCHEMA = {
     "description": (
         "Manage skills (create, update, delete). Skills are your procedural "
         "memory — reusable approaches for recurring task types. "
-        "New skills go to ~/.hermes/skills/; existing skills can be modified wherever they live.\n\n"
+        f"New skills go to {display_hermes_home()}/skills/; existing skills can be modified wherever they live.\n\n"
         "Actions: create (full SKILL.md + optional category), "
         "patch (old_string/new_string — preferred for fixes), "
         "edit (full SKILL.md rewrite — major overhauls only), "
diff --git a/tools/skills_tool.py b/tools/skills_tool.py
index f6328ab0b..340e4ed53 100644
--- a/tools/skills_tool.py
+++ b/tools/skills_tool.py
@@ -69,7 +69,7 @@ Usage:
 import json
 import logging
 
-from hermes_constants import get_hermes_home
+from hermes_constants import get_hermes_home, display_hermes_home
 import os
 import re
 from enum import Enum
@@ -408,7 +408,7 @@ def _gateway_setup_hint() -> str:
 
         return GATEWAY_SECRET_CAPTURE_UNSUPPORTED_MESSAGE
     except Exception:
-        return "Secure secret entry is not available. Load this skill in the local CLI to be prompted, or add the key to ~/.hermes/.env manually."
+        return f"Secure secret entry is not available. Load this skill in the local CLI to be prompted, or add the key to {display_hermes_home()}/.env manually."
 
 
 def _build_setup_note(
@@ -666,7 +666,7 @@ def skills_list(category: str = None, task_id: str = None) -> str:
                     "success": True,
                     "skills": [],
                     "categories": [],
-                    "message": "No skills found. Skills directory created at ~/.hermes/skills/",
+                    "message": f"No skills found. Skills directory created at {display_hermes_home()}/skills/",
                 },
                 ensure_ascii=False,
             )
diff --git a/tools/tts_tool.py b/tools/tts_tool.py
index 769ae30a9..9fdb63866 100644
--- a/tools/tts_tool.py
+++ b/tools/tts_tool.py
@@ -40,6 +40,8 @@ from pathlib import Path
 from typing import Callable, Dict, Any, Optional
 from urllib.parse import urljoin
 
+from hermes_constants import display_hermes_home
+
 logger = logging.getLogger(__name__)
 from tools.managed_tool_gateway import resolve_managed_tool_gateway
 from tools.tool_backend_helpers import managed_nous_tools_enabled, resolve_openai_audio_api_key
@@ -1050,7 +1052,7 @@ TTS_SCHEMA = {
             },
             "output_path": {
                 "type": "string",
-                "description": "Optional custom file path to save the audio. Defaults to ~/.hermes/audio_cache/<timestamp>.mp3"
+                "description": f"Optional custom file path to save the audio. Defaults to {display_hermes_home()}/audio_cache/<timestamp>.mp3"
             }
         },
         "required": ["text"]