Merge branch 'main' of github.com:NousResearch/hermes-agent into feat/ink-refactor

2026-04-15 10:21:00 -05:00
parent 33c615504d 722331a57d
commit 371166fe26
35 changed files with 2388 additions and 122 deletions
--- a/agent/context_compressor.py
+++ b/agent/context_compressor.py
@@ -17,7 +17,10 @@ Improvements over v2:
  - Richer tool call/result detail in summarizer input
 """

+import hashlib
+import json
 import logging
+import re
 import time
 from typing import Any, Dict, List, Optional

@@ -57,6 +60,128 @@ _CHARS_PER_TOKEN = 4
 _SUMMARY_FAILURE_COOLDOWN_SECONDS = 600


+def _summarize_tool_result(tool_name: str, tool_args: str, tool_content: str) -> str:
+    """Create an informative 1-line summary of a tool call + result.
+
+    Used during the pre-compression pruning pass to replace large tool
+    outputs with a short but useful description of what the tool did,
+    rather than a generic placeholder that carries zero information.
+
+    Returns strings like::
+
+        [terminal] ran `npm test` -> exit 0, 47 lines output
+        [read_file] read config.py from line 1 (1,200 chars)
+        [search_files] content search for 'compress' in agent/ -> 12 matches
+    """
+    try:
+        args = json.loads(tool_args) if tool_args else {}
+    except (json.JSONDecodeError, TypeError):
+        args = {}
+
+    content = tool_content or ""
+    content_len = len(content)
+    line_count = content.count("\n") + 1 if content.strip() else 0
+
+    if tool_name == "terminal":
+        cmd = args.get("command", "")
+        if len(cmd) > 80:
+            cmd = cmd[:77] + "..."
+        exit_match = re.search(r'"exit_code"\s*:\s*(-?\d+)', content)
+        exit_code = exit_match.group(1) if exit_match else "?"
+        return f"[terminal] ran `{cmd}` -> exit {exit_code}, {line_count} lines output"
+
+    if tool_name == "read_file":
+        path = args.get("path", "?")
+        offset = args.get("offset", 1)
+        return f"[read_file] read {path} from line {offset} ({content_len:,} chars)"
+
+    if tool_name == "write_file":
+        path = args.get("path", "?")
+        written_lines = args.get("content", "").count("\n") + 1 if args.get("content") else "?"
+        return f"[write_file] wrote to {path} ({written_lines} lines)"
+
+    if tool_name == "search_files":
+        pattern = args.get("pattern", "?")
+        path = args.get("path", ".")
+        target = args.get("target", "content")
+        match_count = re.search(r'"total_count"\s*:\s*(\d+)', content)
+        count = match_count.group(1) if match_count else "?"
+        return f"[search_files] {target} search for '{pattern}' in {path} -> {count} matches"
+
+    if tool_name == "patch":
+        path = args.get("path", "?")
+        mode = args.get("mode", "replace")
+        return f"[patch] {mode} in {path} ({content_len:,} chars result)"
+
+    if tool_name in ("browser_navigate", "browser_click", "browser_snapshot",
+                     "browser_type", "browser_scroll", "browser_vision"):
+        url = args.get("url", "")
+        ref = args.get("ref", "")
+        detail = f" {url}" if url else (f" ref={ref}" if ref else "")
+        return f"[{tool_name}]{detail} ({content_len:,} chars)"
+
+    if tool_name == "web_search":
+        query = args.get("query", "?")
+        return f"[web_search] query='{query}' ({content_len:,} chars result)"
+
+    if tool_name == "web_extract":
+        urls = args.get("urls", [])
+        url_desc = urls[0] if isinstance(urls, list) and urls else "?"
+        if isinstance(urls, list) and len(urls) > 1:
+            url_desc += f" (+{len(urls) - 1} more)"
+        return f"[web_extract] {url_desc} ({content_len:,} chars)"
+
+    if tool_name == "delegate_task":
+        goal = args.get("goal", "")
+        if len(goal) > 60:
+            goal = goal[:57] + "..."
+        return f"[delegate_task] '{goal}' ({content_len:,} chars result)"
+
+    if tool_name == "execute_code":
+        code_preview = (args.get("code") or "")[:60].replace("\n", " ")
+        if len(args.get("code", "")) > 60:
+            code_preview += "..."
+        return f"[execute_code] `{code_preview}` ({line_count} lines output)"
+
+    if tool_name in ("skill_view", "skills_list", "skill_manage"):
+        name = args.get("name", "?")
+        return f"[{tool_name}] name={name} ({content_len:,} chars)"
+
+    if tool_name == "vision_analyze":
+        question = args.get("question", "")[:50]
+        return f"[vision_analyze] '{question}' ({content_len:,} chars)"
+
+    if tool_name == "memory":
+        action = args.get("action", "?")
+        target = args.get("target", "?")
+        return f"[memory] {action} on {target}"
+
+    if tool_name == "todo":
+        return "[todo] updated task list"
+
+    if tool_name == "clarify":
+        return "[clarify] asked user a question"
+
+    if tool_name == "text_to_speech":
+        return f"[text_to_speech] generated audio ({content_len:,} chars)"
+
+    if tool_name == "cronjob":
+        action = args.get("action", "?")
+        return f"[cronjob] {action}"
+
+    if tool_name == "process":
+        action = args.get("action", "?")
+        sid = args.get("session_id", "?")
+        return f"[process] {action} session={sid}"
+
+    # Generic fallback
+    first_arg = ""
+    for k, v in list(args.items())[:2]:
+        sv = str(v)[:40]
+        first_arg += f" {k}={sv}"
+    return f"[{tool_name}]{first_arg} ({content_len:,} chars result)"
+
+
 class ContextCompressor(ContextEngine):
    """Default context engine — compresses conversation context via lossy summarization.

@@ -78,6 +203,8 @@ class ContextCompressor(ContextEngine):
        self._context_probed = False
        self._context_probe_persistable = False
        self._previous_summary = None
+        self._last_compression_savings_pct = 100.0
+        self._ineffective_compression_count = 0

    def update_model(
        self,
@@ -167,6 +294,9 @@ class ContextCompressor(ContextEngine):

        # Stores the previous compaction summary for iterative updates
        self._previous_summary: Optional[str] = None
+        # Anti-thrashing: track whether last compression was effective
+        self._last_compression_savings_pct: float = 100.0
+        self._ineffective_compression_count: int = 0
        self._summary_failure_cooldown_until: float = 0.0

    def update_from_response(self, usage: Dict[str, Any]):
@@ -175,9 +305,26 @@ class ContextCompressor(ContextEngine):
        self.last_completion_tokens = usage.get("completion_tokens", 0)

    def should_compress(self, prompt_tokens: int = None) -> bool:
-        """Check if context exceeds the compression threshold."""
+        """Check if context exceeds the compression threshold.
+
+        Includes anti-thrashing protection: if the last two compressions
+        each saved less than 10%, skip compression to avoid infinite loops
+        where each pass removes only 1-2 messages.
+        """
        tokens = prompt_tokens if prompt_tokens is not None else self.last_prompt_tokens
-        return tokens >= self.threshold_tokens
+        if tokens < self.threshold_tokens:
+            return False
+        # Anti-thrashing: back off if recent compressions were ineffective
+        if self._ineffective_compression_count >= 2:
+            if not self.quiet_mode:
+                logger.warning(
+                    "Compression skipped — last %d compressions saved <10%% each. "
+                    "Consider /new to start a fresh session, or /compress <topic> "
+                    "for focused compression.",
+                    self._ineffective_compression_count,
+                )
+            return False
+        return True

    # ------------------------------------------------------------------
    # Tool output pruning (cheap pre-pass, no LLM call)
@@ -187,7 +334,16 @@ class ContextCompressor(ContextEngine):
        self, messages: List[Dict[str, Any]], protect_tail_count: int,
        protect_tail_tokens: int | None = None,
    ) -> tuple[List[Dict[str, Any]], int]:
-        """Replace old tool result contents with a short placeholder.
+        """Replace old tool result contents with informative 1-line summaries.
+
+        Instead of a generic placeholder, generates a summary like::
+
+            [terminal] ran `npm test` -> exit 0, 47 lines output
+            [read_file] read config.py from line 1 (3,400 chars)
+
+        Also deduplicates identical tool results (e.g. reading the same file
+        5x keeps only the newest full copy) and truncates large tool_call
+        arguments in assistant messages outside the protected tail.

        Walks backward from the end, protecting the most recent messages that
        fall within ``protect_tail_tokens`` (when provided) OR the last
@@ -203,6 +359,22 @@ class ContextCompressor(ContextEngine):
        result = [m.copy() for m in messages]
        pruned = 0

+        # Build index: tool_call_id -> (tool_name, arguments_json)
+        call_id_to_tool: Dict[str, tuple] = {}
+        for msg in result:
+            if msg.get("role") == "assistant":
+                for tc in msg.get("tool_calls") or []:
+                    if isinstance(tc, dict):
+                        cid = tc.get("id", "")
+                        fn = tc.get("function", {})
+                        call_id_to_tool[cid] = (fn.get("name", "unknown"), fn.get("arguments", ""))
+                    else:
+                        cid = getattr(tc, "id", "") or ""
+                        fn = getattr(tc, "function", None)
+                        name = getattr(fn, "name", "unknown") if fn else "unknown"
+                        args_str = getattr(fn, "arguments", "") if fn else ""
+                        call_id_to_tool[cid] = (name, args_str)
+
        # Determine the prune boundary
        if protect_tail_tokens is not None and protect_tail_tokens > 0:
            # Token-budget approach: walk backward accumulating tokens
@@ -211,7 +383,8 @@ class ContextCompressor(ContextEngine):
            min_protect = min(protect_tail_count, len(result) - 1)
            for i in range(len(result) - 1, -1, -1):
                msg = result[i]
-                content_len = len(msg.get("content") or "")
+                raw_content = msg.get("content") or ""
+                content_len = sum(len(p.get("text", "")) for p in raw_content) if isinstance(raw_content, list) else len(raw_content)
                msg_tokens = content_len // _CHARS_PER_TOKEN + 10
                for tc in msg.get("tool_calls") or []:
                    if isinstance(tc, dict):
@@ -226,18 +399,69 @@ class ContextCompressor(ContextEngine):
        else:
            prune_boundary = len(result) - protect_tail_count

+        # Pass 1: Deduplicate identical tool results.
+        # When the same file is read multiple times, keep only the most recent
+        # full copy and replace older duplicates with a back-reference.
+        content_hashes: dict = {}  # hash -> (index, tool_call_id)
+        for i in range(len(result) - 1, -1, -1):
+            msg = result[i]
+            if msg.get("role") != "tool":
+                continue
+            content = msg.get("content") or ""
+            # Skip multimodal content (list of content blocks)
+            if isinstance(content, list):
+                continue
+            if len(content) < 200:
+                continue
+            h = hashlib.md5(content.encode("utf-8", errors="replace")).hexdigest()[:12]
+            if h in content_hashes:
+                # This is an older duplicate — replace with back-reference
+                result[i] = {**msg, "content": "[Duplicate tool output — same content as a more recent call]"}
+                pruned += 1
+            else:
+                content_hashes[h] = (i, msg.get("tool_call_id", "?"))
+
+        # Pass 2: Replace old tool results with informative summaries
        for i in range(prune_boundary):
            msg = result[i]
            if msg.get("role") != "tool":
                continue
            content = msg.get("content", "")
+            # Skip multimodal content (list of content blocks)
+            if isinstance(content, list):
+                continue
            if not content or content == _PRUNED_TOOL_PLACEHOLDER:
                continue
+            # Skip already-deduplicated or previously-summarized results
+            if content.startswith("[Duplicate tool output"):
+                continue
            # Only prune if the content is substantial (>200 chars)
            if len(content) > 200:
-                result[i] = {**msg, "content": _PRUNED_TOOL_PLACEHOLDER}
+                call_id = msg.get("tool_call_id", "")
+                tool_name, tool_args = call_id_to_tool.get(call_id, ("unknown", ""))
+                summary = _summarize_tool_result(tool_name, tool_args, content)
+                result[i] = {**msg, "content": summary}
                pruned += 1

+        # Pass 3: Truncate large tool_call arguments in assistant messages
+        # outside the protected tail. write_file with 50KB content, for
+        # example, survives pruning entirely without this.
+        for i in range(prune_boundary):
+            msg = result[i]
+            if msg.get("role") != "assistant" or not msg.get("tool_calls"):
+                continue
+            new_tcs = []
+            modified = False
+            for tc in msg["tool_calls"]:
+                if isinstance(tc, dict):
+                    args = tc.get("function", {}).get("arguments", "")
+                    if len(args) > 500:
+                        tc = {**tc, "function": {**tc["function"], "arguments": args[:200] + "...[truncated]"}}
+                        modified = True
+                new_tcs.append(tc)
+            if modified:
+                result[i] = {**msg, "tool_calls": new_tcs}
+
        return result, pruned

    # ------------------------------------------------------------------
@@ -357,29 +581,37 @@ class ContextCompressor(ContextEngine):
        )

        # Shared structured template (used by both paths).
-        # Key changes vs v1:
-        #   - "Pending User Asks" section (from Claude Code) explicitly tracks
-        #     unanswered questions so the model knows what's resolved vs open
-        #   - "Remaining Work" replaces "Next Steps" to avoid reading as active
-        #     instructions
-        #   - "Resolved Questions" makes it clear which questions were already
-        #     answered (prevents model from re-answering them)
        _template_sections = f"""## Goal
 [What the user is trying to accomplish]

 ## Constraints & Preferences
 [User preferences, coding style, constraints, important decisions]

-## Progress
-### Done
-[Completed work — include specific file paths, commands run, results obtained]
-### In Progress
-[Work currently underway]
-### Blocked
-[Any blockers or issues encountered]
+## Completed Actions
+[Numbered list of concrete actions taken — include tool used, target, and outcome.
+Format each as: N. ACTION target — outcome [tool: name]
+Example:
+1. READ config.py:45 — found `==` should be `!=` [tool: read_file]
+2. PATCH config.py:45 — changed `==` to `!=` [tool: patch]
+3. TEST `pytest tests/` — 3/50 failed: test_parse, test_validate, test_edge [tool: terminal]
+Be specific with file paths, commands, line numbers, and results.]
+
+## Active State
+[Current working state — include:
+- Working directory and branch (if applicable)
+- Modified/created files with brief note on each
+- Test status (X/Y passing)
+- Any running processes or servers
+- Environment details that matter]
+
+## In Progress
+[Work currently underway — what was being done when compaction fired]
+
+## Blocked
+[Any blockers, errors, or issues not yet resolved. Include exact error messages.]

 ## Key Decisions
-[Important technical decisions and why they were made]
+[Important technical decisions and WHY they were made]

 ## Resolved Questions
 [Questions the user asked that were ALREADY answered — include the answer so the next assistant does not re-answer them]
@@ -396,10 +628,7 @@ class ContextCompressor(ContextEngine):
 ## Critical Context
 [Any specific values, error messages, configuration details, or data that would be lost without explicit preservation]

-## Tools & Patterns
-[Which tools were used, how they were used effectively, and any tool-specific discoveries]
-
-Target ~{summary_budget} tokens. Be specific — include file paths, command outputs, error messages, and concrete values rather than vague descriptions.
+Target ~{summary_budget} tokens. Be CONCRETE — include file paths, command outputs, error messages, line numbers, and specific values. Avoid vague descriptions like "made some changes" — say exactly what changed.

 Write only the summary body. Do not include any preamble or prefix."""

@@ -415,7 +644,7 @@ PREVIOUS SUMMARY:
 NEW TURNS TO INCORPORATE:
 {content_to_summarize}

-Update the summary using this exact structure. PRESERVE all existing information that is still relevant. ADD new progress. Move items from "In Progress" to "Done" when completed. Move answered questions to "Resolved Questions". Remove information only if it is clearly obsolete.
+Update the summary using this exact structure. PRESERVE all existing information that is still relevant. ADD new completed actions to the numbered list (continue numbering). Move items from "In Progress" to "Completed Actions" when done. Move answered questions to "Resolved Questions". Update "Active State" to reflect current state. Remove information only if it is clearly obsolete.

 {_template_sections}"""
        else:
@@ -450,7 +679,7 @@ The user has requested that this compaction PRIORITISE preserving all informatio
                    "api_mode": self.api_mode,
                },
                "messages": [{"role": "user", "content": prompt}],
-                "max_tokens": summary_budget * 2,
+                "max_tokens": int(summary_budget * 1.3),
                # timeout resolved from auxiliary.compression.timeout config by call_llm
            }
            if self.summary_model:
@@ -464,8 +693,10 @@ The user has requested that this compaction PRIORITISE preserving all informatio
            # Store for iterative updates on next compaction
            self._previous_summary = summary
            self._summary_failure_cooldown_until = 0.0
+            self._summary_model_fallen_back = False
            return self._with_summary_prefix(summary)
        except RuntimeError:
+            # No provider configured — long cooldown, unlikely to self-resolve
            self._summary_failure_cooldown_until = time.monotonic() + _SUMMARY_FAILURE_COOLDOWN_SECONDS
            logging.warning("Context compression: no provider available for "
                            "summary. Middle turns will be dropped without summary "
@@ -473,12 +704,42 @@ The user has requested that this compaction PRIORITISE preserving all informatio
                            _SUMMARY_FAILURE_COOLDOWN_SECONDS)
            return None
        except Exception as e:
-            self._summary_failure_cooldown_until = time.monotonic() + _SUMMARY_FAILURE_COOLDOWN_SECONDS
+            # If the summary model is different from the main model and the
+            # error looks permanent (model not found, 503, 404), fall back to
+            # using the main model instead of entering cooldown that leaves
+            # context growing unbounded.  (#8620 sub-issue 4)
+            _status = getattr(e, "status_code", None) or getattr(getattr(e, "response", None), "status_code", None)
+            _err_str = str(e).lower()
+            _is_model_not_found = (
+                _status in (404, 503)
+                or "model_not_found" in _err_str
+                or "does not exist" in _err_str
+                or "no available channel" in _err_str
+            )
+            if (
+                _is_model_not_found
+                and self.summary_model
+                and self.summary_model != self.model
+                and not getattr(self, "_summary_model_fallen_back", False)
+            ):
+                self._summary_model_fallen_back = True
+                logging.warning(
+                    "Summary model '%s' not available (%s). "
+                    "Falling back to main model '%s' for compression.",
+                    self.summary_model, e, self.model,
+                )
+                self.summary_model = ""  # empty = use main model
+                self._summary_failure_cooldown_until = 0.0  # no cooldown
+                return self._generate_summary(messages, summary_budget)  # retry immediately
+
+            # Transient errors (timeout, rate limit, network) — shorter cooldown
+            _transient_cooldown = 60
+            self._summary_failure_cooldown_until = time.monotonic() + _transient_cooldown
            logging.warning(
                "Failed to generate context summary: %s. "
                "Further summary attempts paused for %d seconds.",
                e,
-                _SUMMARY_FAILURE_COOLDOWN_SECONDS,
+                _transient_cooldown,
            )
            return None

@@ -744,11 +1005,11 @@ The user has requested that this compaction PRIORITISE preserving all informatio
        compressed = []
        for i in range(compress_start):
            msg = messages[i].copy()
-            if i == 0 and msg.get("role") == "system" and self.compression_count == 0:
-                msg["content"] = (
-                    (msg.get("content") or "")
-                    + "\n\n[Note: Some earlier conversation turns have been compacted into a handoff summary to preserve context space. The current session state may still reflect earlier work, so build on that summary and state rather than re-doing work.]"
-                )
+            if i == 0 and msg.get("role") == "system":
+                existing = msg.get("content") or ""
+                _compression_note = "[Note: Some earlier conversation turns have been compacted into a handoff summary to preserve context space. The current session state may still reflect earlier work, so build on that summary and state rather than re-doing work.]"
+                if _compression_note not in existing:
+                    msg["content"] = existing + "\n\n" + _compression_note
            compressed.append(msg)

        # If LLM summary failed, insert a static fallback so the model
@@ -806,14 +1067,24 @@ The user has requested that this compaction PRIORITISE preserving all informatio

        compressed = self._sanitize_tool_pairs(compressed)

+        new_estimate = estimate_messages_tokens_rough(compressed)
+        saved_estimate = display_tokens - new_estimate
+
+        # Anti-thrashing: track compression effectiveness
+        savings_pct = (saved_estimate / display_tokens * 100) if display_tokens > 0 else 0
+        self._last_compression_savings_pct = savings_pct
+        if savings_pct < 10:
+            self._ineffective_compression_count += 1
+        else:
+            self._ineffective_compression_count = 0
+
        if not self.quiet_mode:
-            new_estimate = estimate_messages_tokens_rough(compressed)
-            saved_estimate = display_tokens - new_estimate
            logger.info(
-                "Compressed: %d -> %d messages (~%d tokens saved)",
+                "Compressed: %d -> %d messages (~%d tokens saved, %.0f%%)",
                n_messages,
                len(compressed),
                saved_estimate,
+                savings_pct,
            )
            logger.info("Compression #%d complete", self.compression_count)

--- a/agent/skill_commands.py
+++ b/agent/skill_commands.py
@@ -12,6 +12,8 @@ from datetime import datetime
 from pathlib import Path
 from typing import Any, Dict, Optional

+from hermes_constants import display_hermes_home
+
 logger = logging.getLogger(__name__)

 _skill_commands: Dict[str, Dict[str, Any]] = {}
@@ -108,7 +110,7 @@ def _inject_skill_config(loaded_skill: dict[str, Any], parts: list[str]) -> None
        if not resolved:
            return

-        lines = ["", "[Skill config (from ~/.hermes/config.yaml):"]
+        lines = ["", f"[Skill config (from {display_hermes_home()}/config.yaml):"]
        for key, value in resolved.items():
            display_val = str(value) if value else "(not set)"
            lines.append(f"  {key} = {display_val}")
--- a/cli.py
+++ b/cli.py
@@ -4692,16 +4692,19 @@ class HermesCLI:
                self._close_model_picker()
                return
            provider_data = providers[selected]
-            model_list = []
-            try:
-                from hermes_cli.models import provider_model_ids
-                live = provider_model_ids(provider_data["slug"])
-                if live:
-                    model_list = live
-            except Exception:
-                pass
+            # Use the curated model list from list_authenticated_providers()
+            # (same lists as `hermes model` and gateway pickers).
+            # Only fall back to the live provider catalog when the curated
+            # list is empty (e.g. user-defined endpoints with no curated list).
+            model_list = provider_data.get("models", [])
            if not model_list:
-                model_list = provider_data.get("models", [])
+                try:
+                    from hermes_cli.models import provider_model_ids
+                    live = provider_model_ids(provider_data["slug"])
+                    if live:
+                        model_list = live
+                except Exception:
+                    pass
            state["stage"] = "model"
            state["provider_data"] = provider_data
            state["model_list"] = model_list
@@ -6061,7 +6064,7 @@ class HermesCLI:
        parts = cmd.strip().split(None, 1)
        sub = parts[1].lower().strip() if len(parts) > 1 else "status"

-        _DEFAULT_CDP = "http://localhost:9222"
+        _DEFAULT_CDP = "http://127.0.0.1:9222"
        current = os.environ.get("BROWSER_CDP_URL", "").strip()

        if sub.startswith("connect"):
--- a/cron/scheduler.py
+++ b/cron/scheduler.py
@@ -288,11 +288,13 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option

    if wrap_response:
        task_name = job.get("name", job["id"])
+        job_id = job.get("id", "")
        delivery_content = (
            f"Cronjob Response: {task_name}\n"
+            f"(job_id: {job_id})\n"
            f"-------------\n\n"
            f"{content}\n\n"
-            f"Note: The agent cannot see this message, and therefore cannot respond to it."
+            f"To stop or manage this job, send me a new message (e.g. \"stop reminder {task_name}\")."
        )
    else:
        delivery_content = content
--- a/gateway/platforms/base.py
+++ b/gateway/platforms/base.py
@@ -1635,6 +1635,21 @@ class BasePlatformAdapter(ABC):
            # streaming already delivered the text (already_sent=True) or
            # when the message was queued behind an active agent.  Log at
            # DEBUG to avoid noisy warnings for expected behavior.
+            #
+            # Suppress stale response when the session was interrupted by a
+            # new message that hasn't been consumed yet.  The pending message
+            # is processed by the pending-message handler below (#8221/#2483).
+            if (
+                response
+                and interrupt_event.is_set()
+                and session_key in self._pending_messages
+            ):
+                logger.info(
+                    "[%s] Suppressing stale response for interrupted session %s",
+                    self.name,
+                    session_key,
+                )
+                response = None
            if not response:
                logger.debug("[%s] Handler returned empty/None response for %s", self.name, event.source.chat_id)
            if response:
--- a/gateway/platforms/discord.py
+++ b/gateway/platforms/discord.py
@@ -1379,6 +1379,68 @@ class DiscordAdapter(BasePlatformAdapter):
            )
            return await super().send_image(chat_id, image_url, caption, reply_to)

+    async def send_animation(
+        self,
+        chat_id: str,
+        animation_url: str,
+        caption: Optional[str] = None,
+        reply_to: Optional[str] = None,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> SendResult:
+        """Send an animated GIF natively as a Discord file attachment."""
+        if not self._client:
+            return SendResult(success=False, error="Not connected")
+
+        if not is_safe_url(animation_url):
+            logger.warning("[%s] Blocked unsafe animation URL during Discord send_animation", self.name)
+            return await super().send_animation(chat_id, animation_url, caption, reply_to, metadata=metadata)
+
+        try:
+            import aiohttp
+
+            channel = self._client.get_channel(int(chat_id))
+            if not channel:
+                channel = await self._client.fetch_channel(int(chat_id))
+            if not channel:
+                return SendResult(success=False, error=f"Channel {chat_id} not found")
+
+            # Download the GIF and send as a Discord file attachment
+            # (Discord renders .gif attachments as auto-playing animations inline)
+            from gateway.platforms.base import resolve_proxy_url, proxy_kwargs_for_aiohttp
+            _proxy = resolve_proxy_url(platform_env_var="DISCORD_PROXY")
+            _sess_kw, _req_kw = proxy_kwargs_for_aiohttp(_proxy)
+            async with aiohttp.ClientSession(**_sess_kw) as session:
+                async with session.get(animation_url, timeout=aiohttp.ClientTimeout(total=30), **_req_kw) as resp:
+                    if resp.status != 200:
+                        raise Exception(f"Failed to download animation: HTTP {resp.status}")
+
+                    animation_data = await resp.read()
+
+                    import io
+                    file = discord.File(io.BytesIO(animation_data), filename="animation.gif")
+
+                    msg = await channel.send(
+                        content=caption if caption else None,
+                        file=file,
+                    )
+                    return SendResult(success=True, message_id=str(msg.id))
+
+        except ImportError:
+            logger.warning(
+                "[%s] aiohttp not installed, falling back to URL. Run: pip install aiohttp",
+                self.name,
+                exc_info=True,
+            )
+            return await super().send_animation(chat_id, animation_url, caption, reply_to, metadata=metadata)
+        except Exception as e:  # pragma: no cover - defensive logging
+            logger.error(
+                "[%s] Failed to send animation attachment, falling back to URL: %s",
+                self.name,
+                e,
+                exc_info=True,
+            )
+            return await super().send_animation(chat_id, animation_url, caption, reply_to, metadata=metadata)
+
    async def send_video(
        self,
        chat_id: str,
--- a/gateway/platforms/matrix.py
+++ b/gateway/platforms/matrix.py
@@ -729,6 +729,14 @@ class MatrixAdapter(BasePlatformAdapter):
            except Exception:
                pass

+    async def stop_typing(self, chat_id: str) -> None:
+        """Stop the Matrix typing indicator."""
+        if self._client:
+            try:
+                await self._client.set_typing(RoomID(chat_id), timeout=0)
+            except Exception:
+                pass
+
    async def edit_message(
        self, chat_id: str, message_id: str, content: str
    ) -> SendResult:
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -573,6 +573,7 @@ class GatewayRunner:
        self._running_agents: Dict[str, Any] = {}
        self._running_agents_ts: Dict[str, float] = {}  # start timestamp per session
        self._pending_messages: Dict[str, str] = {}  # Queued messages during interrupt
+        self._busy_ack_ts: Dict[str, float] = {}  # last busy-ack timestamp per session (debounce)

        # Cache AIAgent instances per session to preserve prompt caching.
        # Without this, a new AIAgent is created per message, rebuilding the
@@ -1329,26 +1330,100 @@ class GatewayRunner:
        merge_pending_message_event(adapter._pending_messages, session_key, event)

    async def _handle_active_session_busy_message(self, event: MessageEvent, session_key: str) -> bool:
-        if not self._draining:
-            return False
+        # --- Draining case (gateway restarting/stopping) ---
+        if self._draining:
+            adapter = self.adapters.get(event.source.platform)
+            if not adapter:
+                return True
+
+            thread_meta = {"thread_id": event.source.thread_id} if event.source.thread_id else None
+            if self._queue_during_drain_enabled():
+                self._queue_or_replace_pending_event(session_key, event)
+                message = f"⏳ Gateway {self._status_action_gerund()} — queued for the next turn after it comes back."
+            else:
+                message = f"⏳ Gateway is {self._status_action_gerund()} and is not accepting another turn right now."
+
+            await adapter._send_with_retry(
+                chat_id=event.source.chat_id,
+                content=message,
+                reply_to=event.message_id,
+                metadata=thread_meta,
+            )
+            return True
+
+        # --- Normal busy case (agent actively running a task) ---
+        # The user sent a message while the agent is working.  Interrupt the
+        # agent immediately so it stops the current tool-calling loop and
+        # processes the new message.  The pending message is stored in the
+        # adapter so the base adapter picks it up once the interrupted run
+        # returns.  A brief ack tells the user what's happening (debounced
+        # to avoid spam when they fire multiple messages quickly).

        adapter = self.adapters.get(event.source.platform)
        if not adapter:
-            return True
+            return False  # let default path handle it
+
+        # Store the message so it's processed as the next turn after the
+        # interrupt causes the current run to exit.
+        from gateway.platforms.base import merge_pending_message_event
+        merge_pending_message_event(adapter._pending_messages, session_key, event)
+
+        # Interrupt the running agent — this aborts in-flight tool calls and
+        # causes the agent loop to exit at the next check point.
+        running_agent = self._running_agents.get(session_key)
+        if running_agent and running_agent is not _AGENT_PENDING_SENTINEL:
+            try:
+                running_agent.interrupt(event.text)
+            except Exception:
+                pass  # don't let interrupt failure block the ack
+
+        # Debounce: only send an acknowledgment once every 30 seconds per session
+        # to avoid spamming the user when they send multiple messages quickly
+        _BUSY_ACK_COOLDOWN = 30
+        now = time.time()
+        last_ack = self._busy_ack_ts.get(session_key, 0)
+        if now - last_ack < _BUSY_ACK_COOLDOWN:
+            return True  # interrupt sent, ack already delivered recently
+
+        self._busy_ack_ts[session_key] = now
+
+        # Build a status-rich acknowledgment
+        status_parts = []
+        if running_agent and running_agent is not _AGENT_PENDING_SENTINEL:
+            try:
+                summary = running_agent.get_activity_summary()
+                iteration = summary.get("api_call_count", 0)
+                max_iter = summary.get("max_iterations", 0)
+                current_tool = summary.get("current_tool")
+                start_ts = self._running_agents_ts.get(session_key, 0)
+                if start_ts:
+                    elapsed_min = int((now - start_ts) / 60)
+                    if elapsed_min > 0:
+                        status_parts.append(f"{elapsed_min} min elapsed")
+                if max_iter:
+                    status_parts.append(f"iteration {iteration}/{max_iter}")
+                if current_tool:
+                    status_parts.append(f"running: {current_tool}")
+            except Exception:
+                pass
+
+        status_detail = f" ({', '.join(status_parts)})" if status_parts else ""
+        message = (
+            f"⚡ Interrupting current task{status_detail}. "
+            f"I'll respond to your message shortly."
+        )

        thread_meta = {"thread_id": event.source.thread_id} if event.source.thread_id else None
-        if self._queue_during_drain_enabled():
-            self._queue_or_replace_pending_event(session_key, event)
-            message = f"⏳ Gateway {self._status_action_gerund()} — queued for the next turn after it comes back."
-        else:
-            message = f"⏳ Gateway is {self._status_action_gerund()} and is not accepting another turn right now."
+        try:
+            await adapter._send_with_retry(
+                chat_id=event.source.chat_id,
+                content=message,
+                reply_to=event.message_id,
+                metadata=thread_meta,
+            )
+        except Exception as e:
+            logger.debug("Failed to send busy-ack: %s", e)

-        await adapter._send_with_retry(
-            chat_id=event.source.chat_id,
-            content=message,
-            reply_to=event.message_id,
-            metadata=thread_meta,
-        )
        return True

    async def _drain_active_agents(self, timeout: float) -> tuple[Dict[str, Any], bool]:
@@ -2237,6 +2312,8 @@ class GatewayRunner:
            self._running_agents.clear()
            self._pending_messages.clear()
            self._pending_approvals.clear()
+            if hasattr(self, '_busy_ack_ts'):
+                self._busy_ack_ts.clear()
            self._shutdown_event.set()

            # Global cleanup: kill any remaining tool subprocesses not tied
@@ -2721,6 +2798,7 @@ class GatewayRunner:
                )
                del self._running_agents[_quick_key]
                self._running_agents_ts.pop(_quick_key, None)
+                self._busy_ack_ts.pop(_quick_key, None)

        if _quick_key in self._running_agents:
            if event.get_command() == "status":
@@ -8477,6 +8555,12 @@ class GatewayRunner:
                    cached = _cache.get(session_key)
                    if cached and cached[1] == _sig:
                        agent = cached[0]
+                        # Reset activity timestamp so the inactivity timeout
+                        # handler doesn't see stale idle time from the previous
+                        # turn and immediately kill this agent.  (#9051)
+                        agent._last_activity_ts = time.time()
+                        agent._last_activity_desc = "starting new turn (cached)"
+                        agent._api_call_count = 0
                        logger.debug("Reusing cached agent for session %s", session_key)

            if agent is None:
@@ -9244,15 +9328,11 @@ class GatewayRunner:
                                pass
                        except Exception as e:
                            logger.debug("Stream consumer wait before queued message failed: %s", e)
-                    _response_previewed = bool(result.get("response_previewed"))
                    _already_streamed = bool(
                        _sc
                        and (
                            getattr(_sc, "final_response_sent", False)
-                            or (
-                                _response_previewed
-                                and getattr(_sc, "already_sent", False)
-                            )
+                            or getattr(_sc, "already_sent", False)
                        )
                    )
                    first_response = result.get("final_response", "")
@@ -9336,13 +9416,9 @@ class GatewayRunner:
        # them even if streaming had sent earlier partial output.
        _sc = stream_consumer_holder[0]
        if _sc and isinstance(response, dict) and not response.get("failed"):
-            _response_previewed = bool(response.get("response_previewed"))
            if (
                getattr(_sc, "final_response_sent", False)
-                or (
-                    _response_previewed
-                    and getattr(_sc, "already_sent", False)
-                )
+                or getattr(_sc, "already_sent", False)
            ):
                response["already_sent"] = True
        
--- a/hermes_cli/doctor.py
+++ b/hermes_cli/doctor.py
@@ -8,6 +8,7 @@ import os
 import sys
 import subprocess
 import shutil
+from pathlib import Path

 from hermes_cli.config import get_project_root, get_hermes_home, get_env_path
 from hermes_constants import display_hermes_home
@@ -513,7 +514,87 @@ def run_doctor(args):
            pass

    _check_gateway_service_linger(issues)
-    
+
+    # =========================================================================
+    # Check: Command installation (hermes bin symlink)
+    # =========================================================================
+    if sys.platform != "win32":
+        print()
+        print(color("◆ Command Installation", Colors.CYAN, Colors.BOLD))
+
+        # Determine the venv entry point location
+        _venv_bin = None
+        for _venv_name in ("venv", ".venv"):
+            _candidate = PROJECT_ROOT / _venv_name / "bin" / "hermes"
+            if _candidate.exists():
+                _venv_bin = _candidate
+                break
+
+        # Determine the expected command link directory (mirrors install.sh logic)
+        _prefix = os.environ.get("PREFIX", "")
+        _is_termux_env = bool(os.environ.get("TERMUX_VERSION")) or "com.termux/files/usr" in _prefix
+        if _is_termux_env and _prefix:
+            _cmd_link_dir = Path(_prefix) / "bin"
+            _cmd_link_display = "$PREFIX/bin"
+        else:
+            _cmd_link_dir = Path.home() / ".local" / "bin"
+            _cmd_link_display = "~/.local/bin"
+        _cmd_link = _cmd_link_dir / "hermes"
+
+        if _venv_bin is None:
+            check_warn(
+                "Venv entry point not found",
+                "(hermes not in venv/bin/ or .venv/bin/ — reinstall with pip install -e '.[all]')"
+            )
+            manual_issues.append(
+                f"Reinstall entry point: cd {PROJECT_ROOT} && source venv/bin/activate && pip install -e '.[all]'"
+            )
+        else:
+            check_ok(f"Venv entry point exists ({_venv_bin.relative_to(PROJECT_ROOT)})")
+
+            # Check the symlink at the command link location
+            if _cmd_link.is_symlink():
+                _target = _cmd_link.resolve()
+                _expected = _venv_bin.resolve()
+                if _target == _expected:
+                    check_ok(f"{_cmd_link_display}/hermes → correct target")
+                else:
+                    check_warn(
+                        f"{_cmd_link_display}/hermes points to wrong target",
+                        f"(→ {_target}, expected → {_expected})"
+                    )
+                    if should_fix:
+                        _cmd_link.unlink()
+                        _cmd_link.symlink_to(_venv_bin)
+                        check_ok(f"Fixed symlink: {_cmd_link_display}/hermes → {_venv_bin}")
+                        fixed_count += 1
+                    else:
+                        issues.append(f"Broken symlink at {_cmd_link_display}/hermes — run 'hermes doctor --fix'")
+            elif _cmd_link.exists():
+                # It's a regular file, not a symlink — possibly a wrapper script
+                check_ok(f"{_cmd_link_display}/hermes exists (non-symlink)")
+            else:
+                check_fail(
+                    f"{_cmd_link_display}/hermes not found",
+                    "(hermes command may not work outside the venv)"
+                )
+                if should_fix:
+                    _cmd_link_dir.mkdir(parents=True, exist_ok=True)
+                    _cmd_link.symlink_to(_venv_bin)
+                    check_ok(f"Created symlink: {_cmd_link_display}/hermes → {_venv_bin}")
+                    fixed_count += 1
+
+                    # Check if the link dir is on PATH
+                    _path_dirs = os.environ.get("PATH", "").split(os.pathsep)
+                    if str(_cmd_link_dir) not in _path_dirs:
+                        check_warn(
+                            f"{_cmd_link_display} is not on your PATH",
+                            "(add it to your shell config: export PATH=\"$HOME/.local/bin:$PATH\")"
+                        )
+                        manual_issues.append(f"Add {_cmd_link_display} to your PATH")
+                else:
+                    issues.append(f"Missing {_cmd_link_display}/hermes symlink — run 'hermes doctor --fix'")
+
    # =========================================================================
    # Check: External tools
    # =========================================================================
--- a/hermes_cli/gateway.py
+++ b/hermes_cli/gateway.py
@@ -715,7 +715,9 @@ def _detect_venv_dir() -> Path | None:
    """Detect the active virtualenv directory.

    Checks ``sys.prefix`` first (works regardless of the directory name),
-    then falls back to probing common directory names under PROJECT_ROOT.
+    then ``VIRTUAL_ENV`` env var (covers uv-managed environments where
+    sys.prefix == sys.base_prefix), then falls back to probing common
+    directory names under PROJECT_ROOT.
    Returns ``None`` when no virtualenv can be found.
    """
    # If we're running inside a virtualenv, sys.prefix points to it.
@@ -724,6 +726,15 @@ def _detect_venv_dir() -> Path | None:
        if venv.is_dir():
            return venv

+    # uv and some other tools set VIRTUAL_ENV without changing sys.prefix.
+    # This catches `uv run` where sys.prefix == sys.base_prefix but the
+    # environment IS a venv.  (#8620)
+    _virtual_env = os.environ.get("VIRTUAL_ENV")
+    if _virtual_env:
+        venv = Path(_virtual_env)
+        if venv.is_dir():
+            return venv
+
    # Fallback: check common virtualenv directory names under the project root.
    for candidate in (".venv", "venv"):
        venv = PROJECT_ROOT / candidate
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -6320,7 +6320,37 @@ Examples:
        sys.exit(1)

    _processed_argv = _coalesce_session_name_args(sys.argv[1:])
-    args = parser.parse_args(_processed_argv)
+
+    # ── Defensive subparser routing (bpo-9338 workaround) ───────────
+    # On some Python versions (notably <3.11), argparse fails to route
+    # subcommand tokens when the parent parser has nargs='?' optional
+    # arguments (--continue).  The symptom: "unrecognized arguments: model"
+    # even though 'model' is a registered subcommand.
+    #
+    # Fix: when argv contains a token matching a known subcommand, set
+    # subparsers.required=True to force deterministic routing.  If that
+    # fails (e.g. 'hermes -c model' where 'model' is consumed as the
+    # session name for --continue), fall back to the default behaviour.
+    import io as _io
+    _known_cmds = set(subparsers.choices.keys()) if hasattr(subparsers, "choices") else set()
+    _has_cmd_token = any(t in _known_cmds for t in _processed_argv if not t.startswith("-"))
+
+    if _has_cmd_token:
+        subparsers.required = True
+        _saved_stderr = sys.stderr
+        try:
+            sys.stderr = _io.StringIO()
+            args = parser.parse_args(_processed_argv)
+            sys.stderr = _saved_stderr
+        except SystemExit:
+            sys.stderr = _saved_stderr
+            # Subcommand name was consumed as a flag value (e.g. -c model).
+            # Fall back to optional subparsers so argparse handles it normally.
+            subparsers.required = False
+            args = parser.parse_args(_processed_argv)
+    else:
+        subparsers.required = False
+        args = parser.parse_args(_processed_argv)

    # Handle --version flag
    if args.version:
--- a/hermes_cli/tools_config.py
+++ b/hermes_cli/tools_config.py
@@ -63,6 +63,7 @@ CONFIGURABLE_TOOLSETS = [
    ("clarify",         "❓ Clarifying Questions",      "clarify"),
    ("delegation",      "👥 Task Delegation",           "delegate_task"),
    ("cronjob",         "⏰ Cron Jobs",                 "create/list/update/pause/resume/run, with optional attached skills"),
+    ("messaging",       "📨 Cross-Platform Messaging",  "send_message"),
    ("rl",              "🧪 RL Training",               "Tinker-Atropos training tools"),
    ("homeassistant",    "🏠 Home Assistant",           "smart home device control"),
 ]
--- a/run_agent.py
+++ b/run_agent.py
@@ -1268,6 +1268,19 @@ class AIAgent:
            try:
                _config_context_length = int(_config_context_length)
            except (TypeError, ValueError):
+                logger.warning(
+                    "Invalid model.context_length in config.yaml: %r — "
+                    "must be a plain integer (e.g. 256000, not '256K'). "
+                    "Falling back to auto-detection.",
+                    _config_context_length,
+                )
+                import sys
+                print(
+                    f"\n⚠ Invalid model.context_length in config.yaml: {_config_context_length!r}\n"
+                    f"  Must be a plain integer (e.g. 256000, not '256K').\n"
+                    f"  Falling back to auto-detected context window.\n",
+                    file=sys.stderr,
+                )
                _config_context_length = None

        # Store for reuse in switch_model (so config override persists across model switches)
@@ -1296,7 +1309,20 @@ class AIAgent:
                                try:
                                    _config_context_length = int(_cp_ctx)
                                except (TypeError, ValueError):
-                                    pass
+                                    logger.warning(
+                                        "Invalid context_length for model %r in "
+                                        "custom_providers: %r — must be a plain "
+                                        "integer (e.g. 256000, not '256K'). "
+                                        "Falling back to auto-detection.",
+                                        self.model, _cp_ctx,
+                                    )
+                                    import sys
+                                    print(
+                                        f"\n⚠ Invalid context_length for model {self.model!r} in custom_providers: {_cp_ctx!r}\n"
+                                        f"  Must be a plain integer (e.g. 256000, not '256K').\n"
+                                        f"  Falling back to auto-detected context window.\n",
+                                        file=sys.stderr,
+                                    )
                    break
        
        # Select context engine: config-driven (like memory providers).
@@ -3563,7 +3589,12 @@ class AIAgent:
                                item_id = ri.get("id")
                                if item_id and item_id in seen_item_ids:
                                    continue
-                                items.append(ri)
+                                # Strip the "id" field — with store=False the
+                                # Responses API cannot look up items by ID and
+                                # returns 404.  The encrypted_content blob is
+                                # self-contained for reasoning chain continuity.
+                                replay_item = {k: v for k, v in ri.items() if k != "id"}
+                                items.append(replay_item)
                                if item_id:
                                    seen_item_ids.add(item_id)
                                has_codex_reasoning = True
@@ -3704,8 +3735,10 @@ class AIAgent:
                            continue
                        seen_ids.add(item_id)
                    reasoning_item = {"type": "reasoning", "encrypted_content": encrypted}
-                    if isinstance(item_id, str) and item_id:
-                        reasoning_item["id"] = item_id
+                    # Do NOT include the "id" in the outgoing item — with
+                    # store=False (our default) the API tries to resolve the
+                    # id server-side and returns 404.  The id is still used
+                    # above for local deduplication via seen_ids.
                    summary = item.get("summary")
                    if isinstance(summary, list):
                        reasoning_item["summary"] = summary
@@ -7833,6 +7866,7 @@ class AIAgent:
        self._incomplete_scratchpad_retries = 0
        self._codex_incomplete_retries = 0
        self._thinking_prefill_retries = 0
+        self._post_tool_empty_retried = False
        self._last_content_with_tools = None
        self._mute_post_response = False
        self._unicode_sanitization_passes = 0
@@ -9011,6 +9045,11 @@ class AIAgent:
                                    self.api_key = _clean_key
                                    if isinstance(getattr(self, "_client_kwargs", None), dict):
                                        self._client_kwargs["api_key"] = _clean_key
+                                    # Also update the live client — it holds its
+                                    # own copy of api_key which auth_headers reads
+                                    # dynamically on every request.
+                                    if getattr(self, "client", None) is not None and hasattr(self.client, "api_key"):
+                                        self.client.api_key = _clean_key
                                    _credential_sanitized = True
                                    self._vprint(
                                        f"{self.log_prefix}⚠️  API key contained non-ASCII characters "
@@ -10106,6 +10145,10 @@ class AIAgent:
                    if _had_prefill:
                        self._thinking_prefill_retries = 0
                        self._empty_content_retries = 0
+                    # Successful tool execution — reset the post-tool nudge
+                    # flag so it can fire again if the model goes empty on
+                    # a LATER tool round.
+                    self._post_tool_empty_retried = False

                    messages.append(assistant_msg)
                    self._emit_interim_assistant_message(assistant_msg)
@@ -10274,6 +10317,48 @@ class AIAgent:
                            self._response_was_previewed = True
                            break

+                        # ── Post-tool-call empty response nudge ───────────
+                        # The model returned empty after executing tool calls
+                        # but there's no prior-turn content to fall back on.
+                        # Instead of giving up, nudge the model to continue by
+                        # appending a user-level hint.  This is the #9400 case:
+                        # weaker models (GLM-5, etc.) sometimes return empty
+                        # after tool results instead of continuing to the next
+                        # step.  One retry with a nudge usually fixes it.
+                        _prior_was_tool = any(
+                            m.get("role") == "tool"
+                            for m in messages[-5:]  # check recent messages
+                        )
+                        if (
+                            _prior_was_tool
+                            and not getattr(self, "_post_tool_empty_retried", False)
+                        ):
+                            self._post_tool_empty_retried = True
+                            logger.info(
+                                "Empty response after tool calls — nudging model "
+                                "to continue processing"
+                            )
+                            self._emit_status(
+                                "⚠️ Model returned empty after tool calls — "
+                                "nudging to continue"
+                            )
+                            # Append the empty assistant message first so the
+                            # message sequence stays valid:
+                            #   tool(result) → assistant("(empty)") → user(nudge)
+                            # Without this, we'd have tool → user which most
+                            # APIs reject as an invalid sequence.
+                            assistant_msg["content"] = "(empty)"
+                            messages.append(assistant_msg)
+                            messages.append({
+                                "role": "user",
+                                "content": (
+                                    "You just executed tool calls but returned an "
+                                    "empty response. Please process the tool "
+                                    "results above and continue with the task."
+                                ),
+                            })
+                            continue
+
                        # ── Thinking-only prefill continuation ──────────
                        # The model produced structured reasoning (via API
                        # fields) but no visible text content.  Rather than
--- a/skills/leisure/find-nearby/scripts/find_nearby.py
+++ b/skills/leisure/find-nearby/scripts/find_nearby.py
@@ -98,7 +98,7 @@ def find_nearby(lat: float, lon: float, types: list[str], radius: int = 1500, li
        # Get coordinates (nodes have lat/lon directly, ways/relations use center)
        plat = el.get("lat") or (el.get("center", {}) or {}).get("lat")
        plon = el.get("lon") or (el.get("center", {}) or {}).get("lon")
-        if not plat or not plon:
+        if plat is None or plon is None:
            continue

        dist = haversine(lat, lon, plat, plon)
--- a/skills/productivity/google-workspace/scripts/gws_bridge.py
+++ b/skills/productivity/google-workspace/scripts/gws_bridge.py
@@ -25,6 +25,13 @@ def refresh_token(token_data: dict) -> dict:
    import urllib.parse
    import urllib.request

+    required_keys = ["client_id", "client_secret", "refresh_token", "token_uri"]
+    missing = [k for k in required_keys if k not in token_data]
+    if missing:
+        print(f"ERROR: google_token.json is missing required fields: {', '.join(missing)}", file=sys.stderr)
+        print("Please re-authenticate by running the Google Workspace setup script.", file=sys.stderr)
+        sys.exit(1)
+
    params = urllib.parse.urlencode({
        "client_id": token_data["client_id"],
        "client_secret": token_data["client_secret"],
--- a/tests/cron/test_scheduler.py
+++ b/tests/cron/test_scheduler.py
@@ -233,9 +233,10 @@ class TestDeliverResultWrapping:
        send_mock.assert_called_once()
        sent_content = send_mock.call_args.kwargs.get("content") or send_mock.call_args[0][-1]
        assert "Cronjob Response: daily-report" in sent_content
+        assert "(job_id: test-job)" in sent_content
        assert "-------------" in sent_content
        assert "Here is today's summary." in sent_content
-        assert "The agent cannot see this message" in sent_content
+        assert "To stop or manage this job" in sent_content

    def test_delivery_uses_job_id_when_no_name(self):
        """When a job has no name, the wrapper should fall back to job id."""
--- a/tests/gateway/test_busy_session_ack.py
+++ b/tests/gateway/test_busy_session_ack.py
@@ -0,0 +1,293 @@
+"""Tests for busy-session acknowledgment when user sends messages during active agent runs.
+
+Verifies that users get an immediate status response instead of total silence
+when the agent is working on a task. See PR fix for the @Lonely__MH report.
+"""
+import asyncio
+import time
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+# ---------------------------------------------------------------------------
+# Minimal stubs so we can import gateway code without heavy deps
+# ---------------------------------------------------------------------------
+import sys, types
+
+_tg = types.ModuleType("telegram")
+_tg.constants = types.ModuleType("telegram.constants")
+_ct = MagicMock()
+_ct.SUPERGROUP = "supergroup"
+_ct.GROUP = "group"
+_ct.PRIVATE = "private"
+_tg.constants.ChatType = _ct
+sys.modules.setdefault("telegram", _tg)
+sys.modules.setdefault("telegram.constants", _tg.constants)
+sys.modules.setdefault("telegram.ext", types.ModuleType("telegram.ext"))
+
+from gateway.platforms.base import (
+    BasePlatformAdapter,
+    MessageEvent,
+    MessageType,
+    SessionSource,
+    build_session_key,
+)
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _make_event(text="hello", chat_id="123", platform_val="telegram"):
+    """Build a minimal MessageEvent."""
+    source = SessionSource(
+        platform=MagicMock(value=platform_val),
+        chat_id=chat_id,
+        chat_type="private",
+        user_id="user1",
+    )
+    evt = MessageEvent(
+        text=text,
+        message_type=MessageType.TEXT,
+        source=source,
+        message_id="msg1",
+    )
+    return evt
+
+
+def _make_runner():
+    """Build a minimal GatewayRunner-like object for testing."""
+    from gateway.run import GatewayRunner, _AGENT_PENDING_SENTINEL
+
+    runner = object.__new__(GatewayRunner)
+    runner._running_agents = {}
+    runner._running_agents_ts = {}
+    runner._pending_messages = {}
+    runner._busy_ack_ts = {}
+    runner._draining = False
+    runner.adapters = {}
+    runner.config = MagicMock()
+    runner.session_store = None
+    runner.hooks = MagicMock()
+    runner.hooks.emit = AsyncMock()
+    return runner, _AGENT_PENDING_SENTINEL
+
+
+def _make_adapter(platform_val="telegram"):
+    """Build a minimal adapter mock."""
+    adapter = MagicMock()
+    adapter._pending_messages = {}
+    adapter._send_with_retry = AsyncMock()
+    adapter.config = MagicMock()
+    adapter.config.extra = {}
+    adapter.platform = MagicMock(value=platform_val)
+    return adapter
+
+
+# ---------------------------------------------------------------------------
+# Tests
+# ---------------------------------------------------------------------------
+
+class TestBusySessionAck:
+    """User sends a message while agent is running — should get acknowledgment."""
+
+    @pytest.mark.asyncio
+    async def test_sends_ack_when_agent_running(self):
+        """First message during busy session should get a status ack."""
+        runner, sentinel = _make_runner()
+        adapter = _make_adapter()
+
+        event = _make_event(text="Are you working?")
+        sk = build_session_key(event.source)
+
+        # Simulate running agent
+        agent = MagicMock()
+        agent.get_activity_summary.return_value = {
+            "api_call_count": 21,
+            "max_iterations": 60,
+            "current_tool": "terminal",
+            "last_activity_ts": time.time(),
+            "last_activity_desc": "terminal",
+            "seconds_since_activity": 1.0,
+        }
+        runner._running_agents[sk] = agent
+        runner._running_agents_ts[sk] = time.time() - 600  # 10 min ago
+        runner.adapters[event.source.platform] = adapter
+
+        result = await runner._handle_active_session_busy_message(event, sk)
+
+        assert result is True  # handled
+        # Verify ack was sent
+        adapter._send_with_retry.assert_called_once()
+        call_kwargs = adapter._send_with_retry.call_args
+        content = call_kwargs.kwargs.get("content") or call_kwargs[1].get("content", "")
+        if not content and call_kwargs.args:
+            # positional args
+            content = str(call_kwargs)
+        assert "Interrupting" in content or "respond" in content
+        assert "/stop" not in content  # no need — we ARE interrupting
+
+        # Verify message was queued in adapter pending
+        assert sk in adapter._pending_messages
+
+        # Verify agent interrupt was called
+        agent.interrupt.assert_called_once_with("Are you working?")
+
+    @pytest.mark.asyncio
+    async def test_debounce_suppresses_rapid_acks(self):
+        """Second message within 30s should NOT send another ack."""
+        runner, sentinel = _make_runner()
+        adapter = _make_adapter()
+
+        event1 = _make_event(text="hello?")
+        # Reuse the same source so platform mock matches
+        event2 = MessageEvent(
+            text="still there?",
+            message_type=MessageType.TEXT,
+            source=event1.source,
+            message_id="msg2",
+        )
+        sk = build_session_key(event1.source)
+
+        agent = MagicMock()
+        agent.get_activity_summary.return_value = {
+            "api_call_count": 5,
+            "max_iterations": 60,
+            "current_tool": None,
+            "last_activity_ts": time.time(),
+            "last_activity_desc": "api_call",
+            "seconds_since_activity": 0.5,
+        }
+        runner._running_agents[sk] = agent
+        runner._running_agents_ts[sk] = time.time() - 60
+        runner.adapters[event1.source.platform] = adapter
+
+        # First message — should get ack
+        result1 = await runner._handle_active_session_busy_message(event1, sk)
+        assert result1 is True
+        assert adapter._send_with_retry.call_count == 1
+
+        # Second message within cooldown — should be queued but no ack
+        result2 = await runner._handle_active_session_busy_message(event2, sk)
+        assert result2 is True
+        assert adapter._send_with_retry.call_count == 1  # still 1, no new ack
+
+        # But interrupt should still be called for both
+        assert agent.interrupt.call_count == 2
+
+    @pytest.mark.asyncio
+    async def test_ack_after_cooldown_expires(self):
+        """After 30s cooldown, a new message should send a fresh ack."""
+        runner, sentinel = _make_runner()
+        adapter = _make_adapter()
+
+        event = _make_event(text="hello?")
+        sk = build_session_key(event.source)
+
+        agent = MagicMock()
+        agent.get_activity_summary.return_value = {
+            "api_call_count": 10,
+            "max_iterations": 60,
+            "current_tool": "web_search",
+            "last_activity_ts": time.time(),
+            "last_activity_desc": "tool",
+            "seconds_since_activity": 0.5,
+        }
+        runner._running_agents[sk] = agent
+        runner._running_agents_ts[sk] = time.time() - 120
+        runner.adapters[event.source.platform] = adapter
+
+        # First ack
+        await runner._handle_active_session_busy_message(event, sk)
+        assert adapter._send_with_retry.call_count == 1
+
+        # Fake that cooldown expired
+        runner._busy_ack_ts[sk] = time.time() - 31
+
+        # Second ack should go through
+        await runner._handle_active_session_busy_message(event, sk)
+        assert adapter._send_with_retry.call_count == 2
+
+    @pytest.mark.asyncio
+    async def test_includes_status_detail(self):
+        """Ack message should include iteration and tool info when available."""
+        runner, sentinel = _make_runner()
+        adapter = _make_adapter()
+
+        event = _make_event(text="yo")
+        sk = build_session_key(event.source)
+
+        agent = MagicMock()
+        agent.get_activity_summary.return_value = {
+            "api_call_count": 21,
+            "max_iterations": 60,
+            "current_tool": "terminal",
+            "last_activity_ts": time.time(),
+            "last_activity_desc": "terminal",
+            "seconds_since_activity": 0.5,
+        }
+        runner._running_agents[sk] = agent
+        runner._running_agents_ts[sk] = time.time() - 600  # 10 min
+        runner.adapters[event.source.platform] = adapter
+
+        await runner._handle_active_session_busy_message(event, sk)
+
+        call_kwargs = adapter._send_with_retry.call_args
+        content = call_kwargs.kwargs.get("content", "")
+        assert "21/60" in content  # iteration
+        assert "terminal" in content  # current tool
+        assert "10 min" in content  # elapsed
+
+    @pytest.mark.asyncio
+    async def test_draining_still_works(self):
+        """Draining case should still produce the drain-specific message."""
+        runner, sentinel = _make_runner()
+        runner._draining = True
+        adapter = _make_adapter()
+
+        event = _make_event(text="hello")
+        sk = build_session_key(event.source)
+        runner.adapters[event.source.platform] = adapter
+
+        # Mock the drain-specific methods
+        runner._queue_during_drain_enabled = lambda: False
+        runner._status_action_gerund = lambda: "restarting"
+
+        result = await runner._handle_active_session_busy_message(event, sk)
+        assert result is True
+
+        call_kwargs = adapter._send_with_retry.call_args
+        content = call_kwargs.kwargs.get("content", "")
+        assert "restarting" in content
+
+    @pytest.mark.asyncio
+    async def test_pending_sentinel_no_interrupt(self):
+        """When agent is PENDING_SENTINEL, don't call interrupt (it has no method)."""
+        runner, sentinel = _make_runner()
+        adapter = _make_adapter()
+
+        event = _make_event(text="hey")
+        sk = build_session_key(event.source)
+
+        runner._running_agents[sk] = sentinel
+        runner._running_agents_ts[sk] = time.time()
+        runner.adapters[event.source.platform] = adapter
+
+        result = await runner._handle_active_session_busy_message(event, sk)
+        assert result is True
+        # Should still send ack
+        adapter._send_with_retry.assert_called_once()
+
+    @pytest.mark.asyncio
+    async def test_no_adapter_falls_through(self):
+        """If adapter is missing, return False so default path handles it."""
+        runner, sentinel = _make_runner()
+
+        event = _make_event(text="hello")
+        sk = build_session_key(event.source)
+
+        # No adapter registered
+        runner._running_agents[sk] = MagicMock()
+
+        result = await runner._handle_active_session_busy_message(event, sk)
+        assert result is False  # not handled, let default path try
--- a/tests/gateway/test_duplicate_reply_suppression.py
+++ b/tests/gateway/test_duplicate_reply_suppression.py
@@ -0,0 +1,291 @@
+"""Tests for duplicate reply suppression across the gateway stack.
+
+Covers three fix paths:
+  1. base.py: stale response suppressed when interrupt_event is set and a
+     pending message exists (#8221 / #2483)
+  2. run.py return path: already_sent propagated from stream consumer's
+     already_sent flag without requiring response_previewed (#8375)
+  3. run.py queued-message path: first response correctly detected as
+     already-streamed when already_sent is True without response_previewed
+"""
+
+import asyncio
+from types import SimpleNamespace
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from gateway.config import Platform, PlatformConfig
+from gateway.platforms.base import (
+    BasePlatformAdapter,
+    MessageEvent,
+    MessageType,
+    ProcessingOutcome,
+    SendResult,
+)
+from gateway.session import SessionSource, build_session_key
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+class StubAdapter(BasePlatformAdapter):
+    """Minimal concrete adapter for testing."""
+
+    def __init__(self):
+        super().__init__(PlatformConfig(enabled=True, token="fake"), Platform.DISCORD)
+        self.sent = []
+
+    async def connect(self):
+        return True
+
+    async def disconnect(self):
+        pass
+
+    async def send(self, chat_id, content, reply_to=None, metadata=None):
+        self.sent.append({"chat_id": chat_id, "content": content})
+        return SendResult(success=True, message_id="msg1")
+
+    async def send_typing(self, chat_id, metadata=None):
+        pass
+
+    async def get_chat_info(self, chat_id):
+        return {"id": chat_id}
+
+
+def _make_event(text="hello", chat_id="c1", user_id="u1"):
+    return MessageEvent(
+        text=text,
+        source=SessionSource(
+            platform=Platform.DISCORD,
+            chat_id=chat_id,
+            chat_type="dm",
+            user_id=user_id,
+        ),
+        message_id="m1",
+    )
+
+
+# ===================================================================
+# Test 1: base.py — stale response suppressed on interrupt (#8221)
+# ===================================================================
+
+class TestBaseInterruptSuppression:
+    @pytest.mark.asyncio
+    async def test_stale_response_suppressed_when_interrupted(self):
+        """When interrupt_event is set AND a pending message exists,
+        base.py should suppress the stale response instead of sending it."""
+        adapter = StubAdapter()
+
+        stale_response = "This is the stale answer to the first question."
+        pending_response = "This is the answer to the second question."
+        call_count = 0
+
+        async def fake_handler(event):
+            nonlocal call_count
+            call_count += 1
+            if call_count == 1:
+                return stale_response
+            return pending_response
+
+        adapter.set_message_handler(fake_handler)
+
+        event_a = _make_event(text="first question")
+        session_key = build_session_key(event_a.source)
+
+        # Simulate: message A is being processed, message B arrives
+        # The interrupt event is set and B is in pending_messages
+        interrupt_event = asyncio.Event()
+        interrupt_event.set()
+        adapter._active_sessions[session_key] = interrupt_event
+
+        event_b = _make_event(text="second question")
+        adapter._pending_messages[session_key] = event_b
+
+        await adapter._process_message_background(event_a, session_key)
+
+        # The stale response should NOT have been sent.
+        stale_sends = [s for s in adapter.sent if s["content"] == stale_response]
+        assert len(stale_sends) == 0, (
+            f"Stale response was sent {len(stale_sends)} time(s) — should be suppressed"
+        )
+        # The pending message's response SHOULD have been sent.
+        pending_sends = [s for s in adapter.sent if s["content"] == pending_response]
+        assert len(pending_sends) == 1, "Pending message response should be sent"
+
+    @pytest.mark.asyncio
+    async def test_response_not_suppressed_without_interrupt(self):
+        """Normal case: no interrupt, response should be sent."""
+        adapter = StubAdapter()
+
+        async def fake_handler(event):
+            return "Normal response"
+
+        adapter.set_message_handler(fake_handler)
+        event = _make_event()
+        session_key = build_session_key(event.source)
+
+        await adapter._process_message_background(event, session_key)
+
+        assert any(s["content"] == "Normal response" for s in adapter.sent)
+
+    @pytest.mark.asyncio
+    async def test_response_not_suppressed_with_interrupt_but_no_pending(self):
+        """Interrupt event set but no pending message (race already resolved) —
+        response should still be sent."""
+        adapter = StubAdapter()
+
+        async def fake_handler(event):
+            return "Valid response"
+
+        adapter.set_message_handler(fake_handler)
+        event = _make_event()
+        session_key = build_session_key(event.source)
+
+        # Set interrupt but no pending message
+        interrupt_event = asyncio.Event()
+        interrupt_event.set()
+        adapter._active_sessions[session_key] = interrupt_event
+
+        await adapter._process_message_background(event, session_key)
+
+        assert any(s["content"] == "Valid response" for s in adapter.sent)
+
+
+# ===================================================================
+# Test 2: run.py — already_sent without response_previewed (#8375)
+# ===================================================================
+
+class TestAlreadySentWithoutResponsePreviewed:
+    """The already_sent flag on the response dict should be set when the
+    stream consumer's already_sent is True, even if response_previewed is
+    False.  This prevents duplicate sends when streaming was interrupted
+    by flood control."""
+
+    def _make_mock_stream_consumer(self, already_sent=False, final_response_sent=False):
+        sc = SimpleNamespace(
+            already_sent=already_sent,
+            final_response_sent=final_response_sent,
+        )
+        return sc
+
+    def test_already_sent_set_without_response_previewed(self):
+        """Stream consumer already_sent=True should propagate to response
+        dict even when response_previewed is False."""
+        sc = self._make_mock_stream_consumer(already_sent=True, final_response_sent=False)
+        response = {"final_response": "text", "response_previewed": False}
+
+        # Reproduce the logic from run.py return path (post-fix)
+        if sc and isinstance(response, dict) and not response.get("failed"):
+            if (
+                getattr(sc, "final_response_sent", False)
+                or getattr(sc, "already_sent", False)
+            ):
+                response["already_sent"] = True
+
+        assert response.get("already_sent") is True
+
+    def test_already_sent_not_set_when_nothing_sent(self):
+        """When stream consumer hasn't sent anything, already_sent should
+        not be set on the response."""
+        sc = self._make_mock_stream_consumer(already_sent=False, final_response_sent=False)
+        response = {"final_response": "text", "response_previewed": False}
+
+        if sc and isinstance(response, dict) and not response.get("failed"):
+            if (
+                getattr(sc, "final_response_sent", False)
+                or getattr(sc, "already_sent", False)
+            ):
+                response["already_sent"] = True
+
+        assert "already_sent" not in response
+
+    def test_already_sent_set_on_final_response_sent(self):
+        """final_response_sent=True should still work as before."""
+        sc = self._make_mock_stream_consumer(already_sent=False, final_response_sent=True)
+        response = {"final_response": "text"}
+
+        if sc and isinstance(response, dict) and not response.get("failed"):
+            if (
+                getattr(sc, "final_response_sent", False)
+                or getattr(sc, "already_sent", False)
+            ):
+                response["already_sent"] = True
+
+        assert response.get("already_sent") is True
+
+    def test_already_sent_not_set_on_failed_response(self):
+        """Failed responses should never be suppressed — user needs to see
+        the error message even if streaming sent earlier partial output."""
+        sc = self._make_mock_stream_consumer(already_sent=True, final_response_sent=False)
+        response = {"final_response": "Error: something broke", "failed": True}
+
+        if sc and isinstance(response, dict) and not response.get("failed"):
+            if (
+                getattr(sc, "final_response_sent", False)
+                or getattr(sc, "already_sent", False)
+            ):
+                response["already_sent"] = True
+
+        assert "already_sent" not in response
+
+
+# ===================================================================
+# Test 3: run.py queued-message path — _already_streamed detection
+# ===================================================================
+
+class TestQueuedMessageAlreadyStreamed:
+    """The queued-message path should detect that the first response was
+    already streamed (already_sent=True) even without response_previewed."""
+
+    def _make_mock_sc(self, already_sent=False, final_response_sent=False):
+        return SimpleNamespace(
+            already_sent=already_sent,
+            final_response_sent=final_response_sent,
+        )
+
+    def test_queued_path_detects_already_streamed(self):
+        """already_sent=True on stream consumer means first response was
+        streamed — skip re-sending before processing queued message."""
+        _sc = self._make_mock_sc(already_sent=True)
+
+        # Reproduce the queued-message logic from run.py (post-fix)
+        _already_streamed = bool(
+            _sc
+            and (
+                getattr(_sc, "final_response_sent", False)
+                or getattr(_sc, "already_sent", False)
+            )
+        )
+
+        assert _already_streamed is True
+
+    def test_queued_path_sends_when_not_streamed(self):
+        """Nothing was streamed — first response should be sent before
+        processing the queued message."""
+        _sc = self._make_mock_sc(already_sent=False)
+
+        _already_streamed = bool(
+            _sc
+            and (
+                getattr(_sc, "final_response_sent", False)
+                or getattr(_sc, "already_sent", False)
+            )
+        )
+
+        assert _already_streamed is False
+
+    def test_queued_path_with_no_stream_consumer(self):
+        """No stream consumer at all (streaming disabled) — not streamed."""
+        _sc = None
+
+        _already_streamed = bool(
+            _sc
+            and (
+                getattr(_sc, "final_response_sent", False)
+                or getattr(_sc, "already_sent", False)
+            )
+        )
+
+        assert _already_streamed is False
--- a/tests/gateway/test_matrix.py
+++ b/tests/gateway/test_matrix.py
@@ -335,6 +335,29 @@ def _make_adapter():
    return adapter


+# ---------------------------------------------------------------------------
+# Typing indicator
+# ---------------------------------------------------------------------------
+
+class TestMatrixTypingIndicator:
+    def setup_method(self):
+        self.adapter = _make_adapter()
+        self.adapter._client = MagicMock()
+        self.adapter._client.set_typing = AsyncMock()
+
+    @pytest.mark.asyncio
+    async def test_stop_typing_clears_matrix_typing_state(self):
+        """stop_typing() should send typing=false instead of waiting for timeout expiry."""
+        from gateway.platforms.matrix import RoomID
+
+        await self.adapter.stop_typing("!room:example.org")
+
+        self.adapter._client.set_typing.assert_awaited_once_with(
+            RoomID("!room:example.org"),
+            timeout=0,
+        )
+
+
 # ---------------------------------------------------------------------------
 # mxc:// URL conversion
 # ---------------------------------------------------------------------------
@@ -1831,4 +1854,3 @@ class TestMatrixPresence:
        assert result is False


-
--- a/tests/hermes_cli/test_doctor_command_install.py
+++ b/tests/hermes_cli/test_doctor_command_install.py
@@ -0,0 +1,275 @@
+"""Tests for the Command Installation check in hermes doctor."""
+
+import os
+import sys
+import types
+from argparse import Namespace
+from pathlib import Path
+
+import pytest
+
+import hermes_cli.doctor as doctor_mod
+
+
+def _setup_doctor_env(monkeypatch, tmp_path, venv_name="venv"):
+    """Create a minimal HERMES_HOME + PROJECT_ROOT for doctor tests."""
+    home = tmp_path / ".hermes"
+    home.mkdir(parents=True, exist_ok=True)
+    (home / "config.yaml").write_text("memory: {}\n", encoding="utf-8")
+
+    project = tmp_path / "project"
+    project.mkdir(exist_ok=True)
+
+    # Create a fake venv entry point
+    venv_bin_dir = project / venv_name / "bin"
+    venv_bin_dir.mkdir(parents=True, exist_ok=True)
+    hermes_bin = venv_bin_dir / "hermes"
+    hermes_bin.write_text("#!/usr/bin/env python\n# entry point\n")
+    hermes_bin.chmod(0o755)
+
+    monkeypatch.setattr(doctor_mod, "HERMES_HOME", home)
+    monkeypatch.setattr(doctor_mod, "PROJECT_ROOT", project)
+    monkeypatch.setattr(doctor_mod, "_DHH", str(home))
+
+    # Stub model_tools so doctor doesn't fail on import
+    fake_model_tools = types.SimpleNamespace(
+        check_tool_availability=lambda *a, **kw: ([], []),
+        TOOLSET_REQUIREMENTS={},
+    )
+    monkeypatch.setitem(sys.modules, "model_tools", fake_model_tools)
+
+    # Stub auth checks
+    try:
+        from hermes_cli import auth as _auth_mod
+        monkeypatch.setattr(_auth_mod, "get_nous_auth_status", lambda: {})
+        monkeypatch.setattr(_auth_mod, "get_codex_auth_status", lambda: {})
+    except Exception:
+        pass
+
+    # Stub httpx.get to avoid network calls
+    try:
+        import httpx
+        monkeypatch.setattr(httpx, "get", lambda *a, **kw: types.SimpleNamespace(status_code=200))
+    except Exception:
+        pass
+
+    return home, project, hermes_bin
+
+
+def _run_doctor(fix=False):
+    """Run doctor and capture stdout."""
+    import io
+    import contextlib
+
+    buf = io.StringIO()
+    with contextlib.redirect_stdout(buf):
+        doctor_mod.run_doctor(Namespace(fix=fix))
+    return buf.getvalue()
+
+
+class TestDoctorCommandInstallation:
+    """Tests for the ◆ Command Installation section."""
+
+    @pytest.mark.skipif(sys.platform == "win32", reason="Symlink check is Unix-only")
+    def test_correct_symlink_shows_ok(self, monkeypatch, tmp_path):
+        home, project, hermes_bin = _setup_doctor_env(monkeypatch, tmp_path)
+
+        # Create the command link dir with correct symlink
+        cmd_link_dir = tmp_path / ".local" / "bin"
+        cmd_link_dir.mkdir(parents=True)
+        cmd_link = cmd_link_dir / "hermes"
+        cmd_link.symlink_to(hermes_bin)
+
+        monkeypatch.setattr(Path, "home", lambda: tmp_path)
+
+        out = _run_doctor(fix=False)
+        assert "Command Installation" in out
+        assert "Venv entry point exists" in out
+        assert "correct target" in out
+
+    @pytest.mark.skipif(sys.platform == "win32", reason="Symlink check is Unix-only")
+    def test_missing_symlink_shows_fail(self, monkeypatch, tmp_path):
+        home, project, hermes_bin = _setup_doctor_env(monkeypatch, tmp_path)
+
+        monkeypatch.setattr(Path, "home", lambda: tmp_path)
+        # Don't create the symlink — it should be missing
+
+        out = _run_doctor(fix=False)
+        assert "Command Installation" in out
+        assert "Venv entry point exists" in out
+        assert "not found" in out
+        assert "hermes doctor --fix" in out
+
+    @pytest.mark.skipif(sys.platform == "win32", reason="Symlink check is Unix-only")
+    def test_fix_creates_missing_symlink(self, monkeypatch, tmp_path):
+        home, project, hermes_bin = _setup_doctor_env(monkeypatch, tmp_path)
+
+        monkeypatch.setattr(Path, "home", lambda: tmp_path)
+
+        out = _run_doctor(fix=True)
+        assert "Command Installation" in out
+        assert "Created symlink" in out
+
+        # Verify the symlink was actually created
+        cmd_link = tmp_path / ".local" / "bin" / "hermes"
+        assert cmd_link.is_symlink()
+        assert cmd_link.resolve() == hermes_bin.resolve()
+
+    @pytest.mark.skipif(sys.platform == "win32", reason="Symlink check is Unix-only")
+    def test_wrong_target_symlink_shows_warn(self, monkeypatch, tmp_path):
+        home, project, hermes_bin = _setup_doctor_env(monkeypatch, tmp_path)
+
+        # Create a symlink pointing to the wrong target
+        cmd_link_dir = tmp_path / ".local" / "bin"
+        cmd_link_dir.mkdir(parents=True)
+        cmd_link = cmd_link_dir / "hermes"
+        wrong_target = tmp_path / "wrong_hermes"
+        wrong_target.write_text("#!/usr/bin/env python\n")
+        cmd_link.symlink_to(wrong_target)
+
+        monkeypatch.setattr(Path, "home", lambda: tmp_path)
+
+        out = _run_doctor(fix=False)
+        assert "Command Installation" in out
+        assert "wrong target" in out
+
+    @pytest.mark.skipif(sys.platform == "win32", reason="Symlink check is Unix-only")
+    def test_fix_repairs_wrong_symlink(self, monkeypatch, tmp_path):
+        home, project, hermes_bin = _setup_doctor_env(monkeypatch, tmp_path)
+
+        # Create a symlink pointing to wrong target
+        cmd_link_dir = tmp_path / ".local" / "bin"
+        cmd_link_dir.mkdir(parents=True)
+        cmd_link = cmd_link_dir / "hermes"
+        wrong_target = tmp_path / "wrong_hermes"
+        wrong_target.write_text("#!/usr/bin/env python\n")
+        cmd_link.symlink_to(wrong_target)
+
+        monkeypatch.setattr(Path, "home", lambda: tmp_path)
+
+        out = _run_doctor(fix=True)
+        assert "Fixed symlink" in out
+
+        # Verify the symlink now points to the correct target
+        assert cmd_link.is_symlink()
+        assert cmd_link.resolve() == hermes_bin.resolve()
+
+    @pytest.mark.skipif(sys.platform == "win32", reason="Symlink check is Unix-only")
+    def test_missing_venv_entry_point_shows_warn(self, monkeypatch, tmp_path):
+        home = tmp_path / ".hermes"
+        home.mkdir(parents=True, exist_ok=True)
+        (home / "config.yaml").write_text("memory: {}\n", encoding="utf-8")
+
+        project = tmp_path / "project"
+        project.mkdir(exist_ok=True)
+        # Do NOT create any venv entry point
+
+        monkeypatch.setattr(doctor_mod, "HERMES_HOME", home)
+        monkeypatch.setattr(doctor_mod, "PROJECT_ROOT", project)
+        monkeypatch.setattr(doctor_mod, "_DHH", str(home))
+        monkeypatch.setattr(Path, "home", lambda: tmp_path)
+
+        fake_model_tools = types.SimpleNamespace(
+            check_tool_availability=lambda *a, **kw: ([], []),
+            TOOLSET_REQUIREMENTS={},
+        )
+        monkeypatch.setitem(sys.modules, "model_tools", fake_model_tools)
+        try:
+            from hermes_cli import auth as _auth_mod
+            monkeypatch.setattr(_auth_mod, "get_nous_auth_status", lambda: {})
+            monkeypatch.setattr(_auth_mod, "get_codex_auth_status", lambda: {})
+        except Exception:
+            pass
+        try:
+            import httpx
+            monkeypatch.setattr(httpx, "get", lambda *a, **kw: types.SimpleNamespace(status_code=200))
+        except Exception:
+            pass
+
+        out = _run_doctor(fix=False)
+        assert "Command Installation" in out
+        assert "Venv entry point not found" in out
+
+    @pytest.mark.skipif(sys.platform == "win32", reason="Symlink check is Unix-only")
+    def test_dot_venv_dir_is_found(self, monkeypatch, tmp_path):
+        """The check finds entry points in .venv/ as well as venv/."""
+        home, project, _ = _setup_doctor_env(monkeypatch, tmp_path, venv_name=".venv")
+
+        # Create the command link with correct symlink
+        hermes_bin = project / ".venv" / "bin" / "hermes"
+        cmd_link_dir = tmp_path / ".local" / "bin"
+        cmd_link_dir.mkdir(parents=True)
+        cmd_link = cmd_link_dir / "hermes"
+        cmd_link.symlink_to(hermes_bin)
+
+        monkeypatch.setattr(Path, "home", lambda: tmp_path)
+
+        out = _run_doctor(fix=False)
+        assert "Venv entry point exists" in out
+        assert ".venv/bin/hermes" in out
+
+    @pytest.mark.skipif(sys.platform == "win32", reason="Symlink check is Unix-only")
+    def test_non_symlink_regular_file_shows_ok(self, monkeypatch, tmp_path):
+        """If ~/.local/bin/hermes is a regular file (not symlink), accept it."""
+        home, project, hermes_bin = _setup_doctor_env(monkeypatch, tmp_path)
+
+        cmd_link_dir = tmp_path / ".local" / "bin"
+        cmd_link_dir.mkdir(parents=True)
+        cmd_link = cmd_link_dir / "hermes"
+        cmd_link.write_text("#!/bin/sh\nexec python -m hermes_cli.main \"$@\"\n")
+
+        monkeypatch.setattr(Path, "home", lambda: tmp_path)
+
+        out = _run_doctor(fix=False)
+        assert "non-symlink" in out
+
+    @pytest.mark.skipif(sys.platform == "win32", reason="Symlink check is Unix-only")
+    def test_termux_uses_prefix_bin(self, monkeypatch, tmp_path):
+        """On Termux, the command link dir is $PREFIX/bin."""
+        prefix_dir = tmp_path / "termux_prefix"
+        prefix_bin = prefix_dir / "bin"
+        prefix_bin.mkdir(parents=True)
+
+        home, project, hermes_bin = _setup_doctor_env(monkeypatch, tmp_path)
+
+        monkeypatch.setenv("TERMUX_VERSION", "0.118.3")
+        monkeypatch.setenv("PREFIX", str(prefix_dir))
+        monkeypatch.setattr(Path, "home", lambda: tmp_path)
+
+        out = _run_doctor(fix=False)
+        assert "Command Installation" in out
+        assert "$PREFIX/bin" in out
+
+    def test_windows_skips_check(self, monkeypatch, tmp_path):
+        """On Windows, the Command Installation section is skipped."""
+        home = tmp_path / ".hermes"
+        home.mkdir(parents=True, exist_ok=True)
+        (home / "config.yaml").write_text("memory: {}\n", encoding="utf-8")
+
+        project = tmp_path / "project"
+        project.mkdir(exist_ok=True)
+
+        monkeypatch.setattr(doctor_mod, "HERMES_HOME", home)
+        monkeypatch.setattr(doctor_mod, "PROJECT_ROOT", project)
+        monkeypatch.setattr(doctor_mod, "_DHH", str(home))
+        monkeypatch.setattr(sys, "platform", "win32")
+
+        fake_model_tools = types.SimpleNamespace(
+            check_tool_availability=lambda *a, **kw: ([], []),
+            TOOLSET_REQUIREMENTS={},
+        )
+        monkeypatch.setitem(sys.modules, "model_tools", fake_model_tools)
+        try:
+            from hermes_cli import auth as _auth_mod
+            monkeypatch.setattr(_auth_mod, "get_nous_auth_status", lambda: {})
+            monkeypatch.setattr(_auth_mod, "get_codex_auth_status", lambda: {})
+        except Exception:
+            pass
+        try:
+            import httpx
+            monkeypatch.setattr(httpx, "get", lambda *a, **kw: types.SimpleNamespace(status_code=200))
+        except Exception:
+            pass
+
+        out = _run_doctor(fix=False)
+        assert "Command Installation" not in out
--- a/tests/hermes_cli/test_subparser_routing_fallback.py
+++ b/tests/hermes_cli/test_subparser_routing_fallback.py
@@ -0,0 +1,148 @@
+"""Tests for the defensive subparser routing workaround (bpo-9338).
+
+The main() function in hermes_cli/main.py sets subparsers.required=True
+when argv contains a known subcommand name.  This forces deterministic
+routing on Python versions where argparse fails to match subcommand tokens
+when the parent parser has nargs='?' optional arguments (--continue).
+
+If the subcommand token is consumed as a flag value (e.g. `hermes -c model`
+to resume a session named 'model'), the required=True parse raises
+SystemExit and the code falls back to the default required=False behaviour.
+"""
+import argparse
+import io
+import sys
+
+import pytest
+
+
+def _build_parser():
+    """Build a minimal replica of the hermes top-level parser."""
+    parser = argparse.ArgumentParser(prog="hermes")
+    parser.add_argument("--version", "-V", action="store_true")
+    parser.add_argument("--resume", "-r", metavar="SESSION", default=None)
+    parser.add_argument(
+        "--continue", "-c",
+        dest="continue_last",
+        nargs="?",
+        const=True,
+        default=None,
+        metavar="SESSION_NAME",
+    )
+    parser.add_argument("--worktree", "-w", action="store_true", default=False)
+    parser.add_argument("--skills", "-s", action="append", default=None)
+    parser.add_argument("--yolo", action="store_true", default=False)
+    parser.add_argument("--pass-session-id", action="store_true", default=False)
+
+    subparsers = parser.add_subparsers(dest="command", help="Command to run")
+    chat_p = subparsers.add_parser("chat")
+    chat_p.add_argument("-q", "--query", default=None)
+    subparsers.add_parser("model")
+    subparsers.add_parser("gateway")
+    subparsers.add_parser("setup")
+    return parser, subparsers
+
+
+def _safe_parse(parser, subparsers, argv):
+    """Replica of the defensive parsing logic from main()."""
+    known_cmds = set(subparsers.choices.keys()) if hasattr(subparsers, "choices") else set()
+    has_cmd_token = any(t in known_cmds for t in argv if not t.startswith("-"))
+
+    if has_cmd_token:
+        subparsers.required = True
+        saved_stderr = sys.stderr
+        try:
+            sys.stderr = io.StringIO()
+            args = parser.parse_args(argv)
+            sys.stderr = saved_stderr
+            return args
+        except SystemExit:
+            sys.stderr = saved_stderr
+            subparsers.required = False
+            return parser.parse_args(argv)
+    else:
+        subparsers.required = False
+        return parser.parse_args(argv)
+
+
+class TestSubparserRoutingFallback:
+    """Verify the bpo-9338 defensive routing works for all key cases."""
+
+    def test_direct_subcommand(self):
+        parser, sub = _build_parser()
+        args = _safe_parse(parser, sub, ["model"])
+        assert args.command == "model"
+
+    def test_subcommand_with_flags(self):
+        parser, sub = _build_parser()
+        args = _safe_parse(parser, sub, ["--yolo", "model"])
+        assert args.command == "model"
+        assert args.yolo is True
+
+    def test_bare_hermes_defaults_to_none(self):
+        parser, sub = _build_parser()
+        args = _safe_parse(parser, sub, [])
+        assert args.command is None
+
+    def test_flags_only_defaults_to_none(self):
+        parser, sub = _build_parser()
+        args = _safe_parse(parser, sub, ["--yolo"])
+        assert args.command is None
+        assert args.yolo is True
+
+    def test_continue_flag_alone(self):
+        parser, sub = _build_parser()
+        args = _safe_parse(parser, sub, ["-c"])
+        assert args.command is None
+        assert args.continue_last is True
+
+    def test_continue_with_session_name(self):
+        parser, sub = _build_parser()
+        args = _safe_parse(parser, sub, ["-c", "myproject"])
+        assert args.command is None
+        assert args.continue_last == "myproject"
+
+    def test_continue_with_subcommand_name_as_session(self):
+        """Edge case: session named 'model' — should be treated as session name, not subcommand."""
+        parser, sub = _build_parser()
+        args = _safe_parse(parser, sub, ["-c", "model"])
+        assert args.command is None
+        assert args.continue_last == "model"
+
+    def test_continue_with_session_then_subcommand(self):
+        parser, sub = _build_parser()
+        args = _safe_parse(parser, sub, ["-c", "myproject", "model"])
+        assert args.command == "model"
+        assert args.continue_last == "myproject"
+
+    def test_chat_with_query(self):
+        parser, sub = _build_parser()
+        args = _safe_parse(parser, sub, ["chat", "-q", "hello"])
+        assert args.command == "chat"
+        assert args.query == "hello"
+
+    def test_resume_flag(self):
+        parser, sub = _build_parser()
+        args = _safe_parse(parser, sub, ["-r", "abc123"])
+        assert args.command is None
+        assert args.resume == "abc123"
+
+    def test_resume_with_subcommand(self):
+        parser, sub = _build_parser()
+        args = _safe_parse(parser, sub, ["-r", "abc123", "chat"])
+        assert args.command == "chat"
+        assert args.resume == "abc123"
+
+    def test_skills_flag_with_subcommand(self):
+        parser, sub = _build_parser()
+        args = _safe_parse(parser, sub, ["-s", "myskill", "chat"])
+        assert args.command == "chat"
+        assert args.skills == ["myskill"]
+
+    def test_all_flags_with_subcommand(self):
+        parser, sub = _build_parser()
+        args = _safe_parse(parser, sub, ["--yolo", "-w", "-s", "myskill", "model"])
+        assert args.command == "model"
+        assert args.yolo is True
+        assert args.worktree is True
+        assert args.skills == ["myskill"]
--- a/tests/hermes_cli/test_tools_config.py
+++ b/tests/hermes_cli/test_tools_config.py
@@ -8,6 +8,7 @@ from hermes_cli.tools_config import (
    _platform_toolset_summary,
    _save_platform_tools,
    _toolset_has_keys,
+    CONFIGURABLE_TOOLSETS,
    TOOL_CATEGORIES,
    _visible_providers,
    tools_command,
@@ -22,6 +23,15 @@ def test_get_platform_tools_uses_default_when_platform_not_configured():
    assert enabled


+def test_configurable_toolsets_include_messaging():
+    assert any(ts_key == "messaging" for ts_key, _, _ in CONFIGURABLE_TOOLSETS)
+
+def test_get_platform_tools_default_telegram_includes_messaging():
+    enabled = _get_platform_tools({}, "telegram")
+
+    assert "messaging" in enabled
+
+
 def test_get_platform_tools_preserves_explicit_empty_selection():
    config = {"platform_toolsets": {"cli": []}}

--- a/tests/run_agent/test_invalid_context_length_warning.py
+++ b/tests/run_agent/test_invalid_context_length_warning.py
@@ -0,0 +1,111 @@
+"""Tests that invalid context_length values in config produce visible warnings."""
+
+from unittest.mock import patch, MagicMock, call
+
+
+def _build_agent(model_cfg, custom_providers=None, model="anthropic/claude-opus-4.6"):
+    """Build an AIAgent with the given model config."""
+    cfg = {"model": model_cfg}
+    if custom_providers is not None:
+        cfg["custom_providers"] = custom_providers
+
+    with (
+        patch("hermes_cli.config.load_config", return_value=cfg),
+        patch("agent.model_metadata.get_model_context_length", return_value=128_000),
+        patch("run_agent.get_tool_definitions", return_value=[]),
+        patch("run_agent.check_toolset_requirements", return_value={}),
+        patch("run_agent.OpenAI"),
+    ):
+        from run_agent import AIAgent
+
+        agent = AIAgent(
+            model=model,
+            api_key="test-key-1234567890",
+            quiet_mode=True,
+            skip_context_files=True,
+            skip_memory=True,
+        )
+    return agent
+
+
+def test_valid_integer_context_length_no_warning():
+    """Plain integer context_length should work silently."""
+    with patch("run_agent.logger") as mock_logger:
+        agent = _build_agent({"default": "gpt5.4", "provider": "custom",
+                              "base_url": "http://localhost:4000/v1",
+                              "context_length": 256000})
+    assert agent._config_context_length == 256000
+    # No warning about invalid context_length
+    for c in mock_logger.warning.call_args_list:
+        assert "Invalid" not in str(c)
+
+
+def test_string_k_suffix_context_length_warns():
+    """context_length: '256K' should warn the user clearly."""
+    with patch("run_agent.logger") as mock_logger:
+        agent = _build_agent({"default": "gpt5.4", "provider": "custom",
+                              "base_url": "http://localhost:4000/v1",
+                              "context_length": "256K"})
+    assert agent._config_context_length is None
+    # Should have warned
+    warning_calls = [c for c in mock_logger.warning.call_args_list
+                     if "Invalid" in str(c) and "256K" in str(c)]
+    assert len(warning_calls) == 1
+    assert "plain integer" in str(warning_calls[0])
+
+
+def test_string_numeric_context_length_works():
+    """context_length: '256000' (string) should parse fine via int()."""
+    with patch("run_agent.logger") as mock_logger:
+        agent = _build_agent({"default": "gpt5.4", "provider": "custom",
+                              "base_url": "http://localhost:4000/v1",
+                              "context_length": "256000"})
+    assert agent._config_context_length == 256000
+    for c in mock_logger.warning.call_args_list:
+        assert "Invalid" not in str(c)
+
+
+def test_custom_providers_invalid_context_length_warns():
+    """Invalid context_length in custom_providers should warn."""
+    custom_providers = [
+        {
+            "name": "LiteLLM",
+            "base_url": "http://localhost:4000/v1",
+            "models": {
+                "gpt5.4": {"context_length": "256K"}
+            },
+        }
+    ]
+    with patch("run_agent.logger") as mock_logger:
+        agent = _build_agent(
+            {"default": "gpt5.4", "provider": "custom",
+             "base_url": "http://localhost:4000/v1"},
+            custom_providers=custom_providers,
+            model="gpt5.4",
+        )
+    warning_calls = [c for c in mock_logger.warning.call_args_list
+                     if "Invalid" in str(c) and "256K" in str(c)]
+    assert len(warning_calls) == 1
+    assert "custom_providers" in str(warning_calls[0])
+
+
+def test_custom_providers_valid_context_length():
+    """Valid integer in custom_providers should work silently."""
+    custom_providers = [
+        {
+            "name": "LiteLLM",
+            "base_url": "http://localhost:4000/v1",
+            "models": {
+                "gpt5.4": {"context_length": 256000}
+            },
+        }
+    ]
+    with patch("run_agent.logger") as mock_logger:
+        agent = _build_agent(
+            {"default": "gpt5.4", "provider": "custom",
+             "base_url": "http://localhost:4000/v1"},
+            custom_providers=custom_providers,
+            model="gpt5.4",
+        )
+    for c in mock_logger.warning.call_args_list:
+        assert "Invalid" not in str(c)
--- a/tests/run_agent/test_run_agent_codex_responses.py
+++ b/tests/run_agent/test_run_agent_codex_responses.py
@@ -1249,13 +1249,17 @@ def test_chat_messages_to_responses_input_deduplicates_reasoning_ids(monkeypatch
    ]
    items = agent._chat_messages_to_responses_input(messages)

-    reasoning_ids = [it["id"] for it in items if it.get("type") == "reasoning"]
-    # rs_aaa should appear only once (first occurrence kept)
-    assert reasoning_ids.count("rs_aaa") == 1
-    # rs_bbb and rs_ccc should each appear once
-    assert reasoning_ids.count("rs_bbb") == 1
-    assert reasoning_ids.count("rs_ccc") == 1
-    assert len(reasoning_ids) == 3
+    reasoning_items = [it for it in items if it.get("type") == "reasoning"]
+    # Dedup: rs_aaa appears in both turns but should only be emitted once.
+    # 3 unique items total: enc_1 (from rs_aaa), enc_2 (rs_bbb), enc_3 (rs_ccc).
+    assert len(reasoning_items) == 3
+    encrypted = [it["encrypted_content"] for it in reasoning_items]
+    assert encrypted.count("enc_1") == 1
+    assert "enc_2" in encrypted
+    assert "enc_3" in encrypted
+    # IDs must be stripped — with store=False the API 404s on id lookups.
+    for it in reasoning_items:
+        assert "id" not in it


 def test_preflight_codex_input_deduplicates_reasoning_ids(monkeypatch):
@@ -1272,7 +1276,11 @@ def test_preflight_codex_input_deduplicates_reasoning_ids(monkeypatch):
    normalized = agent._preflight_codex_input_items(raw_input)

    reasoning_items = [it for it in normalized if it.get("type") == "reasoning"]
-    reasoning_ids = [it["id"] for it in reasoning_items]
-    assert reasoning_ids.count("rs_xyz") == 1
-    assert reasoning_ids.count("rs_zzz") == 1
+    # rs_xyz duplicate should be collapsed to one item; rs_zzz kept.
    assert len(reasoning_items) == 2
+    encrypted = [it["encrypted_content"] for it in reasoning_items]
+    assert encrypted.count("enc_a") == 1
+    assert "enc_b" in encrypted
+    # IDs must be stripped — with store=False the API 404s on id lookups.
+    for it in reasoning_items:
+        assert "id" not in it
--- a/tests/run_agent/test_unicode_ascii_codec.py
+++ b/tests/run_agent/test_unicode_ascii_codec.py
@@ -230,3 +230,67 @@ class TestSanitizeStructureNonAscii:
        assert _sanitize_structure_non_ascii(payload) is True
        assert payload["default_headers"]["X-Title"] == "Hermes  Agent"
        assert payload["default_headers"]["User-Agent"] == "Hermes/1.0 "
+
+
+class TestApiKeyClientSync:
+    """Verify that ASCII recovery updates the live OpenAI client's api_key.
+
+    The OpenAI SDK stores its own copy of api_key which auth_headers reads
+    dynamically.  If only self.api_key is updated but self.client.api_key
+    is not, the next request still sends the corrupted key in the
+    Authorization header.
+    """
+
+    def test_client_api_key_updated_on_sanitize(self):
+        """Simulate the recovery path and verify client.api_key is synced."""
+        from unittest.mock import MagicMock
+        from run_agent import AIAgent
+
+        agent = AIAgent.__new__(AIAgent)
+        bad_key = "sk-proj-abc\u028bdef"  # ʋ lookalike at position 11
+        agent.api_key = bad_key
+        agent._client_kwargs = {"api_key": bad_key}
+        agent.quiet_mode = True
+
+        # Mock client with its own api_key attribute (like the real OpenAI client)
+        mock_client = MagicMock()
+        mock_client.api_key = bad_key
+        agent.client = mock_client
+
+        # --- replicate the recovery logic from run_agent.py ---
+        _raw_key = agent.api_key
+        _clean_key = _strip_non_ascii(_raw_key)
+        assert _clean_key != _raw_key, "test precondition: key should have non-ASCII"
+
+        agent.api_key = _clean_key
+        agent._client_kwargs["api_key"] = _clean_key
+        if getattr(agent, "client", None) is not None and hasattr(agent.client, "api_key"):
+            agent.client.api_key = _clean_key
+
+        # All three locations should now hold the clean key
+        assert agent.api_key == "sk-proj-abcdef"
+        assert agent._client_kwargs["api_key"] == "sk-proj-abcdef"
+        assert agent.client.api_key == "sk-proj-abcdef"
+        # The bad char should be gone from all of them
+        assert "\u028b" not in agent.api_key
+        assert "\u028b" not in agent._client_kwargs["api_key"]
+        assert "\u028b" not in agent.client.api_key
+
+    def test_client_none_does_not_crash(self):
+        """Recovery should not crash when client is None (pre-init)."""
+        from run_agent import AIAgent
+
+        agent = AIAgent.__new__(AIAgent)
+        bad_key = "sk-proj-\u028b"
+        agent.api_key = bad_key
+        agent._client_kwargs = {"api_key": bad_key}
+        agent.client = None
+
+        _clean_key = _strip_non_ascii(bad_key)
+        agent.api_key = _clean_key
+        agent._client_kwargs["api_key"] = _clean_key
+        if getattr(agent, "client", None) is not None and hasattr(agent.client, "api_key"):
+            agent.client.api_key = _clean_key
+
+        assert agent.api_key == "sk-proj-"
+        assert agent.client is None  # should not have been touched
--- a/tests/tools/test_send_message_tool.py
+++ b/tests/tools/test_send_message_tool.py
@@ -752,6 +752,38 @@ class TestParseTargetRefDiscord:
        assert is_explicit is True


+class TestParseTargetRefMatrix:
+    """_parse_target_ref correctly handles Matrix room IDs and user MXIDs."""
+
+    def test_matrix_room_id_is_explicit(self):
+        """Matrix room IDs (!) are recognized as explicit targets."""
+        chat_id, thread_id, is_explicit = _parse_target_ref("matrix", "!HLOQwxYGgFPMPJUSNR:matrix.org")
+        assert chat_id == "!HLOQwxYGgFPMPJUSNR:matrix.org"
+        assert thread_id is None
+        assert is_explicit is True
+
+    def test_matrix_user_mxid_is_explicit(self):
+        """Matrix user MXIDs (@) are recognized as explicit targets."""
+        chat_id, thread_id, is_explicit = _parse_target_ref("matrix", "@hermes:matrix.org")
+        assert chat_id == "@hermes:matrix.org"
+        assert thread_id is None
+        assert is_explicit is True
+
+    def test_matrix_alias_is_not_explicit(self):
+        """Matrix room aliases (#) are NOT explicit — they need resolution."""
+        chat_id, thread_id, is_explicit = _parse_target_ref("matrix", "#general:matrix.org")
+        assert chat_id is None
+        assert is_explicit is False
+
+    def test_matrix_prefix_only_matches_matrix_platform(self):
+        """! and @ prefixes are only treated as explicit for the matrix platform."""
+        chat_id, _, is_explicit = _parse_target_ref("telegram", "!something")
+        assert is_explicit is False
+
+        chat_id, _, is_explicit = _parse_target_ref("discord", "@someone")
+        assert is_explicit is False
+
+
 class TestSendDiscordThreadId:
    """_send_discord uses thread_id when provided."""

@@ -854,3 +886,225 @@ class TestSendToPlatformDiscordThread:
        send_mock.assert_awaited_once()
        _, call_kwargs = send_mock.await_args
        assert call_kwargs["thread_id"] is None
+
+
+# ---------------------------------------------------------------------------
+# Discord media attachment support
+# ---------------------------------------------------------------------------
+
+
+class TestSendDiscordMedia:
+    """_send_discord uploads media files via multipart/form-data."""
+
+    @staticmethod
+    def _build_mock(response_status, response_data=None, response_text="error body"):
+        """Build a properly-structured aiohttp mock chain."""
+        mock_resp = MagicMock()
+        mock_resp.status = response_status
+        mock_resp.json = AsyncMock(return_value=response_data or {"id": "msg123"})
+        mock_resp.text = AsyncMock(return_value=response_text)
+        mock_resp.__aenter__ = AsyncMock(return_value=mock_resp)
+        mock_resp.__aexit__ = AsyncMock(return_value=None)
+
+        mock_session = MagicMock()
+        mock_session.__aenter__ = AsyncMock(return_value=mock_session)
+        mock_session.__aexit__ = AsyncMock(return_value=None)
+        mock_session.post = MagicMock(return_value=mock_resp)
+
+        return mock_session, mock_resp
+
+    def test_text_and_media_sends_both(self, tmp_path):
+        """Text message is sent first, then each media file as multipart."""
+        img = tmp_path / "photo.png"
+        img.write_bytes(b"\x89PNG fake image data")
+
+        mock_session, _ = self._build_mock(200, {"id": "msg999"})
+        with patch("aiohttp.ClientSession", return_value=mock_session):
+            result = asyncio.run(
+                _send_discord("tok", "111", "hello", media_files=[(str(img), False)])
+            )
+
+        assert result["success"] is True
+        assert result["message_id"] == "msg999"
+        # Two POSTs: one text JSON, one multipart upload
+        assert mock_session.post.call_count == 2
+
+    def test_media_only_skips_text_post(self, tmp_path):
+        """When message is empty and media is present, text POST is skipped."""
+        img = tmp_path / "photo.png"
+        img.write_bytes(b"\x89PNG fake image data")
+
+        mock_session, _ = self._build_mock(200, {"id": "media_only"})
+        with patch("aiohttp.ClientSession", return_value=mock_session):
+            result = asyncio.run(
+                _send_discord("tok", "222", "  ", media_files=[(str(img), False)])
+            )
+
+        assert result["success"] is True
+        # Only one POST: the media upload (text was whitespace-only)
+        assert mock_session.post.call_count == 1
+
+    def test_missing_media_file_collected_as_warning(self):
+        """Non-existent media paths produce warnings but don't fail."""
+        mock_session, _ = self._build_mock(200, {"id": "txt_ok"})
+        with patch("aiohttp.ClientSession", return_value=mock_session):
+            result = asyncio.run(
+                _send_discord("tok", "333", "hello", media_files=[("/nonexistent/file.png", False)])
+            )
+
+        assert result["success"] is True
+        assert "warnings" in result
+        assert any("not found" in w for w in result["warnings"])
+        # Only the text POST was made, media was skipped
+        assert mock_session.post.call_count == 1
+
+    def test_media_upload_failure_collected_as_warning(self, tmp_path):
+        """Failed media upload becomes a warning, text still succeeds."""
+        img = tmp_path / "photo.png"
+        img.write_bytes(b"\x89PNG fake image data")
+
+        # First call (text) succeeds, second call (media) returns 413
+        text_resp = MagicMock()
+        text_resp.status = 200
+        text_resp.json = AsyncMock(return_value={"id": "txt_ok"})
+        text_resp.__aenter__ = AsyncMock(return_value=text_resp)
+        text_resp.__aexit__ = AsyncMock(return_value=None)
+
+        media_resp = MagicMock()
+        media_resp.status = 413
+        media_resp.text = AsyncMock(return_value="Request Entity Too Large")
+        media_resp.__aenter__ = AsyncMock(return_value=media_resp)
+        media_resp.__aexit__ = AsyncMock(return_value=None)
+
+        mock_session = MagicMock()
+        mock_session.__aenter__ = AsyncMock(return_value=mock_session)
+        mock_session.__aexit__ = AsyncMock(return_value=None)
+        mock_session.post = MagicMock(side_effect=[text_resp, media_resp])
+
+        with patch("aiohttp.ClientSession", return_value=mock_session):
+            result = asyncio.run(
+                _send_discord("tok", "444", "hello", media_files=[(str(img), False)])
+            )
+
+        assert result["success"] is True
+        assert result["message_id"] == "txt_ok"
+        assert "warnings" in result
+        assert any("413" in w for w in result["warnings"])
+
+    def test_no_text_no_media_returns_error(self):
+        """Empty text with no media returns error dict."""
+        mock_session, _ = self._build_mock(200)
+        with patch("aiohttp.ClientSession", return_value=mock_session):
+            result = asyncio.run(
+                _send_discord("tok", "555", "", media_files=[])
+            )
+
+        # Text is empty but media_files is empty, so text POST fires
+        # (the "skip text if media present" condition isn't met)
+        assert result["success"] is True
+
+    def test_multiple_media_files_uploaded_separately(self, tmp_path):
+        """Each media file gets its own multipart POST."""
+        img1 = tmp_path / "a.png"
+        img1.write_bytes(b"img1")
+        img2 = tmp_path / "b.jpg"
+        img2.write_bytes(b"img2")
+
+        mock_session, _ = self._build_mock(200, {"id": "last"})
+        with patch("aiohttp.ClientSession", return_value=mock_session):
+            result = asyncio.run(
+                _send_discord("tok", "666", "hi", media_files=[
+                    (str(img1), False), (str(img2), False)
+                ])
+            )
+
+        assert result["success"] is True
+        # 1 text POST + 2 media POSTs = 3
+        assert mock_session.post.call_count == 3
+
+
+class TestSendToPlatformDiscordMedia:
+    """_send_to_platform routes Discord media correctly."""
+
+    def test_media_files_passed_on_last_chunk_only(self):
+        """Discord media_files are only passed on the final chunk."""
+        call_log = []
+
+        async def mock_send_discord(token, chat_id, message, thread_id=None, media_files=None):
+            call_log.append({"message": message, "media_files": media_files or []})
+            return {"success": True, "platform": "discord", "chat_id": chat_id, "message_id": "1"}
+
+        # A message long enough to get chunked (Discord limit is 2000)
+        long_msg = "A" * 1900 + " " + "B" * 1900
+
+        with patch("tools.send_message_tool._send_discord", side_effect=mock_send_discord):
+            result = asyncio.run(
+                _send_to_platform(
+                    Platform.DISCORD,
+                    SimpleNamespace(enabled=True, token="tok", extra={}),
+                    "999",
+                    long_msg,
+                    media_files=[("/fake/img.png", False)],
+                )
+            )
+
+        assert result["success"] is True
+        assert len(call_log) == 2  # Message was chunked
+        assert call_log[0]["media_files"] == []  # First chunk: no media
+        assert call_log[1]["media_files"] == [("/fake/img.png", False)]  # Last chunk: media attached
+
+    def test_single_chunk_gets_media(self):
+        """Short message (single chunk) gets media_files directly."""
+        send_mock = AsyncMock(return_value={"success": True, "message_id": "1"})
+
+        with patch("tools.send_message_tool._send_discord", send_mock):
+            result = asyncio.run(
+                _send_to_platform(
+                    Platform.DISCORD,
+                    SimpleNamespace(enabled=True, token="tok", extra={}),
+                    "888",
+                    "short message",
+                    media_files=[("/fake/img.png", False)],
+                )
+            )
+
+        assert result["success"] is True
+        send_mock.assert_awaited_once()
+        call_kwargs = send_mock.await_args.kwargs
+        assert call_kwargs["media_files"] == [("/fake/img.png", False)]
+
+
+class TestSendMatrixUrlEncoding:
+    """_send_matrix URL-encodes Matrix room IDs in the API path."""
+
+    def test_room_id_is_percent_encoded_in_url(self):
+        """Matrix room IDs with ! and : are percent-encoded in the PUT URL."""
+        import aiohttp
+
+        mock_resp = MagicMock()
+        mock_resp.status = 200
+        mock_resp.json = AsyncMock(return_value={"event_id": "$evt123"})
+        mock_resp.__aenter__ = AsyncMock(return_value=mock_resp)
+        mock_resp.__aexit__ = AsyncMock(return_value=None)
+
+        mock_session = MagicMock()
+        mock_session.put = MagicMock(return_value=mock_resp)
+        mock_session.__aenter__ = AsyncMock(return_value=mock_session)
+        mock_session.__aexit__ = AsyncMock(return_value=None)
+
+        with patch("aiohttp.ClientSession", return_value=mock_session):
+            from tools.send_message_tool import _send_matrix
+            result = asyncio.get_event_loop().run_until_complete(
+                _send_matrix(
+                    "test_token",
+                    {"homeserver": "https://matrix.example.org"},
+                    "!HLOQwxYGgFPMPJUSNR:matrix.org",
+                    "hello",
+                )
+            )
+
+        assert result["success"] is True
+        # Verify the URL was called with percent-encoded room ID
+        put_url = mock_session.put.call_args[0][0]
+        assert "%21HLOQwxYGgFPMPJUSNR%3Amatrix.org" in put_url
+        assert "!HLOQwxYGgFPMPJUSNR:matrix.org" not in put_url
--- a/tools/cronjob_tools.py
+++ b/tools/cronjob_tools.py
@@ -13,6 +13,8 @@ import sys
 from pathlib import Path
 from typing import Any, Dict, List, Optional

+from hermes_constants import display_hermes_home
+
 logger = logging.getLogger(__name__)

 # Import from cron module (will be available when properly installed)
@@ -391,6 +393,8 @@ Use action='create' to schedule a new job from a prompt or one or more skills.
 Use action='list' to inspect jobs.
 Use action='update', 'pause', 'resume', 'remove', or 'run' to manage an existing job.

+To stop a job the user no longer wants: first action='list' to find the job_id, then action='remove' with that job_id. Never guess job IDs — always list first.
+
 Jobs run in a fresh session with no current-chat context, so prompts must be self-contained.
 If skills are provided on create, the future cron run loads those skills in order, then follows the prompt as the task instruction.
 On update, passing skills=[] clears attached skills.
@@ -453,7 +457,7 @@ Important safety rule: cron-run sessions should not recursively schedule more cr
            },
            "script": {
                "type": "string",
-                "description": "Optional path to a Python script that runs before each cron job execution. Its stdout is injected into the prompt as context. Use for data collection and change detection. Relative paths resolve under ~/.hermes/scripts/. On update, pass empty string to clear."
+                "description": f"Optional path to a Python script that runs before each cron job execution. Its stdout is injected into the prompt as context. Use for data collection and change detection. Relative paths resolve under {display_hermes_home()}/scripts/. On update, pass empty string to clear."
            },
        },
        "required": ["action"]
--- a/tools/send_message_tool.py
+++ b/tools/send_message_tool.py
@@ -68,7 +68,7 @@ SEND_MESSAGE_SCHEMA = {
            },
            "target": {
                "type": "string",
-                "description": "Delivery target. Format: 'platform' (uses home channel), 'platform:#channel-name', 'platform:chat_id', or 'platform:chat_id:thread_id' for Telegram topics and Discord threads. Examples: 'telegram', 'telegram:-1001234567890:17585', 'discord:999888777:555444333', 'discord:#bot-home', 'slack:#engineering', 'signal:+155****4567'"
+                "description": "Delivery target. Format: 'platform' (uses home channel), 'platform:#channel-name', 'platform:chat_id', or 'platform:chat_id:thread_id' for Telegram topics and Discord threads. Examples: 'telegram', 'telegram:-1001234567890:17585', 'discord:999888777:555444333', 'discord:#bot-home', 'slack:#engineering', 'signal:+155****4567', 'matrix:!roomid:server.org', 'matrix:@user:server.org'"
            },
            "message": {
                "type": "string",
@@ -248,6 +248,9 @@ def _parse_target_ref(platform_name: str, target_ref: str):
            return match.group(1), None, True
    if target_ref.lstrip("-").isdigit():
        return target_ref, None, True
+    # Matrix room IDs (start with !) and user IDs (start with @) are explicit
+    if platform_name == "matrix" and (target_ref.startswith("!") or target_ref.startswith("@")):
+        return target_ref, None, True
    return None, None, False


@@ -384,11 +387,28 @@ async def _send_to_platform(platform, pconfig, chat_id, message, thread_id=None,
    if platform == Platform.WEIXIN:
        return await _send_weixin(pconfig, chat_id, message, media_files=media_files)

-    # --- Non-Telegram platforms ---
+    # --- Discord: special handling for media attachments ---
+    if platform == Platform.DISCORD:
+        last_result = None
+        for i, chunk in enumerate(chunks):
+            is_last = (i == len(chunks) - 1)
+            result = await _send_discord(
+                pconfig.token,
+                chat_id,
+                chunk,
+                media_files=media_files if is_last else [],
+                thread_id=thread_id,
+            )
+            if isinstance(result, dict) and result.get("error"):
+                return result
+            last_result = result
+        return last_result
+
+    # --- Non-Telegram/Discord platforms ---
    if media_files and not message.strip():
        return {
            "error": (
-                f"send_message MEDIA delivery is currently only supported for telegram; "
+                f"send_message MEDIA delivery is currently only supported for telegram, discord, and weixin; "
                f"target {platform.value} had only media attachments"
            )
        }
@@ -396,14 +416,12 @@ async def _send_to_platform(platform, pconfig, chat_id, message, thread_id=None,
    if media_files:
        warning = (
            f"MEDIA attachments were omitted for {platform.value}; "
-            "native send_message media delivery is currently only supported for telegram"
+            "native send_message media delivery is currently only supported for telegram, discord, and weixin"
        )

    last_result = None
    for chunk in chunks:
-        if platform == Platform.DISCORD:
-            result = await _send_discord(pconfig.token, chat_id, chunk, thread_id=thread_id)
-        elif platform == Platform.SLACK:
+        if platform == Platform.SLACK:
            result = await _send_slack(pconfig.token, chat_id, chunk)
        elif platform == Platform.WHATSAPP:
            result = await _send_whatsapp(pconfig.extra, chat_id, chunk)
@@ -568,13 +586,16 @@ async def _send_telegram(token, chat_id, message, media_files=None, thread_id=No
        return _error(f"Telegram send failed: {e}")


-async def _send_discord(token, chat_id, message, thread_id=None):
+async def _send_discord(token, chat_id, message, thread_id=None, media_files=None):
    """Send a single message via Discord REST API (no websocket client needed).

    Chunking is handled by _send_to_platform() before this is called.

    When thread_id is provided, the message is sent directly to that thread
    via the /channels/{thread_id}/messages endpoint.
+
+    Media files are uploaded one-by-one via multipart/form-data after the
+    text message is sent (same pattern as Telegram).
    """
    try:
        import aiohttp
@@ -589,14 +610,56 @@ async def _send_discord(token, chat_id, message, thread_id=None):
            url = f"https://discord.com/api/v10/channels/{thread_id}/messages"
        else:
            url = f"https://discord.com/api/v10/channels/{chat_id}/messages"
-        headers = {"Authorization": f"Bot {token}", "Content-Type": "application/json"}
+        auth_headers = {"Authorization": f"Bot {token}"}
+        media_files = media_files or []
+        last_data = None
+        warnings = []
+
        async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=30), **_sess_kw) as session:
-            async with session.post(url, headers=headers, json={"content": message}, **_req_kw) as resp:
-                if resp.status not in (200, 201):
-                    body = await resp.text()
-                    return _error(f"Discord API error ({resp.status}): {body}")
-                data = await resp.json()
-        return {"success": True, "platform": "discord", "chat_id": chat_id, "message_id": data.get("id")}
+            # Send text message (skip if empty and media is present)
+            if message.strip() or not media_files:
+                headers = {**auth_headers, "Content-Type": "application/json"}
+                async with session.post(url, headers=headers, json={"content": message}, **_req_kw) as resp:
+                    if resp.status not in (200, 201):
+                        body = await resp.text()
+                        return _error(f"Discord API error ({resp.status}): {body}")
+                    last_data = await resp.json()
+
+            # Send each media file as a separate multipart upload
+            for media_path, _is_voice in media_files:
+                if not os.path.exists(media_path):
+                    warning = f"Media file not found, skipping: {media_path}"
+                    logger.warning(warning)
+                    warnings.append(warning)
+                    continue
+                try:
+                    form = aiohttp.FormData()
+                    filename = os.path.basename(media_path)
+                    with open(media_path, "rb") as f:
+                        form.add_field("files[0]", f, filename=filename)
+                        async with session.post(url, headers=auth_headers, data=form, **_req_kw) as resp:
+                            if resp.status not in (200, 201):
+                                body = await resp.text()
+                                warning = _sanitize_error_text(f"Failed to send media {media_path}: Discord API error ({resp.status}): {body}")
+                                logger.error(warning)
+                                warnings.append(warning)
+                                continue
+                            last_data = await resp.json()
+                except Exception as e:
+                    warning = _sanitize_error_text(f"Failed to send media {media_path}: {e}")
+                    logger.error(warning)
+                    warnings.append(warning)
+
+        if last_data is None:
+            error = "No deliverable text or media remained after processing"
+            if warnings:
+                return {"error": error, "warnings": warnings}
+            return {"error": error}
+
+        result = {"success": True, "platform": "discord", "chat_id": chat_id, "message_id": last_data.get("id")}
+        if warnings:
+            result["warnings"] = warnings
+        return result
    except Exception as e:
        return _error(f"Discord send failed: {e}")

@@ -816,7 +879,9 @@ async def _send_matrix(token, extra, chat_id, message):
        if not homeserver or not token:
            return {"error": "Matrix not configured (MATRIX_HOMESERVER, MATRIX_ACCESS_TOKEN required)"}
        txn_id = f"hermes_{int(time.time() * 1000)}_{os.urandom(4).hex()}"
-        url = f"{homeserver}/_matrix/client/v3/rooms/{chat_id}/send/m.room.message/{txn_id}"
+        from urllib.parse import quote
+        encoded_room = quote(chat_id, safe="")
+        url = f"{homeserver}/_matrix/client/v3/rooms/{encoded_room}/send/m.room.message/{txn_id}"
        headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}

        # Build message payload with optional HTML formatted_body.
--- a/tools/skill_manager_tool.py
+++ b/tools/skill_manager_tool.py
@@ -39,7 +39,7 @@ import re
 import shutil
 import tempfile
 from pathlib import Path
-from hermes_constants import get_hermes_home
+from hermes_constants import get_hermes_home, display_hermes_home
 from typing import Dict, Any, Optional, Tuple

 logger = logging.getLogger(__name__)
@@ -655,7 +655,7 @@ SKILL_MANAGE_SCHEMA = {
    "description": (
        "Manage skills (create, update, delete). Skills are your procedural "
        "memory — reusable approaches for recurring task types. "
-        "New skills go to ~/.hermes/skills/; existing skills can be modified wherever they live.\n\n"
+        f"New skills go to {display_hermes_home()}/skills/; existing skills can be modified wherever they live.\n\n"
        "Actions: create (full SKILL.md + optional category), "
        "patch (old_string/new_string — preferred for fixes), "
        "edit (full SKILL.md rewrite — major overhauls only), "
--- a/tools/skills_tool.py
+++ b/tools/skills_tool.py
@@ -69,7 +69,7 @@ Usage:
 import json
 import logging

-from hermes_constants import get_hermes_home
+from hermes_constants import get_hermes_home, display_hermes_home
 import os
 import re
 from enum import Enum
@@ -408,7 +408,7 @@ def _gateway_setup_hint() -> str:

        return GATEWAY_SECRET_CAPTURE_UNSUPPORTED_MESSAGE
    except Exception:
-        return "Secure secret entry is not available. Load this skill in the local CLI to be prompted, or add the key to ~/.hermes/.env manually."
+        return f"Secure secret entry is not available. Load this skill in the local CLI to be prompted, or add the key to {display_hermes_home()}/.env manually."


 def _build_setup_note(
@@ -666,7 +666,7 @@ def skills_list(category: str = None, task_id: str = None) -> str:
                    "success": True,
                    "skills": [],
                    "categories": [],
-                    "message": "No skills found. Skills directory created at ~/.hermes/skills/",
+                    "message": f"No skills found. Skills directory created at {display_hermes_home()}/skills/",
                },
                ensure_ascii=False,
            )
--- a/tools/tts_tool.py
+++ b/tools/tts_tool.py
@@ -40,6 +40,8 @@ from pathlib import Path
 from typing import Callable, Dict, Any, Optional
 from urllib.parse import urljoin

+from hermes_constants import display_hermes_home
+
 logger = logging.getLogger(__name__)
 from tools.managed_tool_gateway import resolve_managed_tool_gateway
 from tools.tool_backend_helpers import managed_nous_tools_enabled, resolve_openai_audio_api_key
@@ -1050,7 +1052,7 @@ TTS_SCHEMA = {
            },
            "output_path": {
                "type": "string",
-                "description": "Optional custom file path to save the audio. Defaults to ~/.hermes/audio_cache/<timestamp>.mp3"
+                "description": f"Optional custom file path to save the audio. Defaults to {display_hermes_home()}/audio_cache/<timestamp>.mp3"
            }
        },
        "required": ["text"]
--- a/website/docs/integrations/providers.md
+++ b/website/docs/integrations/providers.md
@@ -49,6 +49,17 @@ The OpenAI Codex provider authenticates via device code (open a URL, enter a cod
 Even when using Nous Portal, Codex, or a custom endpoint, some tools (vision, web summarization, MoA) use a separate "auxiliary" model — by default Gemini Flash via OpenRouter. An `OPENROUTER_API_KEY` enables these tools automatically. You can also configure which model and provider these tools use — see [Auxiliary Models](/docs/user-guide/configuration#auxiliary-models).
 :::

+### Two Commands for Model Management
+
+Hermes has **two** model commands that serve different purposes:
+
+| Command | Where to run | What it does |
+|---------|-------------|--------------|
+| **`hermes model`** | Your terminal (outside any session) | Full setup wizard — add providers, run OAuth, enter API keys, configure endpoints |
+| **`/model`** | Inside a Hermes chat session | Quick switch between **already-configured** providers and models |
+
+If you're trying to switch to a provider you haven't set up yet (e.g. you only have OpenRouter configured and want to use Anthropic), you need `hermes model`, not `/model`. Exit your session first (`Ctrl+C` or `/quit`), run `hermes model`, complete the provider setup, then start a new session.
+
 ### Anthropic (Native)

 Use Claude models directly through the Anthropic API — no OpenRouter proxy needed. Supports three auth methods:
@@ -252,7 +263,15 @@ Both approaches persist to `config.yaml`, which is the source of truth for model

 ### Switching Models with `/model`

-Once a custom endpoint is configured, you can switch models mid-session:
+:::warning hermes model vs /model
+**`hermes model`** (run from your terminal, outside any chat session) is the **full provider setup wizard**. Use it to add new providers, run OAuth flows, enter API keys, and configure custom endpoints.
+
+**`/model`** (typed inside an active Hermes chat session) can only **switch between providers and models you've already set up**. It cannot add new providers, run OAuth, or prompt for API keys. If you've only configured one provider (e.g. OpenRouter), `/model` will only show models for that provider.
+
+**To add a new provider:** Exit your session (`Ctrl+C` or `/quit`), run `hermes model`, set up the new provider, then start a new session.
+:::
+
+Once you have at least one custom endpoint configured, you can switch models mid-session:

 ```
 /model custom:qwen-2.5          # Switch to a model on your custom endpoint
--- a/website/docs/reference/cli-commands.md
+++ b/website/docs/reference/cli-commands.md
@@ -109,22 +109,31 @@ hermes chat --worktree -q "Review this repo and open a PR"

 ## `hermes model`

-Interactive provider + model selector.
+Interactive provider + model selector. **This is the command for adding new providers, setting up API keys, and running OAuth flows.** Run it from your terminal — not from inside an active Hermes chat session.

 ```bash
 hermes model
 ```

 Use this when you want to:
- switch default providers
- log into OAuth-backed providers during model selection
+- **add a new provider** (OpenRouter, Anthropic, Copilot, DeepSeek, custom, etc.)
+- log into OAuth-backed providers (Anthropic, Copilot, Codex, Nous Portal)
+- enter or update API keys
 - pick from provider-specific model lists
 - configure a custom/self-hosted endpoint
 - save the new default into config

+:::warning hermes model vs /model — know the difference
+**`hermes model`** (run from your terminal, outside any Hermes session) is the **full provider setup wizard**. It can add new providers, run OAuth flows, prompt for API keys, and configure endpoints.
+
+**`/model`** (typed inside an active Hermes chat session) can only **switch between providers and models you've already set up**. It cannot add new providers, run OAuth, or prompt for API keys.
+
+**If you need to add a new provider:** Exit your Hermes session first (`Ctrl+C` or `/quit`), then run `hermes model` from your terminal prompt.
+:::
+
 ### `/model` slash command (mid-session)

-Switch models without leaving a session:
+Switch between already-configured models without leaving a session:

 ```
 /model                              # Show current model and available options
@@ -136,6 +145,16 @@ Switch models without leaving a session:
 /model openrouter:anthropic/claude-sonnet-4  # Switch back to cloud
 ```

+By default, `/model` changes apply **to the current session only**. Add `--global` to persist the change to `config.yaml`:
+
+```
+/model claude-sonnet-4 --global     # Switch and save as new default
+```
+
+:::info What if I only see OpenRouter models?
+If you've only configured OpenRouter, `/model` will only show OpenRouter models. To add another provider (Anthropic, DeepSeek, Copilot, etc.), exit your session and run `hermes model` from the terminal.
+:::
+
 Provider and base URL changes are persisted to `config.yaml` automatically. When switching away from a custom endpoint, the stale base URL is cleared to prevent it leaking into other providers.

 ## `hermes gateway`
--- a/website/docs/reference/faq.md
+++ b/website/docs/reference/faq.md
@@ -187,6 +187,32 @@ curl -fsSL https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scri

 ### Provider & Model Issues

+#### `/model` only shows one provider / can't switch providers
+
+**Cause:** `/model` (inside a chat session) can only switch between providers you've **already configured**. If you've only set up OpenRouter, that's all `/model` will show.
+
+**Solution:** Exit your session and use `hermes model` from your terminal to add new providers:
+
+```bash
+# Exit the Hermes chat session first (Ctrl+C or /quit)
+
+# Run the full provider setup wizard
+hermes model
+
+# This lets you: add providers, run OAuth, enter API keys, configure endpoints
+```
+
+After adding a new provider via `hermes model`, start a new chat session — `/model` will now show all your configured providers.
+
+:::tip Quick reference
+| Want to... | Use |
+|-----------|-----|
+| Add a new provider | `hermes model` (from terminal) |
+| Enter/change API keys | `hermes model` (from terminal) |
+| Switch model mid-session | `/model <name>` (inside session) |
+| Switch to different configured provider | `/model provider:model` (inside session) |
+:::
+
 #### API key not working

 **Cause:** Key is missing, expired, incorrectly set, or for the wrong provider.
--- a/website/docs/reference/slash-commands.md
+++ b/website/docs/reference/slash-commands.md
@@ -46,7 +46,7 @@ Type `/` in the CLI to open the autocomplete menu. Built-in commands are case-in
 | Command | Description |
 |---------|-------------|
 | `/config` | Show current configuration |
-| `/model [model-name]` | Show or change the current model. Supports: `/model claude-sonnet-4`, `/model provider:model` (switch providers), `/model custom:model` (custom endpoint), `/model custom:name:model` (named custom provider), `/model custom` (auto-detect from endpoint). Use `--global` to persist the change to config.yaml. |
+| `/model [model-name]` | Show or change the current model. Supports: `/model claude-sonnet-4`, `/model provider:model` (switch providers), `/model custom:model` (custom endpoint), `/model custom:name:model` (named custom provider), `/model custom` (auto-detect from endpoint). Use `--global` to persist the change to config.yaml. **Note:** `/model` can only switch between already-configured providers. To add a new provider, exit the session and run `hermes model` from your terminal. |
 | `/provider` | Show available providers and current provider |
 | `/personality` | Set a predefined personality |
 | `/verbose` | Cycle tool progress display: off → new → all → verbose. Can be [enabled for messaging](#notes) via config. |
@@ -124,7 +124,7 @@ The messaging gateway supports the following built-in commands inside Telegram,
 | `/reset` | Reset conversation history. |
 | `/status` | Show session info. |
 | `/stop` | Kill all running background processes and interrupt the running agent. |
-| `/model [provider:model]` | Show or change the model. Supports provider switches (`/model zai:glm-5`), custom endpoints (`/model custom:model`), named custom providers (`/model custom:local:qwen`), and auto-detect (`/model custom`). Use `--global` to persist the change to config.yaml. |
+| `/model [provider:model]` | Show or change the model. Supports provider switches (`/model zai:glm-5`), custom endpoints (`/model custom:model`), named custom providers (`/model custom:local:qwen`), and auto-detect (`/model custom`). Use `--global` to persist the change to config.yaml. **Note:** `/model` can only switch between already-configured providers. To add a new provider or set up API keys, use `hermes model` from your terminal (outside the chat session). |
 | `/provider` | Show provider availability and auth status. |
 | `/personality [name]` | Set a personality overlay for the session. |
 | `/fast [normal\|fast\|status]` | Toggle fast mode — OpenAI Priority Processing / Anthropic Fast Mode. |