Merge branch 'main' of github.com:NousResearch/hermes-agent into feat/ink-refactor

This commit is contained in:
Brooklyn Nicholson
2026-04-15 10:21:00 -05:00
35 changed files with 2388 additions and 122 deletions

View File

@@ -17,7 +17,10 @@ Improvements over v2:
- Richer tool call/result detail in summarizer input
"""
import hashlib
import json
import logging
import re
import time
from typing import Any, Dict, List, Optional
@@ -57,6 +60,128 @@ _CHARS_PER_TOKEN = 4
_SUMMARY_FAILURE_COOLDOWN_SECONDS = 600
def _summarize_tool_result(tool_name: str, tool_args: str, tool_content: str) -> str:
"""Create an informative 1-line summary of a tool call + result.
Used during the pre-compression pruning pass to replace large tool
outputs with a short but useful description of what the tool did,
rather than a generic placeholder that carries zero information.
Returns strings like::
[terminal] ran `npm test` -> exit 0, 47 lines output
[read_file] read config.py from line 1 (1,200 chars)
[search_files] content search for 'compress' in agent/ -> 12 matches
"""
try:
args = json.loads(tool_args) if tool_args else {}
except (json.JSONDecodeError, TypeError):
args = {}
content = tool_content or ""
content_len = len(content)
line_count = content.count("\n") + 1 if content.strip() else 0
if tool_name == "terminal":
cmd = args.get("command", "")
if len(cmd) > 80:
cmd = cmd[:77] + "..."
exit_match = re.search(r'"exit_code"\s*:\s*(-?\d+)', content)
exit_code = exit_match.group(1) if exit_match else "?"
return f"[terminal] ran `{cmd}` -> exit {exit_code}, {line_count} lines output"
if tool_name == "read_file":
path = args.get("path", "?")
offset = args.get("offset", 1)
return f"[read_file] read {path} from line {offset} ({content_len:,} chars)"
if tool_name == "write_file":
path = args.get("path", "?")
written_lines = args.get("content", "").count("\n") + 1 if args.get("content") else "?"
return f"[write_file] wrote to {path} ({written_lines} lines)"
if tool_name == "search_files":
pattern = args.get("pattern", "?")
path = args.get("path", ".")
target = args.get("target", "content")
match_count = re.search(r'"total_count"\s*:\s*(\d+)', content)
count = match_count.group(1) if match_count else "?"
return f"[search_files] {target} search for '{pattern}' in {path} -> {count} matches"
if tool_name == "patch":
path = args.get("path", "?")
mode = args.get("mode", "replace")
return f"[patch] {mode} in {path} ({content_len:,} chars result)"
if tool_name in ("browser_navigate", "browser_click", "browser_snapshot",
"browser_type", "browser_scroll", "browser_vision"):
url = args.get("url", "")
ref = args.get("ref", "")
detail = f" {url}" if url else (f" ref={ref}" if ref else "")
return f"[{tool_name}]{detail} ({content_len:,} chars)"
if tool_name == "web_search":
query = args.get("query", "?")
return f"[web_search] query='{query}' ({content_len:,} chars result)"
if tool_name == "web_extract":
urls = args.get("urls", [])
url_desc = urls[0] if isinstance(urls, list) and urls else "?"
if isinstance(urls, list) and len(urls) > 1:
url_desc += f" (+{len(urls) - 1} more)"
return f"[web_extract] {url_desc} ({content_len:,} chars)"
if tool_name == "delegate_task":
goal = args.get("goal", "")
if len(goal) > 60:
goal = goal[:57] + "..."
return f"[delegate_task] '{goal}' ({content_len:,} chars result)"
if tool_name == "execute_code":
code_preview = (args.get("code") or "")[:60].replace("\n", " ")
if len(args.get("code", "")) > 60:
code_preview += "..."
return f"[execute_code] `{code_preview}` ({line_count} lines output)"
if tool_name in ("skill_view", "skills_list", "skill_manage"):
name = args.get("name", "?")
return f"[{tool_name}] name={name} ({content_len:,} chars)"
if tool_name == "vision_analyze":
question = args.get("question", "")[:50]
return f"[vision_analyze] '{question}' ({content_len:,} chars)"
if tool_name == "memory":
action = args.get("action", "?")
target = args.get("target", "?")
return f"[memory] {action} on {target}"
if tool_name == "todo":
return "[todo] updated task list"
if tool_name == "clarify":
return "[clarify] asked user a question"
if tool_name == "text_to_speech":
return f"[text_to_speech] generated audio ({content_len:,} chars)"
if tool_name == "cronjob":
action = args.get("action", "?")
return f"[cronjob] {action}"
if tool_name == "process":
action = args.get("action", "?")
sid = args.get("session_id", "?")
return f"[process] {action} session={sid}"
# Generic fallback
first_arg = ""
for k, v in list(args.items())[:2]:
sv = str(v)[:40]
first_arg += f" {k}={sv}"
return f"[{tool_name}]{first_arg} ({content_len:,} chars result)"
class ContextCompressor(ContextEngine):
"""Default context engine — compresses conversation context via lossy summarization.
@@ -78,6 +203,8 @@ class ContextCompressor(ContextEngine):
self._context_probed = False
self._context_probe_persistable = False
self._previous_summary = None
self._last_compression_savings_pct = 100.0
self._ineffective_compression_count = 0
def update_model(
self,
@@ -167,6 +294,9 @@ class ContextCompressor(ContextEngine):
# Stores the previous compaction summary for iterative updates
self._previous_summary: Optional[str] = None
# Anti-thrashing: track whether last compression was effective
self._last_compression_savings_pct: float = 100.0
self._ineffective_compression_count: int = 0
self._summary_failure_cooldown_until: float = 0.0
def update_from_response(self, usage: Dict[str, Any]):
@@ -175,9 +305,26 @@ class ContextCompressor(ContextEngine):
self.last_completion_tokens = usage.get("completion_tokens", 0)
def should_compress(self, prompt_tokens: int = None) -> bool:
"""Check if context exceeds the compression threshold."""
"""Check if context exceeds the compression threshold.
Includes anti-thrashing protection: if the last two compressions
each saved less than 10%, skip compression to avoid infinite loops
where each pass removes only 1-2 messages.
"""
tokens = prompt_tokens if prompt_tokens is not None else self.last_prompt_tokens
return tokens >= self.threshold_tokens
if tokens < self.threshold_tokens:
return False
# Anti-thrashing: back off if recent compressions were ineffective
if self._ineffective_compression_count >= 2:
if not self.quiet_mode:
logger.warning(
"Compression skipped — last %d compressions saved <10%% each. "
"Consider /new to start a fresh session, or /compress <topic> "
"for focused compression.",
self._ineffective_compression_count,
)
return False
return True
# ------------------------------------------------------------------
# Tool output pruning (cheap pre-pass, no LLM call)
@@ -187,7 +334,16 @@ class ContextCompressor(ContextEngine):
self, messages: List[Dict[str, Any]], protect_tail_count: int,
protect_tail_tokens: int | None = None,
) -> tuple[List[Dict[str, Any]], int]:
"""Replace old tool result contents with a short placeholder.
"""Replace old tool result contents with informative 1-line summaries.
Instead of a generic placeholder, generates a summary like::
[terminal] ran `npm test` -> exit 0, 47 lines output
[read_file] read config.py from line 1 (3,400 chars)
Also deduplicates identical tool results (e.g. reading the same file
5x keeps only the newest full copy) and truncates large tool_call
arguments in assistant messages outside the protected tail.
Walks backward from the end, protecting the most recent messages that
fall within ``protect_tail_tokens`` (when provided) OR the last
@@ -203,6 +359,22 @@ class ContextCompressor(ContextEngine):
result = [m.copy() for m in messages]
pruned = 0
# Build index: tool_call_id -> (tool_name, arguments_json)
call_id_to_tool: Dict[str, tuple] = {}
for msg in result:
if msg.get("role") == "assistant":
for tc in msg.get("tool_calls") or []:
if isinstance(tc, dict):
cid = tc.get("id", "")
fn = tc.get("function", {})
call_id_to_tool[cid] = (fn.get("name", "unknown"), fn.get("arguments", ""))
else:
cid = getattr(tc, "id", "") or ""
fn = getattr(tc, "function", None)
name = getattr(fn, "name", "unknown") if fn else "unknown"
args_str = getattr(fn, "arguments", "") if fn else ""
call_id_to_tool[cid] = (name, args_str)
# Determine the prune boundary
if protect_tail_tokens is not None and protect_tail_tokens > 0:
# Token-budget approach: walk backward accumulating tokens
@@ -211,7 +383,8 @@ class ContextCompressor(ContextEngine):
min_protect = min(protect_tail_count, len(result) - 1)
for i in range(len(result) - 1, -1, -1):
msg = result[i]
content_len = len(msg.get("content") or "")
raw_content = msg.get("content") or ""
content_len = sum(len(p.get("text", "")) for p in raw_content) if isinstance(raw_content, list) else len(raw_content)
msg_tokens = content_len // _CHARS_PER_TOKEN + 10
for tc in msg.get("tool_calls") or []:
if isinstance(tc, dict):
@@ -226,18 +399,69 @@ class ContextCompressor(ContextEngine):
else:
prune_boundary = len(result) - protect_tail_count
# Pass 1: Deduplicate identical tool results.
# When the same file is read multiple times, keep only the most recent
# full copy and replace older duplicates with a back-reference.
content_hashes: dict = {} # hash -> (index, tool_call_id)
for i in range(len(result) - 1, -1, -1):
msg = result[i]
if msg.get("role") != "tool":
continue
content = msg.get("content") or ""
# Skip multimodal content (list of content blocks)
if isinstance(content, list):
continue
if len(content) < 200:
continue
h = hashlib.md5(content.encode("utf-8", errors="replace")).hexdigest()[:12]
if h in content_hashes:
# This is an older duplicate — replace with back-reference
result[i] = {**msg, "content": "[Duplicate tool output — same content as a more recent call]"}
pruned += 1
else:
content_hashes[h] = (i, msg.get("tool_call_id", "?"))
# Pass 2: Replace old tool results with informative summaries
for i in range(prune_boundary):
msg = result[i]
if msg.get("role") != "tool":
continue
content = msg.get("content", "")
# Skip multimodal content (list of content blocks)
if isinstance(content, list):
continue
if not content or content == _PRUNED_TOOL_PLACEHOLDER:
continue
# Skip already-deduplicated or previously-summarized results
if content.startswith("[Duplicate tool output"):
continue
# Only prune if the content is substantial (>200 chars)
if len(content) > 200:
result[i] = {**msg, "content": _PRUNED_TOOL_PLACEHOLDER}
call_id = msg.get("tool_call_id", "")
tool_name, tool_args = call_id_to_tool.get(call_id, ("unknown", ""))
summary = _summarize_tool_result(tool_name, tool_args, content)
result[i] = {**msg, "content": summary}
pruned += 1
# Pass 3: Truncate large tool_call arguments in assistant messages
# outside the protected tail. write_file with 50KB content, for
# example, survives pruning entirely without this.
for i in range(prune_boundary):
msg = result[i]
if msg.get("role") != "assistant" or not msg.get("tool_calls"):
continue
new_tcs = []
modified = False
for tc in msg["tool_calls"]:
if isinstance(tc, dict):
args = tc.get("function", {}).get("arguments", "")
if len(args) > 500:
tc = {**tc, "function": {**tc["function"], "arguments": args[:200] + "...[truncated]"}}
modified = True
new_tcs.append(tc)
if modified:
result[i] = {**msg, "tool_calls": new_tcs}
return result, pruned
# ------------------------------------------------------------------
@@ -357,29 +581,37 @@ class ContextCompressor(ContextEngine):
)
# Shared structured template (used by both paths).
# Key changes vs v1:
# - "Pending User Asks" section (from Claude Code) explicitly tracks
# unanswered questions so the model knows what's resolved vs open
# - "Remaining Work" replaces "Next Steps" to avoid reading as active
# instructions
# - "Resolved Questions" makes it clear which questions were already
# answered (prevents model from re-answering them)
_template_sections = f"""## Goal
[What the user is trying to accomplish]
## Constraints & Preferences
[User preferences, coding style, constraints, important decisions]
## Progress
### Done
[Completed work — include specific file paths, commands run, results obtained]
### In Progress
[Work currently underway]
### Blocked
[Any blockers or issues encountered]
## Completed Actions
[Numbered list of concrete actions taken — include tool used, target, and outcome.
Format each as: N. ACTION target — outcome [tool: name]
Example:
1. READ config.py:45 — found `==` should be `!=` [tool: read_file]
2. PATCH config.py:45 — changed `==` to `!=` [tool: patch]
3. TEST `pytest tests/` — 3/50 failed: test_parse, test_validate, test_edge [tool: terminal]
Be specific with file paths, commands, line numbers, and results.]
## Active State
[Current working state — include:
- Working directory and branch (if applicable)
- Modified/created files with brief note on each
- Test status (X/Y passing)
- Any running processes or servers
- Environment details that matter]
## In Progress
[Work currently underway — what was being done when compaction fired]
## Blocked
[Any blockers, errors, or issues not yet resolved. Include exact error messages.]
## Key Decisions
[Important technical decisions and why they were made]
[Important technical decisions and WHY they were made]
## Resolved Questions
[Questions the user asked that were ALREADY answered — include the answer so the next assistant does not re-answer them]
@@ -396,10 +628,7 @@ class ContextCompressor(ContextEngine):
## Critical Context
[Any specific values, error messages, configuration details, or data that would be lost without explicit preservation]
## Tools & Patterns
[Which tools were used, how they were used effectively, and any tool-specific discoveries]
Target ~{summary_budget} tokens. Be specific — include file paths, command outputs, error messages, and concrete values rather than vague descriptions.
Target ~{summary_budget} tokens. Be CONCRETE — include file paths, command outputs, error messages, line numbers, and specific values. Avoid vague descriptions like "made some changes" — say exactly what changed.
Write only the summary body. Do not include any preamble or prefix."""
@@ -415,7 +644,7 @@ PREVIOUS SUMMARY:
NEW TURNS TO INCORPORATE:
{content_to_summarize}
Update the summary using this exact structure. PRESERVE all existing information that is still relevant. ADD new progress. Move items from "In Progress" to "Done" when completed. Move answered questions to "Resolved Questions". Remove information only if it is clearly obsolete.
Update the summary using this exact structure. PRESERVE all existing information that is still relevant. ADD new completed actions to the numbered list (continue numbering). Move items from "In Progress" to "Completed Actions" when done. Move answered questions to "Resolved Questions". Update "Active State" to reflect current state. Remove information only if it is clearly obsolete.
{_template_sections}"""
else:
@@ -450,7 +679,7 @@ The user has requested that this compaction PRIORITISE preserving all informatio
"api_mode": self.api_mode,
},
"messages": [{"role": "user", "content": prompt}],
"max_tokens": summary_budget * 2,
"max_tokens": int(summary_budget * 1.3),
# timeout resolved from auxiliary.compression.timeout config by call_llm
}
if self.summary_model:
@@ -464,8 +693,10 @@ The user has requested that this compaction PRIORITISE preserving all informatio
# Store for iterative updates on next compaction
self._previous_summary = summary
self._summary_failure_cooldown_until = 0.0
self._summary_model_fallen_back = False
return self._with_summary_prefix(summary)
except RuntimeError:
# No provider configured — long cooldown, unlikely to self-resolve
self._summary_failure_cooldown_until = time.monotonic() + _SUMMARY_FAILURE_COOLDOWN_SECONDS
logging.warning("Context compression: no provider available for "
"summary. Middle turns will be dropped without summary "
@@ -473,12 +704,42 @@ The user has requested that this compaction PRIORITISE preserving all informatio
_SUMMARY_FAILURE_COOLDOWN_SECONDS)
return None
except Exception as e:
self._summary_failure_cooldown_until = time.monotonic() + _SUMMARY_FAILURE_COOLDOWN_SECONDS
# If the summary model is different from the main model and the
# error looks permanent (model not found, 503, 404), fall back to
# using the main model instead of entering cooldown that leaves
# context growing unbounded. (#8620 sub-issue 4)
_status = getattr(e, "status_code", None) or getattr(getattr(e, "response", None), "status_code", None)
_err_str = str(e).lower()
_is_model_not_found = (
_status in (404, 503)
or "model_not_found" in _err_str
or "does not exist" in _err_str
or "no available channel" in _err_str
)
if (
_is_model_not_found
and self.summary_model
and self.summary_model != self.model
and not getattr(self, "_summary_model_fallen_back", False)
):
self._summary_model_fallen_back = True
logging.warning(
"Summary model '%s' not available (%s). "
"Falling back to main model '%s' for compression.",
self.summary_model, e, self.model,
)
self.summary_model = "" # empty = use main model
self._summary_failure_cooldown_until = 0.0 # no cooldown
return self._generate_summary(messages, summary_budget) # retry immediately
# Transient errors (timeout, rate limit, network) — shorter cooldown
_transient_cooldown = 60
self._summary_failure_cooldown_until = time.monotonic() + _transient_cooldown
logging.warning(
"Failed to generate context summary: %s. "
"Further summary attempts paused for %d seconds.",
e,
_SUMMARY_FAILURE_COOLDOWN_SECONDS,
_transient_cooldown,
)
return None
@@ -744,11 +1005,11 @@ The user has requested that this compaction PRIORITISE preserving all informatio
compressed = []
for i in range(compress_start):
msg = messages[i].copy()
if i == 0 and msg.get("role") == "system" and self.compression_count == 0:
msg["content"] = (
(msg.get("content") or "")
+ "\n\n[Note: Some earlier conversation turns have been compacted into a handoff summary to preserve context space. The current session state may still reflect earlier work, so build on that summary and state rather than re-doing work.]"
)
if i == 0 and msg.get("role") == "system":
existing = msg.get("content") or ""
_compression_note = "[Note: Some earlier conversation turns have been compacted into a handoff summary to preserve context space. The current session state may still reflect earlier work, so build on that summary and state rather than re-doing work.]"
if _compression_note not in existing:
msg["content"] = existing + "\n\n" + _compression_note
compressed.append(msg)
# If LLM summary failed, insert a static fallback so the model
@@ -806,14 +1067,24 @@ The user has requested that this compaction PRIORITISE preserving all informatio
compressed = self._sanitize_tool_pairs(compressed)
new_estimate = estimate_messages_tokens_rough(compressed)
saved_estimate = display_tokens - new_estimate
# Anti-thrashing: track compression effectiveness
savings_pct = (saved_estimate / display_tokens * 100) if display_tokens > 0 else 0
self._last_compression_savings_pct = savings_pct
if savings_pct < 10:
self._ineffective_compression_count += 1
else:
self._ineffective_compression_count = 0
if not self.quiet_mode:
new_estimate = estimate_messages_tokens_rough(compressed)
saved_estimate = display_tokens - new_estimate
logger.info(
"Compressed: %d -> %d messages (~%d tokens saved)",
"Compressed: %d -> %d messages (~%d tokens saved, %.0f%%)",
n_messages,
len(compressed),
saved_estimate,
savings_pct,
)
logger.info("Compression #%d complete", self.compression_count)

View File

@@ -12,6 +12,8 @@ from datetime import datetime
from pathlib import Path
from typing import Any, Dict, Optional
from hermes_constants import display_hermes_home
logger = logging.getLogger(__name__)
_skill_commands: Dict[str, Dict[str, Any]] = {}
@@ -108,7 +110,7 @@ def _inject_skill_config(loaded_skill: dict[str, Any], parts: list[str]) -> None
if not resolved:
return
lines = ["", "[Skill config (from ~/.hermes/config.yaml):"]
lines = ["", f"[Skill config (from {display_hermes_home()}/config.yaml):"]
for key, value in resolved.items():
display_val = str(value) if value else "(not set)"
lines.append(f" {key} = {display_val}")

23
cli.py
View File

@@ -4692,16 +4692,19 @@ class HermesCLI:
self._close_model_picker()
return
provider_data = providers[selected]
model_list = []
try:
from hermes_cli.models import provider_model_ids
live = provider_model_ids(provider_data["slug"])
if live:
model_list = live
except Exception:
pass
# Use the curated model list from list_authenticated_providers()
# (same lists as `hermes model` and gateway pickers).
# Only fall back to the live provider catalog when the curated
# list is empty (e.g. user-defined endpoints with no curated list).
model_list = provider_data.get("models", [])
if not model_list:
model_list = provider_data.get("models", [])
try:
from hermes_cli.models import provider_model_ids
live = provider_model_ids(provider_data["slug"])
if live:
model_list = live
except Exception:
pass
state["stage"] = "model"
state["provider_data"] = provider_data
state["model_list"] = model_list
@@ -6061,7 +6064,7 @@ class HermesCLI:
parts = cmd.strip().split(None, 1)
sub = parts[1].lower().strip() if len(parts) > 1 else "status"
_DEFAULT_CDP = "http://localhost:9222"
_DEFAULT_CDP = "http://127.0.0.1:9222"
current = os.environ.get("BROWSER_CDP_URL", "").strip()
if sub.startswith("connect"):

View File

@@ -288,11 +288,13 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
if wrap_response:
task_name = job.get("name", job["id"])
job_id = job.get("id", "")
delivery_content = (
f"Cronjob Response: {task_name}\n"
f"(job_id: {job_id})\n"
f"-------------\n\n"
f"{content}\n\n"
f"Note: The agent cannot see this message, and therefore cannot respond to it."
f"To stop or manage this job, send me a new message (e.g. \"stop reminder {task_name}\")."
)
else:
delivery_content = content

View File

@@ -1635,6 +1635,21 @@ class BasePlatformAdapter(ABC):
# streaming already delivered the text (already_sent=True) or
# when the message was queued behind an active agent. Log at
# DEBUG to avoid noisy warnings for expected behavior.
#
# Suppress stale response when the session was interrupted by a
# new message that hasn't been consumed yet. The pending message
# is processed by the pending-message handler below (#8221/#2483).
if (
response
and interrupt_event.is_set()
and session_key in self._pending_messages
):
logger.info(
"[%s] Suppressing stale response for interrupted session %s",
self.name,
session_key,
)
response = None
if not response:
logger.debug("[%s] Handler returned empty/None response for %s", self.name, event.source.chat_id)
if response:

View File

@@ -1379,6 +1379,68 @@ class DiscordAdapter(BasePlatformAdapter):
)
return await super().send_image(chat_id, image_url, caption, reply_to)
async def send_animation(
self,
chat_id: str,
animation_url: str,
caption: Optional[str] = None,
reply_to: Optional[str] = None,
metadata: Optional[Dict[str, Any]] = None,
) -> SendResult:
"""Send an animated GIF natively as a Discord file attachment."""
if not self._client:
return SendResult(success=False, error="Not connected")
if not is_safe_url(animation_url):
logger.warning("[%s] Blocked unsafe animation URL during Discord send_animation", self.name)
return await super().send_animation(chat_id, animation_url, caption, reply_to, metadata=metadata)
try:
import aiohttp
channel = self._client.get_channel(int(chat_id))
if not channel:
channel = await self._client.fetch_channel(int(chat_id))
if not channel:
return SendResult(success=False, error=f"Channel {chat_id} not found")
# Download the GIF and send as a Discord file attachment
# (Discord renders .gif attachments as auto-playing animations inline)
from gateway.platforms.base import resolve_proxy_url, proxy_kwargs_for_aiohttp
_proxy = resolve_proxy_url(platform_env_var="DISCORD_PROXY")
_sess_kw, _req_kw = proxy_kwargs_for_aiohttp(_proxy)
async with aiohttp.ClientSession(**_sess_kw) as session:
async with session.get(animation_url, timeout=aiohttp.ClientTimeout(total=30), **_req_kw) as resp:
if resp.status != 200:
raise Exception(f"Failed to download animation: HTTP {resp.status}")
animation_data = await resp.read()
import io
file = discord.File(io.BytesIO(animation_data), filename="animation.gif")
msg = await channel.send(
content=caption if caption else None,
file=file,
)
return SendResult(success=True, message_id=str(msg.id))
except ImportError:
logger.warning(
"[%s] aiohttp not installed, falling back to URL. Run: pip install aiohttp",
self.name,
exc_info=True,
)
return await super().send_animation(chat_id, animation_url, caption, reply_to, metadata=metadata)
except Exception as e: # pragma: no cover - defensive logging
logger.error(
"[%s] Failed to send animation attachment, falling back to URL: %s",
self.name,
e,
exc_info=True,
)
return await super().send_animation(chat_id, animation_url, caption, reply_to, metadata=metadata)
async def send_video(
self,
chat_id: str,

View File

@@ -729,6 +729,14 @@ class MatrixAdapter(BasePlatformAdapter):
except Exception:
pass
async def stop_typing(self, chat_id: str) -> None:
"""Stop the Matrix typing indicator."""
if self._client:
try:
await self._client.set_typing(RoomID(chat_id), timeout=0)
except Exception:
pass
async def edit_message(
self, chat_id: str, message_id: str, content: str
) -> SendResult:

View File

@@ -573,6 +573,7 @@ class GatewayRunner:
self._running_agents: Dict[str, Any] = {}
self._running_agents_ts: Dict[str, float] = {} # start timestamp per session
self._pending_messages: Dict[str, str] = {} # Queued messages during interrupt
self._busy_ack_ts: Dict[str, float] = {} # last busy-ack timestamp per session (debounce)
# Cache AIAgent instances per session to preserve prompt caching.
# Without this, a new AIAgent is created per message, rebuilding the
@@ -1329,26 +1330,100 @@ class GatewayRunner:
merge_pending_message_event(adapter._pending_messages, session_key, event)
async def _handle_active_session_busy_message(self, event: MessageEvent, session_key: str) -> bool:
if not self._draining:
return False
# --- Draining case (gateway restarting/stopping) ---
if self._draining:
adapter = self.adapters.get(event.source.platform)
if not adapter:
return True
thread_meta = {"thread_id": event.source.thread_id} if event.source.thread_id else None
if self._queue_during_drain_enabled():
self._queue_or_replace_pending_event(session_key, event)
message = f"⏳ Gateway {self._status_action_gerund()} — queued for the next turn after it comes back."
else:
message = f"⏳ Gateway is {self._status_action_gerund()} and is not accepting another turn right now."
await adapter._send_with_retry(
chat_id=event.source.chat_id,
content=message,
reply_to=event.message_id,
metadata=thread_meta,
)
return True
# --- Normal busy case (agent actively running a task) ---
# The user sent a message while the agent is working. Interrupt the
# agent immediately so it stops the current tool-calling loop and
# processes the new message. The pending message is stored in the
# adapter so the base adapter picks it up once the interrupted run
# returns. A brief ack tells the user what's happening (debounced
# to avoid spam when they fire multiple messages quickly).
adapter = self.adapters.get(event.source.platform)
if not adapter:
return True
return False # let default path handle it
# Store the message so it's processed as the next turn after the
# interrupt causes the current run to exit.
from gateway.platforms.base import merge_pending_message_event
merge_pending_message_event(adapter._pending_messages, session_key, event)
# Interrupt the running agent — this aborts in-flight tool calls and
# causes the agent loop to exit at the next check point.
running_agent = self._running_agents.get(session_key)
if running_agent and running_agent is not _AGENT_PENDING_SENTINEL:
try:
running_agent.interrupt(event.text)
except Exception:
pass # don't let interrupt failure block the ack
# Debounce: only send an acknowledgment once every 30 seconds per session
# to avoid spamming the user when they send multiple messages quickly
_BUSY_ACK_COOLDOWN = 30
now = time.time()
last_ack = self._busy_ack_ts.get(session_key, 0)
if now - last_ack < _BUSY_ACK_COOLDOWN:
return True # interrupt sent, ack already delivered recently
self._busy_ack_ts[session_key] = now
# Build a status-rich acknowledgment
status_parts = []
if running_agent and running_agent is not _AGENT_PENDING_SENTINEL:
try:
summary = running_agent.get_activity_summary()
iteration = summary.get("api_call_count", 0)
max_iter = summary.get("max_iterations", 0)
current_tool = summary.get("current_tool")
start_ts = self._running_agents_ts.get(session_key, 0)
if start_ts:
elapsed_min = int((now - start_ts) / 60)
if elapsed_min > 0:
status_parts.append(f"{elapsed_min} min elapsed")
if max_iter:
status_parts.append(f"iteration {iteration}/{max_iter}")
if current_tool:
status_parts.append(f"running: {current_tool}")
except Exception:
pass
status_detail = f" ({', '.join(status_parts)})" if status_parts else ""
message = (
f"⚡ Interrupting current task{status_detail}. "
f"I'll respond to your message shortly."
)
thread_meta = {"thread_id": event.source.thread_id} if event.source.thread_id else None
if self._queue_during_drain_enabled():
self._queue_or_replace_pending_event(session_key, event)
message = f"⏳ Gateway {self._status_action_gerund()} — queued for the next turn after it comes back."
else:
message = f"⏳ Gateway is {self._status_action_gerund()} and is not accepting another turn right now."
try:
await adapter._send_with_retry(
chat_id=event.source.chat_id,
content=message,
reply_to=event.message_id,
metadata=thread_meta,
)
except Exception as e:
logger.debug("Failed to send busy-ack: %s", e)
await adapter._send_with_retry(
chat_id=event.source.chat_id,
content=message,
reply_to=event.message_id,
metadata=thread_meta,
)
return True
async def _drain_active_agents(self, timeout: float) -> tuple[Dict[str, Any], bool]:
@@ -2237,6 +2312,8 @@ class GatewayRunner:
self._running_agents.clear()
self._pending_messages.clear()
self._pending_approvals.clear()
if hasattr(self, '_busy_ack_ts'):
self._busy_ack_ts.clear()
self._shutdown_event.set()
# Global cleanup: kill any remaining tool subprocesses not tied
@@ -2721,6 +2798,7 @@ class GatewayRunner:
)
del self._running_agents[_quick_key]
self._running_agents_ts.pop(_quick_key, None)
self._busy_ack_ts.pop(_quick_key, None)
if _quick_key in self._running_agents:
if event.get_command() == "status":
@@ -8477,6 +8555,12 @@ class GatewayRunner:
cached = _cache.get(session_key)
if cached and cached[1] == _sig:
agent = cached[0]
# Reset activity timestamp so the inactivity timeout
# handler doesn't see stale idle time from the previous
# turn and immediately kill this agent. (#9051)
agent._last_activity_ts = time.time()
agent._last_activity_desc = "starting new turn (cached)"
agent._api_call_count = 0
logger.debug("Reusing cached agent for session %s", session_key)
if agent is None:
@@ -9244,15 +9328,11 @@ class GatewayRunner:
pass
except Exception as e:
logger.debug("Stream consumer wait before queued message failed: %s", e)
_response_previewed = bool(result.get("response_previewed"))
_already_streamed = bool(
_sc
and (
getattr(_sc, "final_response_sent", False)
or (
_response_previewed
and getattr(_sc, "already_sent", False)
)
or getattr(_sc, "already_sent", False)
)
)
first_response = result.get("final_response", "")
@@ -9336,13 +9416,9 @@ class GatewayRunner:
# them even if streaming had sent earlier partial output.
_sc = stream_consumer_holder[0]
if _sc and isinstance(response, dict) and not response.get("failed"):
_response_previewed = bool(response.get("response_previewed"))
if (
getattr(_sc, "final_response_sent", False)
or (
_response_previewed
and getattr(_sc, "already_sent", False)
)
or getattr(_sc, "already_sent", False)
):
response["already_sent"] = True

View File

@@ -8,6 +8,7 @@ import os
import sys
import subprocess
import shutil
from pathlib import Path
from hermes_cli.config import get_project_root, get_hermes_home, get_env_path
from hermes_constants import display_hermes_home
@@ -513,7 +514,87 @@ def run_doctor(args):
pass
_check_gateway_service_linger(issues)
# =========================================================================
# Check: Command installation (hermes bin symlink)
# =========================================================================
if sys.platform != "win32":
print()
print(color("◆ Command Installation", Colors.CYAN, Colors.BOLD))
# Determine the venv entry point location
_venv_bin = None
for _venv_name in ("venv", ".venv"):
_candidate = PROJECT_ROOT / _venv_name / "bin" / "hermes"
if _candidate.exists():
_venv_bin = _candidate
break
# Determine the expected command link directory (mirrors install.sh logic)
_prefix = os.environ.get("PREFIX", "")
_is_termux_env = bool(os.environ.get("TERMUX_VERSION")) or "com.termux/files/usr" in _prefix
if _is_termux_env and _prefix:
_cmd_link_dir = Path(_prefix) / "bin"
_cmd_link_display = "$PREFIX/bin"
else:
_cmd_link_dir = Path.home() / ".local" / "bin"
_cmd_link_display = "~/.local/bin"
_cmd_link = _cmd_link_dir / "hermes"
if _venv_bin is None:
check_warn(
"Venv entry point not found",
"(hermes not in venv/bin/ or .venv/bin/ — reinstall with pip install -e '.[all]')"
)
manual_issues.append(
f"Reinstall entry point: cd {PROJECT_ROOT} && source venv/bin/activate && pip install -e '.[all]'"
)
else:
check_ok(f"Venv entry point exists ({_venv_bin.relative_to(PROJECT_ROOT)})")
# Check the symlink at the command link location
if _cmd_link.is_symlink():
_target = _cmd_link.resolve()
_expected = _venv_bin.resolve()
if _target == _expected:
check_ok(f"{_cmd_link_display}/hermes → correct target")
else:
check_warn(
f"{_cmd_link_display}/hermes points to wrong target",
f"(→ {_target}, expected → {_expected})"
)
if should_fix:
_cmd_link.unlink()
_cmd_link.symlink_to(_venv_bin)
check_ok(f"Fixed symlink: {_cmd_link_display}/hermes → {_venv_bin}")
fixed_count += 1
else:
issues.append(f"Broken symlink at {_cmd_link_display}/hermes — run 'hermes doctor --fix'")
elif _cmd_link.exists():
# It's a regular file, not a symlink — possibly a wrapper script
check_ok(f"{_cmd_link_display}/hermes exists (non-symlink)")
else:
check_fail(
f"{_cmd_link_display}/hermes not found",
"(hermes command may not work outside the venv)"
)
if should_fix:
_cmd_link_dir.mkdir(parents=True, exist_ok=True)
_cmd_link.symlink_to(_venv_bin)
check_ok(f"Created symlink: {_cmd_link_display}/hermes → {_venv_bin}")
fixed_count += 1
# Check if the link dir is on PATH
_path_dirs = os.environ.get("PATH", "").split(os.pathsep)
if str(_cmd_link_dir) not in _path_dirs:
check_warn(
f"{_cmd_link_display} is not on your PATH",
"(add it to your shell config: export PATH=\"$HOME/.local/bin:$PATH\")"
)
manual_issues.append(f"Add {_cmd_link_display} to your PATH")
else:
issues.append(f"Missing {_cmd_link_display}/hermes symlink — run 'hermes doctor --fix'")
# =========================================================================
# Check: External tools
# =========================================================================

View File

@@ -715,7 +715,9 @@ def _detect_venv_dir() -> Path | None:
"""Detect the active virtualenv directory.
Checks ``sys.prefix`` first (works regardless of the directory name),
then falls back to probing common directory names under PROJECT_ROOT.
then ``VIRTUAL_ENV`` env var (covers uv-managed environments where
sys.prefix == sys.base_prefix), then falls back to probing common
directory names under PROJECT_ROOT.
Returns ``None`` when no virtualenv can be found.
"""
# If we're running inside a virtualenv, sys.prefix points to it.
@@ -724,6 +726,15 @@ def _detect_venv_dir() -> Path | None:
if venv.is_dir():
return venv
# uv and some other tools set VIRTUAL_ENV without changing sys.prefix.
# This catches `uv run` where sys.prefix == sys.base_prefix but the
# environment IS a venv. (#8620)
_virtual_env = os.environ.get("VIRTUAL_ENV")
if _virtual_env:
venv = Path(_virtual_env)
if venv.is_dir():
return venv
# Fallback: check common virtualenv directory names under the project root.
for candidate in (".venv", "venv"):
venv = PROJECT_ROOT / candidate

View File

@@ -6320,7 +6320,37 @@ Examples:
sys.exit(1)
_processed_argv = _coalesce_session_name_args(sys.argv[1:])
args = parser.parse_args(_processed_argv)
# ── Defensive subparser routing (bpo-9338 workaround) ───────────
# On some Python versions (notably <3.11), argparse fails to route
# subcommand tokens when the parent parser has nargs='?' optional
# arguments (--continue). The symptom: "unrecognized arguments: model"
# even though 'model' is a registered subcommand.
#
# Fix: when argv contains a token matching a known subcommand, set
# subparsers.required=True to force deterministic routing. If that
# fails (e.g. 'hermes -c model' where 'model' is consumed as the
# session name for --continue), fall back to the default behaviour.
import io as _io
_known_cmds = set(subparsers.choices.keys()) if hasattr(subparsers, "choices") else set()
_has_cmd_token = any(t in _known_cmds for t in _processed_argv if not t.startswith("-"))
if _has_cmd_token:
subparsers.required = True
_saved_stderr = sys.stderr
try:
sys.stderr = _io.StringIO()
args = parser.parse_args(_processed_argv)
sys.stderr = _saved_stderr
except SystemExit:
sys.stderr = _saved_stderr
# Subcommand name was consumed as a flag value (e.g. -c model).
# Fall back to optional subparsers so argparse handles it normally.
subparsers.required = False
args = parser.parse_args(_processed_argv)
else:
subparsers.required = False
args = parser.parse_args(_processed_argv)
# Handle --version flag
if args.version:

View File

@@ -63,6 +63,7 @@ CONFIGURABLE_TOOLSETS = [
("clarify", "❓ Clarifying Questions", "clarify"),
("delegation", "👥 Task Delegation", "delegate_task"),
("cronjob", "⏰ Cron Jobs", "create/list/update/pause/resume/run, with optional attached skills"),
("messaging", "📨 Cross-Platform Messaging", "send_message"),
("rl", "🧪 RL Training", "Tinker-Atropos training tools"),
("homeassistant", "🏠 Home Assistant", "smart home device control"),
]

View File

@@ -1268,6 +1268,19 @@ class AIAgent:
try:
_config_context_length = int(_config_context_length)
except (TypeError, ValueError):
logger.warning(
"Invalid model.context_length in config.yaml: %r"
"must be a plain integer (e.g. 256000, not '256K'). "
"Falling back to auto-detection.",
_config_context_length,
)
import sys
print(
f"\n⚠ Invalid model.context_length in config.yaml: {_config_context_length!r}\n"
f" Must be a plain integer (e.g. 256000, not '256K').\n"
f" Falling back to auto-detected context window.\n",
file=sys.stderr,
)
_config_context_length = None
# Store for reuse in switch_model (so config override persists across model switches)
@@ -1296,7 +1309,20 @@ class AIAgent:
try:
_config_context_length = int(_cp_ctx)
except (TypeError, ValueError):
pass
logger.warning(
"Invalid context_length for model %r in "
"custom_providers: %r — must be a plain "
"integer (e.g. 256000, not '256K'). "
"Falling back to auto-detection.",
self.model, _cp_ctx,
)
import sys
print(
f"\n⚠ Invalid context_length for model {self.model!r} in custom_providers: {_cp_ctx!r}\n"
f" Must be a plain integer (e.g. 256000, not '256K').\n"
f" Falling back to auto-detected context window.\n",
file=sys.stderr,
)
break
# Select context engine: config-driven (like memory providers).
@@ -3563,7 +3589,12 @@ class AIAgent:
item_id = ri.get("id")
if item_id and item_id in seen_item_ids:
continue
items.append(ri)
# Strip the "id" field — with store=False the
# Responses API cannot look up items by ID and
# returns 404. The encrypted_content blob is
# self-contained for reasoning chain continuity.
replay_item = {k: v for k, v in ri.items() if k != "id"}
items.append(replay_item)
if item_id:
seen_item_ids.add(item_id)
has_codex_reasoning = True
@@ -3704,8 +3735,10 @@ class AIAgent:
continue
seen_ids.add(item_id)
reasoning_item = {"type": "reasoning", "encrypted_content": encrypted}
if isinstance(item_id, str) and item_id:
reasoning_item["id"] = item_id
# Do NOT include the "id" in the outgoing item — with
# store=False (our default) the API tries to resolve the
# id server-side and returns 404. The id is still used
# above for local deduplication via seen_ids.
summary = item.get("summary")
if isinstance(summary, list):
reasoning_item["summary"] = summary
@@ -7833,6 +7866,7 @@ class AIAgent:
self._incomplete_scratchpad_retries = 0
self._codex_incomplete_retries = 0
self._thinking_prefill_retries = 0
self._post_tool_empty_retried = False
self._last_content_with_tools = None
self._mute_post_response = False
self._unicode_sanitization_passes = 0
@@ -9011,6 +9045,11 @@ class AIAgent:
self.api_key = _clean_key
if isinstance(getattr(self, "_client_kwargs", None), dict):
self._client_kwargs["api_key"] = _clean_key
# Also update the live client — it holds its
# own copy of api_key which auth_headers reads
# dynamically on every request.
if getattr(self, "client", None) is not None and hasattr(self.client, "api_key"):
self.client.api_key = _clean_key
_credential_sanitized = True
self._vprint(
f"{self.log_prefix}⚠️ API key contained non-ASCII characters "
@@ -10106,6 +10145,10 @@ class AIAgent:
if _had_prefill:
self._thinking_prefill_retries = 0
self._empty_content_retries = 0
# Successful tool execution — reset the post-tool nudge
# flag so it can fire again if the model goes empty on
# a LATER tool round.
self._post_tool_empty_retried = False
messages.append(assistant_msg)
self._emit_interim_assistant_message(assistant_msg)
@@ -10274,6 +10317,48 @@ class AIAgent:
self._response_was_previewed = True
break
# ── Post-tool-call empty response nudge ───────────
# The model returned empty after executing tool calls
# but there's no prior-turn content to fall back on.
# Instead of giving up, nudge the model to continue by
# appending a user-level hint. This is the #9400 case:
# weaker models (GLM-5, etc.) sometimes return empty
# after tool results instead of continuing to the next
# step. One retry with a nudge usually fixes it.
_prior_was_tool = any(
m.get("role") == "tool"
for m in messages[-5:] # check recent messages
)
if (
_prior_was_tool
and not getattr(self, "_post_tool_empty_retried", False)
):
self._post_tool_empty_retried = True
logger.info(
"Empty response after tool calls — nudging model "
"to continue processing"
)
self._emit_status(
"⚠️ Model returned empty after tool calls — "
"nudging to continue"
)
# Append the empty assistant message first so the
# message sequence stays valid:
# tool(result) → assistant("(empty)") → user(nudge)
# Without this, we'd have tool → user which most
# APIs reject as an invalid sequence.
assistant_msg["content"] = "(empty)"
messages.append(assistant_msg)
messages.append({
"role": "user",
"content": (
"You just executed tool calls but returned an "
"empty response. Please process the tool "
"results above and continue with the task."
),
})
continue
# ── Thinking-only prefill continuation ──────────
# The model produced structured reasoning (via API
# fields) but no visible text content. Rather than

View File

@@ -98,7 +98,7 @@ def find_nearby(lat: float, lon: float, types: list[str], radius: int = 1500, li
# Get coordinates (nodes have lat/lon directly, ways/relations use center)
plat = el.get("lat") or (el.get("center", {}) or {}).get("lat")
plon = el.get("lon") or (el.get("center", {}) or {}).get("lon")
if not plat or not plon:
if plat is None or plon is None:
continue
dist = haversine(lat, lon, plat, plon)

View File

@@ -25,6 +25,13 @@ def refresh_token(token_data: dict) -> dict:
import urllib.parse
import urllib.request
required_keys = ["client_id", "client_secret", "refresh_token", "token_uri"]
missing = [k for k in required_keys if k not in token_data]
if missing:
print(f"ERROR: google_token.json is missing required fields: {', '.join(missing)}", file=sys.stderr)
print("Please re-authenticate by running the Google Workspace setup script.", file=sys.stderr)
sys.exit(1)
params = urllib.parse.urlencode({
"client_id": token_data["client_id"],
"client_secret": token_data["client_secret"],

View File

@@ -233,9 +233,10 @@ class TestDeliverResultWrapping:
send_mock.assert_called_once()
sent_content = send_mock.call_args.kwargs.get("content") or send_mock.call_args[0][-1]
assert "Cronjob Response: daily-report" in sent_content
assert "(job_id: test-job)" in sent_content
assert "-------------" in sent_content
assert "Here is today's summary." in sent_content
assert "The agent cannot see this message" in sent_content
assert "To stop or manage this job" in sent_content
def test_delivery_uses_job_id_when_no_name(self):
"""When a job has no name, the wrapper should fall back to job id."""

View File

@@ -0,0 +1,293 @@
"""Tests for busy-session acknowledgment when user sends messages during active agent runs.
Verifies that users get an immediate status response instead of total silence
when the agent is working on a task. See PR fix for the @Lonely__MH report.
"""
import asyncio
import time
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
# ---------------------------------------------------------------------------
# Minimal stubs so we can import gateway code without heavy deps
# ---------------------------------------------------------------------------
import sys, types
_tg = types.ModuleType("telegram")
_tg.constants = types.ModuleType("telegram.constants")
_ct = MagicMock()
_ct.SUPERGROUP = "supergroup"
_ct.GROUP = "group"
_ct.PRIVATE = "private"
_tg.constants.ChatType = _ct
sys.modules.setdefault("telegram", _tg)
sys.modules.setdefault("telegram.constants", _tg.constants)
sys.modules.setdefault("telegram.ext", types.ModuleType("telegram.ext"))
from gateway.platforms.base import (
BasePlatformAdapter,
MessageEvent,
MessageType,
SessionSource,
build_session_key,
)
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _make_event(text="hello", chat_id="123", platform_val="telegram"):
"""Build a minimal MessageEvent."""
source = SessionSource(
platform=MagicMock(value=platform_val),
chat_id=chat_id,
chat_type="private",
user_id="user1",
)
evt = MessageEvent(
text=text,
message_type=MessageType.TEXT,
source=source,
message_id="msg1",
)
return evt
def _make_runner():
"""Build a minimal GatewayRunner-like object for testing."""
from gateway.run import GatewayRunner, _AGENT_PENDING_SENTINEL
runner = object.__new__(GatewayRunner)
runner._running_agents = {}
runner._running_agents_ts = {}
runner._pending_messages = {}
runner._busy_ack_ts = {}
runner._draining = False
runner.adapters = {}
runner.config = MagicMock()
runner.session_store = None
runner.hooks = MagicMock()
runner.hooks.emit = AsyncMock()
return runner, _AGENT_PENDING_SENTINEL
def _make_adapter(platform_val="telegram"):
"""Build a minimal adapter mock."""
adapter = MagicMock()
adapter._pending_messages = {}
adapter._send_with_retry = AsyncMock()
adapter.config = MagicMock()
adapter.config.extra = {}
adapter.platform = MagicMock(value=platform_val)
return adapter
# ---------------------------------------------------------------------------
# Tests
# ---------------------------------------------------------------------------
class TestBusySessionAck:
"""User sends a message while agent is running — should get acknowledgment."""
@pytest.mark.asyncio
async def test_sends_ack_when_agent_running(self):
"""First message during busy session should get a status ack."""
runner, sentinel = _make_runner()
adapter = _make_adapter()
event = _make_event(text="Are you working?")
sk = build_session_key(event.source)
# Simulate running agent
agent = MagicMock()
agent.get_activity_summary.return_value = {
"api_call_count": 21,
"max_iterations": 60,
"current_tool": "terminal",
"last_activity_ts": time.time(),
"last_activity_desc": "terminal",
"seconds_since_activity": 1.0,
}
runner._running_agents[sk] = agent
runner._running_agents_ts[sk] = time.time() - 600 # 10 min ago
runner.adapters[event.source.platform] = adapter
result = await runner._handle_active_session_busy_message(event, sk)
assert result is True # handled
# Verify ack was sent
adapter._send_with_retry.assert_called_once()
call_kwargs = adapter._send_with_retry.call_args
content = call_kwargs.kwargs.get("content") or call_kwargs[1].get("content", "")
if not content and call_kwargs.args:
# positional args
content = str(call_kwargs)
assert "Interrupting" in content or "respond" in content
assert "/stop" not in content # no need — we ARE interrupting
# Verify message was queued in adapter pending
assert sk in adapter._pending_messages
# Verify agent interrupt was called
agent.interrupt.assert_called_once_with("Are you working?")
@pytest.mark.asyncio
async def test_debounce_suppresses_rapid_acks(self):
"""Second message within 30s should NOT send another ack."""
runner, sentinel = _make_runner()
adapter = _make_adapter()
event1 = _make_event(text="hello?")
# Reuse the same source so platform mock matches
event2 = MessageEvent(
text="still there?",
message_type=MessageType.TEXT,
source=event1.source,
message_id="msg2",
)
sk = build_session_key(event1.source)
agent = MagicMock()
agent.get_activity_summary.return_value = {
"api_call_count": 5,
"max_iterations": 60,
"current_tool": None,
"last_activity_ts": time.time(),
"last_activity_desc": "api_call",
"seconds_since_activity": 0.5,
}
runner._running_agents[sk] = agent
runner._running_agents_ts[sk] = time.time() - 60
runner.adapters[event1.source.platform] = adapter
# First message — should get ack
result1 = await runner._handle_active_session_busy_message(event1, sk)
assert result1 is True
assert adapter._send_with_retry.call_count == 1
# Second message within cooldown — should be queued but no ack
result2 = await runner._handle_active_session_busy_message(event2, sk)
assert result2 is True
assert adapter._send_with_retry.call_count == 1 # still 1, no new ack
# But interrupt should still be called for both
assert agent.interrupt.call_count == 2
@pytest.mark.asyncio
async def test_ack_after_cooldown_expires(self):
"""After 30s cooldown, a new message should send a fresh ack."""
runner, sentinel = _make_runner()
adapter = _make_adapter()
event = _make_event(text="hello?")
sk = build_session_key(event.source)
agent = MagicMock()
agent.get_activity_summary.return_value = {
"api_call_count": 10,
"max_iterations": 60,
"current_tool": "web_search",
"last_activity_ts": time.time(),
"last_activity_desc": "tool",
"seconds_since_activity": 0.5,
}
runner._running_agents[sk] = agent
runner._running_agents_ts[sk] = time.time() - 120
runner.adapters[event.source.platform] = adapter
# First ack
await runner._handle_active_session_busy_message(event, sk)
assert adapter._send_with_retry.call_count == 1
# Fake that cooldown expired
runner._busy_ack_ts[sk] = time.time() - 31
# Second ack should go through
await runner._handle_active_session_busy_message(event, sk)
assert adapter._send_with_retry.call_count == 2
@pytest.mark.asyncio
async def test_includes_status_detail(self):
"""Ack message should include iteration and tool info when available."""
runner, sentinel = _make_runner()
adapter = _make_adapter()
event = _make_event(text="yo")
sk = build_session_key(event.source)
agent = MagicMock()
agent.get_activity_summary.return_value = {
"api_call_count": 21,
"max_iterations": 60,
"current_tool": "terminal",
"last_activity_ts": time.time(),
"last_activity_desc": "terminal",
"seconds_since_activity": 0.5,
}
runner._running_agents[sk] = agent
runner._running_agents_ts[sk] = time.time() - 600 # 10 min
runner.adapters[event.source.platform] = adapter
await runner._handle_active_session_busy_message(event, sk)
call_kwargs = adapter._send_with_retry.call_args
content = call_kwargs.kwargs.get("content", "")
assert "21/60" in content # iteration
assert "terminal" in content # current tool
assert "10 min" in content # elapsed
@pytest.mark.asyncio
async def test_draining_still_works(self):
"""Draining case should still produce the drain-specific message."""
runner, sentinel = _make_runner()
runner._draining = True
adapter = _make_adapter()
event = _make_event(text="hello")
sk = build_session_key(event.source)
runner.adapters[event.source.platform] = adapter
# Mock the drain-specific methods
runner._queue_during_drain_enabled = lambda: False
runner._status_action_gerund = lambda: "restarting"
result = await runner._handle_active_session_busy_message(event, sk)
assert result is True
call_kwargs = adapter._send_with_retry.call_args
content = call_kwargs.kwargs.get("content", "")
assert "restarting" in content
@pytest.mark.asyncio
async def test_pending_sentinel_no_interrupt(self):
"""When agent is PENDING_SENTINEL, don't call interrupt (it has no method)."""
runner, sentinel = _make_runner()
adapter = _make_adapter()
event = _make_event(text="hey")
sk = build_session_key(event.source)
runner._running_agents[sk] = sentinel
runner._running_agents_ts[sk] = time.time()
runner.adapters[event.source.platform] = adapter
result = await runner._handle_active_session_busy_message(event, sk)
assert result is True
# Should still send ack
adapter._send_with_retry.assert_called_once()
@pytest.mark.asyncio
async def test_no_adapter_falls_through(self):
"""If adapter is missing, return False so default path handles it."""
runner, sentinel = _make_runner()
event = _make_event(text="hello")
sk = build_session_key(event.source)
# No adapter registered
runner._running_agents[sk] = MagicMock()
result = await runner._handle_active_session_busy_message(event, sk)
assert result is False # not handled, let default path try

View File

@@ -0,0 +1,291 @@
"""Tests for duplicate reply suppression across the gateway stack.
Covers three fix paths:
1. base.py: stale response suppressed when interrupt_event is set and a
pending message exists (#8221 / #2483)
2. run.py return path: already_sent propagated from stream consumer's
already_sent flag without requiring response_previewed (#8375)
3. run.py queued-message path: first response correctly detected as
already-streamed when already_sent is True without response_previewed
"""
import asyncio
from types import SimpleNamespace
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from gateway.config import Platform, PlatformConfig
from gateway.platforms.base import (
BasePlatformAdapter,
MessageEvent,
MessageType,
ProcessingOutcome,
SendResult,
)
from gateway.session import SessionSource, build_session_key
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
class StubAdapter(BasePlatformAdapter):
"""Minimal concrete adapter for testing."""
def __init__(self):
super().__init__(PlatformConfig(enabled=True, token="fake"), Platform.DISCORD)
self.sent = []
async def connect(self):
return True
async def disconnect(self):
pass
async def send(self, chat_id, content, reply_to=None, metadata=None):
self.sent.append({"chat_id": chat_id, "content": content})
return SendResult(success=True, message_id="msg1")
async def send_typing(self, chat_id, metadata=None):
pass
async def get_chat_info(self, chat_id):
return {"id": chat_id}
def _make_event(text="hello", chat_id="c1", user_id="u1"):
return MessageEvent(
text=text,
source=SessionSource(
platform=Platform.DISCORD,
chat_id=chat_id,
chat_type="dm",
user_id=user_id,
),
message_id="m1",
)
# ===================================================================
# Test 1: base.py — stale response suppressed on interrupt (#8221)
# ===================================================================
class TestBaseInterruptSuppression:
@pytest.mark.asyncio
async def test_stale_response_suppressed_when_interrupted(self):
"""When interrupt_event is set AND a pending message exists,
base.py should suppress the stale response instead of sending it."""
adapter = StubAdapter()
stale_response = "This is the stale answer to the first question."
pending_response = "This is the answer to the second question."
call_count = 0
async def fake_handler(event):
nonlocal call_count
call_count += 1
if call_count == 1:
return stale_response
return pending_response
adapter.set_message_handler(fake_handler)
event_a = _make_event(text="first question")
session_key = build_session_key(event_a.source)
# Simulate: message A is being processed, message B arrives
# The interrupt event is set and B is in pending_messages
interrupt_event = asyncio.Event()
interrupt_event.set()
adapter._active_sessions[session_key] = interrupt_event
event_b = _make_event(text="second question")
adapter._pending_messages[session_key] = event_b
await adapter._process_message_background(event_a, session_key)
# The stale response should NOT have been sent.
stale_sends = [s for s in adapter.sent if s["content"] == stale_response]
assert len(stale_sends) == 0, (
f"Stale response was sent {len(stale_sends)} time(s) — should be suppressed"
)
# The pending message's response SHOULD have been sent.
pending_sends = [s for s in adapter.sent if s["content"] == pending_response]
assert len(pending_sends) == 1, "Pending message response should be sent"
@pytest.mark.asyncio
async def test_response_not_suppressed_without_interrupt(self):
"""Normal case: no interrupt, response should be sent."""
adapter = StubAdapter()
async def fake_handler(event):
return "Normal response"
adapter.set_message_handler(fake_handler)
event = _make_event()
session_key = build_session_key(event.source)
await adapter._process_message_background(event, session_key)
assert any(s["content"] == "Normal response" for s in adapter.sent)
@pytest.mark.asyncio
async def test_response_not_suppressed_with_interrupt_but_no_pending(self):
"""Interrupt event set but no pending message (race already resolved) —
response should still be sent."""
adapter = StubAdapter()
async def fake_handler(event):
return "Valid response"
adapter.set_message_handler(fake_handler)
event = _make_event()
session_key = build_session_key(event.source)
# Set interrupt but no pending message
interrupt_event = asyncio.Event()
interrupt_event.set()
adapter._active_sessions[session_key] = interrupt_event
await adapter._process_message_background(event, session_key)
assert any(s["content"] == "Valid response" for s in adapter.sent)
# ===================================================================
# Test 2: run.py — already_sent without response_previewed (#8375)
# ===================================================================
class TestAlreadySentWithoutResponsePreviewed:
"""The already_sent flag on the response dict should be set when the
stream consumer's already_sent is True, even if response_previewed is
False. This prevents duplicate sends when streaming was interrupted
by flood control."""
def _make_mock_stream_consumer(self, already_sent=False, final_response_sent=False):
sc = SimpleNamespace(
already_sent=already_sent,
final_response_sent=final_response_sent,
)
return sc
def test_already_sent_set_without_response_previewed(self):
"""Stream consumer already_sent=True should propagate to response
dict even when response_previewed is False."""
sc = self._make_mock_stream_consumer(already_sent=True, final_response_sent=False)
response = {"final_response": "text", "response_previewed": False}
# Reproduce the logic from run.py return path (post-fix)
if sc and isinstance(response, dict) and not response.get("failed"):
if (
getattr(sc, "final_response_sent", False)
or getattr(sc, "already_sent", False)
):
response["already_sent"] = True
assert response.get("already_sent") is True
def test_already_sent_not_set_when_nothing_sent(self):
"""When stream consumer hasn't sent anything, already_sent should
not be set on the response."""
sc = self._make_mock_stream_consumer(already_sent=False, final_response_sent=False)
response = {"final_response": "text", "response_previewed": False}
if sc and isinstance(response, dict) and not response.get("failed"):
if (
getattr(sc, "final_response_sent", False)
or getattr(sc, "already_sent", False)
):
response["already_sent"] = True
assert "already_sent" not in response
def test_already_sent_set_on_final_response_sent(self):
"""final_response_sent=True should still work as before."""
sc = self._make_mock_stream_consumer(already_sent=False, final_response_sent=True)
response = {"final_response": "text"}
if sc and isinstance(response, dict) and not response.get("failed"):
if (
getattr(sc, "final_response_sent", False)
or getattr(sc, "already_sent", False)
):
response["already_sent"] = True
assert response.get("already_sent") is True
def test_already_sent_not_set_on_failed_response(self):
"""Failed responses should never be suppressed — user needs to see
the error message even if streaming sent earlier partial output."""
sc = self._make_mock_stream_consumer(already_sent=True, final_response_sent=False)
response = {"final_response": "Error: something broke", "failed": True}
if sc and isinstance(response, dict) and not response.get("failed"):
if (
getattr(sc, "final_response_sent", False)
or getattr(sc, "already_sent", False)
):
response["already_sent"] = True
assert "already_sent" not in response
# ===================================================================
# Test 3: run.py queued-message path — _already_streamed detection
# ===================================================================
class TestQueuedMessageAlreadyStreamed:
"""The queued-message path should detect that the first response was
already streamed (already_sent=True) even without response_previewed."""
def _make_mock_sc(self, already_sent=False, final_response_sent=False):
return SimpleNamespace(
already_sent=already_sent,
final_response_sent=final_response_sent,
)
def test_queued_path_detects_already_streamed(self):
"""already_sent=True on stream consumer means first response was
streamed — skip re-sending before processing queued message."""
_sc = self._make_mock_sc(already_sent=True)
# Reproduce the queued-message logic from run.py (post-fix)
_already_streamed = bool(
_sc
and (
getattr(_sc, "final_response_sent", False)
or getattr(_sc, "already_sent", False)
)
)
assert _already_streamed is True
def test_queued_path_sends_when_not_streamed(self):
"""Nothing was streamed — first response should be sent before
processing the queued message."""
_sc = self._make_mock_sc(already_sent=False)
_already_streamed = bool(
_sc
and (
getattr(_sc, "final_response_sent", False)
or getattr(_sc, "already_sent", False)
)
)
assert _already_streamed is False
def test_queued_path_with_no_stream_consumer(self):
"""No stream consumer at all (streaming disabled) — not streamed."""
_sc = None
_already_streamed = bool(
_sc
and (
getattr(_sc, "final_response_sent", False)
or getattr(_sc, "already_sent", False)
)
)
assert _already_streamed is False

View File

@@ -335,6 +335,29 @@ def _make_adapter():
return adapter
# ---------------------------------------------------------------------------
# Typing indicator
# ---------------------------------------------------------------------------
class TestMatrixTypingIndicator:
def setup_method(self):
self.adapter = _make_adapter()
self.adapter._client = MagicMock()
self.adapter._client.set_typing = AsyncMock()
@pytest.mark.asyncio
async def test_stop_typing_clears_matrix_typing_state(self):
"""stop_typing() should send typing=false instead of waiting for timeout expiry."""
from gateway.platforms.matrix import RoomID
await self.adapter.stop_typing("!room:example.org")
self.adapter._client.set_typing.assert_awaited_once_with(
RoomID("!room:example.org"),
timeout=0,
)
# ---------------------------------------------------------------------------
# mxc:// URL conversion
# ---------------------------------------------------------------------------
@@ -1831,4 +1854,3 @@ class TestMatrixPresence:
assert result is False

View File

@@ -0,0 +1,275 @@
"""Tests for the Command Installation check in hermes doctor."""
import os
import sys
import types
from argparse import Namespace
from pathlib import Path
import pytest
import hermes_cli.doctor as doctor_mod
def _setup_doctor_env(monkeypatch, tmp_path, venv_name="venv"):
"""Create a minimal HERMES_HOME + PROJECT_ROOT for doctor tests."""
home = tmp_path / ".hermes"
home.mkdir(parents=True, exist_ok=True)
(home / "config.yaml").write_text("memory: {}\n", encoding="utf-8")
project = tmp_path / "project"
project.mkdir(exist_ok=True)
# Create a fake venv entry point
venv_bin_dir = project / venv_name / "bin"
venv_bin_dir.mkdir(parents=True, exist_ok=True)
hermes_bin = venv_bin_dir / "hermes"
hermes_bin.write_text("#!/usr/bin/env python\n# entry point\n")
hermes_bin.chmod(0o755)
monkeypatch.setattr(doctor_mod, "HERMES_HOME", home)
monkeypatch.setattr(doctor_mod, "PROJECT_ROOT", project)
monkeypatch.setattr(doctor_mod, "_DHH", str(home))
# Stub model_tools so doctor doesn't fail on import
fake_model_tools = types.SimpleNamespace(
check_tool_availability=lambda *a, **kw: ([], []),
TOOLSET_REQUIREMENTS={},
)
monkeypatch.setitem(sys.modules, "model_tools", fake_model_tools)
# Stub auth checks
try:
from hermes_cli import auth as _auth_mod
monkeypatch.setattr(_auth_mod, "get_nous_auth_status", lambda: {})
monkeypatch.setattr(_auth_mod, "get_codex_auth_status", lambda: {})
except Exception:
pass
# Stub httpx.get to avoid network calls
try:
import httpx
monkeypatch.setattr(httpx, "get", lambda *a, **kw: types.SimpleNamespace(status_code=200))
except Exception:
pass
return home, project, hermes_bin
def _run_doctor(fix=False):
"""Run doctor and capture stdout."""
import io
import contextlib
buf = io.StringIO()
with contextlib.redirect_stdout(buf):
doctor_mod.run_doctor(Namespace(fix=fix))
return buf.getvalue()
class TestDoctorCommandInstallation:
"""Tests for the ◆ Command Installation section."""
@pytest.mark.skipif(sys.platform == "win32", reason="Symlink check is Unix-only")
def test_correct_symlink_shows_ok(self, monkeypatch, tmp_path):
home, project, hermes_bin = _setup_doctor_env(monkeypatch, tmp_path)
# Create the command link dir with correct symlink
cmd_link_dir = tmp_path / ".local" / "bin"
cmd_link_dir.mkdir(parents=True)
cmd_link = cmd_link_dir / "hermes"
cmd_link.symlink_to(hermes_bin)
monkeypatch.setattr(Path, "home", lambda: tmp_path)
out = _run_doctor(fix=False)
assert "Command Installation" in out
assert "Venv entry point exists" in out
assert "correct target" in out
@pytest.mark.skipif(sys.platform == "win32", reason="Symlink check is Unix-only")
def test_missing_symlink_shows_fail(self, monkeypatch, tmp_path):
home, project, hermes_bin = _setup_doctor_env(monkeypatch, tmp_path)
monkeypatch.setattr(Path, "home", lambda: tmp_path)
# Don't create the symlink — it should be missing
out = _run_doctor(fix=False)
assert "Command Installation" in out
assert "Venv entry point exists" in out
assert "not found" in out
assert "hermes doctor --fix" in out
@pytest.mark.skipif(sys.platform == "win32", reason="Symlink check is Unix-only")
def test_fix_creates_missing_symlink(self, monkeypatch, tmp_path):
home, project, hermes_bin = _setup_doctor_env(monkeypatch, tmp_path)
monkeypatch.setattr(Path, "home", lambda: tmp_path)
out = _run_doctor(fix=True)
assert "Command Installation" in out
assert "Created symlink" in out
# Verify the symlink was actually created
cmd_link = tmp_path / ".local" / "bin" / "hermes"
assert cmd_link.is_symlink()
assert cmd_link.resolve() == hermes_bin.resolve()
@pytest.mark.skipif(sys.platform == "win32", reason="Symlink check is Unix-only")
def test_wrong_target_symlink_shows_warn(self, monkeypatch, tmp_path):
home, project, hermes_bin = _setup_doctor_env(monkeypatch, tmp_path)
# Create a symlink pointing to the wrong target
cmd_link_dir = tmp_path / ".local" / "bin"
cmd_link_dir.mkdir(parents=True)
cmd_link = cmd_link_dir / "hermes"
wrong_target = tmp_path / "wrong_hermes"
wrong_target.write_text("#!/usr/bin/env python\n")
cmd_link.symlink_to(wrong_target)
monkeypatch.setattr(Path, "home", lambda: tmp_path)
out = _run_doctor(fix=False)
assert "Command Installation" in out
assert "wrong target" in out
@pytest.mark.skipif(sys.platform == "win32", reason="Symlink check is Unix-only")
def test_fix_repairs_wrong_symlink(self, monkeypatch, tmp_path):
home, project, hermes_bin = _setup_doctor_env(monkeypatch, tmp_path)
# Create a symlink pointing to wrong target
cmd_link_dir = tmp_path / ".local" / "bin"
cmd_link_dir.mkdir(parents=True)
cmd_link = cmd_link_dir / "hermes"
wrong_target = tmp_path / "wrong_hermes"
wrong_target.write_text("#!/usr/bin/env python\n")
cmd_link.symlink_to(wrong_target)
monkeypatch.setattr(Path, "home", lambda: tmp_path)
out = _run_doctor(fix=True)
assert "Fixed symlink" in out
# Verify the symlink now points to the correct target
assert cmd_link.is_symlink()
assert cmd_link.resolve() == hermes_bin.resolve()
@pytest.mark.skipif(sys.platform == "win32", reason="Symlink check is Unix-only")
def test_missing_venv_entry_point_shows_warn(self, monkeypatch, tmp_path):
home = tmp_path / ".hermes"
home.mkdir(parents=True, exist_ok=True)
(home / "config.yaml").write_text("memory: {}\n", encoding="utf-8")
project = tmp_path / "project"
project.mkdir(exist_ok=True)
# Do NOT create any venv entry point
monkeypatch.setattr(doctor_mod, "HERMES_HOME", home)
monkeypatch.setattr(doctor_mod, "PROJECT_ROOT", project)
monkeypatch.setattr(doctor_mod, "_DHH", str(home))
monkeypatch.setattr(Path, "home", lambda: tmp_path)
fake_model_tools = types.SimpleNamespace(
check_tool_availability=lambda *a, **kw: ([], []),
TOOLSET_REQUIREMENTS={},
)
monkeypatch.setitem(sys.modules, "model_tools", fake_model_tools)
try:
from hermes_cli import auth as _auth_mod
monkeypatch.setattr(_auth_mod, "get_nous_auth_status", lambda: {})
monkeypatch.setattr(_auth_mod, "get_codex_auth_status", lambda: {})
except Exception:
pass
try:
import httpx
monkeypatch.setattr(httpx, "get", lambda *a, **kw: types.SimpleNamespace(status_code=200))
except Exception:
pass
out = _run_doctor(fix=False)
assert "Command Installation" in out
assert "Venv entry point not found" in out
@pytest.mark.skipif(sys.platform == "win32", reason="Symlink check is Unix-only")
def test_dot_venv_dir_is_found(self, monkeypatch, tmp_path):
"""The check finds entry points in .venv/ as well as venv/."""
home, project, _ = _setup_doctor_env(monkeypatch, tmp_path, venv_name=".venv")
# Create the command link with correct symlink
hermes_bin = project / ".venv" / "bin" / "hermes"
cmd_link_dir = tmp_path / ".local" / "bin"
cmd_link_dir.mkdir(parents=True)
cmd_link = cmd_link_dir / "hermes"
cmd_link.symlink_to(hermes_bin)
monkeypatch.setattr(Path, "home", lambda: tmp_path)
out = _run_doctor(fix=False)
assert "Venv entry point exists" in out
assert ".venv/bin/hermes" in out
@pytest.mark.skipif(sys.platform == "win32", reason="Symlink check is Unix-only")
def test_non_symlink_regular_file_shows_ok(self, monkeypatch, tmp_path):
"""If ~/.local/bin/hermes is a regular file (not symlink), accept it."""
home, project, hermes_bin = _setup_doctor_env(monkeypatch, tmp_path)
cmd_link_dir = tmp_path / ".local" / "bin"
cmd_link_dir.mkdir(parents=True)
cmd_link = cmd_link_dir / "hermes"
cmd_link.write_text("#!/bin/sh\nexec python -m hermes_cli.main \"$@\"\n")
monkeypatch.setattr(Path, "home", lambda: tmp_path)
out = _run_doctor(fix=False)
assert "non-symlink" in out
@pytest.mark.skipif(sys.platform == "win32", reason="Symlink check is Unix-only")
def test_termux_uses_prefix_bin(self, monkeypatch, tmp_path):
"""On Termux, the command link dir is $PREFIX/bin."""
prefix_dir = tmp_path / "termux_prefix"
prefix_bin = prefix_dir / "bin"
prefix_bin.mkdir(parents=True)
home, project, hermes_bin = _setup_doctor_env(monkeypatch, tmp_path)
monkeypatch.setenv("TERMUX_VERSION", "0.118.3")
monkeypatch.setenv("PREFIX", str(prefix_dir))
monkeypatch.setattr(Path, "home", lambda: tmp_path)
out = _run_doctor(fix=False)
assert "Command Installation" in out
assert "$PREFIX/bin" in out
def test_windows_skips_check(self, monkeypatch, tmp_path):
"""On Windows, the Command Installation section is skipped."""
home = tmp_path / ".hermes"
home.mkdir(parents=True, exist_ok=True)
(home / "config.yaml").write_text("memory: {}\n", encoding="utf-8")
project = tmp_path / "project"
project.mkdir(exist_ok=True)
monkeypatch.setattr(doctor_mod, "HERMES_HOME", home)
monkeypatch.setattr(doctor_mod, "PROJECT_ROOT", project)
monkeypatch.setattr(doctor_mod, "_DHH", str(home))
monkeypatch.setattr(sys, "platform", "win32")
fake_model_tools = types.SimpleNamespace(
check_tool_availability=lambda *a, **kw: ([], []),
TOOLSET_REQUIREMENTS={},
)
monkeypatch.setitem(sys.modules, "model_tools", fake_model_tools)
try:
from hermes_cli import auth as _auth_mod
monkeypatch.setattr(_auth_mod, "get_nous_auth_status", lambda: {})
monkeypatch.setattr(_auth_mod, "get_codex_auth_status", lambda: {})
except Exception:
pass
try:
import httpx
monkeypatch.setattr(httpx, "get", lambda *a, **kw: types.SimpleNamespace(status_code=200))
except Exception:
pass
out = _run_doctor(fix=False)
assert "Command Installation" not in out

View File

@@ -0,0 +1,148 @@
"""Tests for the defensive subparser routing workaround (bpo-9338).
The main() function in hermes_cli/main.py sets subparsers.required=True
when argv contains a known subcommand name. This forces deterministic
routing on Python versions where argparse fails to match subcommand tokens
when the parent parser has nargs='?' optional arguments (--continue).
If the subcommand token is consumed as a flag value (e.g. `hermes -c model`
to resume a session named 'model'), the required=True parse raises
SystemExit and the code falls back to the default required=False behaviour.
"""
import argparse
import io
import sys
import pytest
def _build_parser():
"""Build a minimal replica of the hermes top-level parser."""
parser = argparse.ArgumentParser(prog="hermes")
parser.add_argument("--version", "-V", action="store_true")
parser.add_argument("--resume", "-r", metavar="SESSION", default=None)
parser.add_argument(
"--continue", "-c",
dest="continue_last",
nargs="?",
const=True,
default=None,
metavar="SESSION_NAME",
)
parser.add_argument("--worktree", "-w", action="store_true", default=False)
parser.add_argument("--skills", "-s", action="append", default=None)
parser.add_argument("--yolo", action="store_true", default=False)
parser.add_argument("--pass-session-id", action="store_true", default=False)
subparsers = parser.add_subparsers(dest="command", help="Command to run")
chat_p = subparsers.add_parser("chat")
chat_p.add_argument("-q", "--query", default=None)
subparsers.add_parser("model")
subparsers.add_parser("gateway")
subparsers.add_parser("setup")
return parser, subparsers
def _safe_parse(parser, subparsers, argv):
"""Replica of the defensive parsing logic from main()."""
known_cmds = set(subparsers.choices.keys()) if hasattr(subparsers, "choices") else set()
has_cmd_token = any(t in known_cmds for t in argv if not t.startswith("-"))
if has_cmd_token:
subparsers.required = True
saved_stderr = sys.stderr
try:
sys.stderr = io.StringIO()
args = parser.parse_args(argv)
sys.stderr = saved_stderr
return args
except SystemExit:
sys.stderr = saved_stderr
subparsers.required = False
return parser.parse_args(argv)
else:
subparsers.required = False
return parser.parse_args(argv)
class TestSubparserRoutingFallback:
"""Verify the bpo-9338 defensive routing works for all key cases."""
def test_direct_subcommand(self):
parser, sub = _build_parser()
args = _safe_parse(parser, sub, ["model"])
assert args.command == "model"
def test_subcommand_with_flags(self):
parser, sub = _build_parser()
args = _safe_parse(parser, sub, ["--yolo", "model"])
assert args.command == "model"
assert args.yolo is True
def test_bare_hermes_defaults_to_none(self):
parser, sub = _build_parser()
args = _safe_parse(parser, sub, [])
assert args.command is None
def test_flags_only_defaults_to_none(self):
parser, sub = _build_parser()
args = _safe_parse(parser, sub, ["--yolo"])
assert args.command is None
assert args.yolo is True
def test_continue_flag_alone(self):
parser, sub = _build_parser()
args = _safe_parse(parser, sub, ["-c"])
assert args.command is None
assert args.continue_last is True
def test_continue_with_session_name(self):
parser, sub = _build_parser()
args = _safe_parse(parser, sub, ["-c", "myproject"])
assert args.command is None
assert args.continue_last == "myproject"
def test_continue_with_subcommand_name_as_session(self):
"""Edge case: session named 'model' — should be treated as session name, not subcommand."""
parser, sub = _build_parser()
args = _safe_parse(parser, sub, ["-c", "model"])
assert args.command is None
assert args.continue_last == "model"
def test_continue_with_session_then_subcommand(self):
parser, sub = _build_parser()
args = _safe_parse(parser, sub, ["-c", "myproject", "model"])
assert args.command == "model"
assert args.continue_last == "myproject"
def test_chat_with_query(self):
parser, sub = _build_parser()
args = _safe_parse(parser, sub, ["chat", "-q", "hello"])
assert args.command == "chat"
assert args.query == "hello"
def test_resume_flag(self):
parser, sub = _build_parser()
args = _safe_parse(parser, sub, ["-r", "abc123"])
assert args.command is None
assert args.resume == "abc123"
def test_resume_with_subcommand(self):
parser, sub = _build_parser()
args = _safe_parse(parser, sub, ["-r", "abc123", "chat"])
assert args.command == "chat"
assert args.resume == "abc123"
def test_skills_flag_with_subcommand(self):
parser, sub = _build_parser()
args = _safe_parse(parser, sub, ["-s", "myskill", "chat"])
assert args.command == "chat"
assert args.skills == ["myskill"]
def test_all_flags_with_subcommand(self):
parser, sub = _build_parser()
args = _safe_parse(parser, sub, ["--yolo", "-w", "-s", "myskill", "model"])
assert args.command == "model"
assert args.yolo is True
assert args.worktree is True
assert args.skills == ["myskill"]

View File

@@ -8,6 +8,7 @@ from hermes_cli.tools_config import (
_platform_toolset_summary,
_save_platform_tools,
_toolset_has_keys,
CONFIGURABLE_TOOLSETS,
TOOL_CATEGORIES,
_visible_providers,
tools_command,
@@ -22,6 +23,15 @@ def test_get_platform_tools_uses_default_when_platform_not_configured():
assert enabled
def test_configurable_toolsets_include_messaging():
assert any(ts_key == "messaging" for ts_key, _, _ in CONFIGURABLE_TOOLSETS)
def test_get_platform_tools_default_telegram_includes_messaging():
enabled = _get_platform_tools({}, "telegram")
assert "messaging" in enabled
def test_get_platform_tools_preserves_explicit_empty_selection():
config = {"platform_toolsets": {"cli": []}}

View File

@@ -0,0 +1,111 @@
"""Tests that invalid context_length values in config produce visible warnings."""
from unittest.mock import patch, MagicMock, call
def _build_agent(model_cfg, custom_providers=None, model="anthropic/claude-opus-4.6"):
"""Build an AIAgent with the given model config."""
cfg = {"model": model_cfg}
if custom_providers is not None:
cfg["custom_providers"] = custom_providers
with (
patch("hermes_cli.config.load_config", return_value=cfg),
patch("agent.model_metadata.get_model_context_length", return_value=128_000),
patch("run_agent.get_tool_definitions", return_value=[]),
patch("run_agent.check_toolset_requirements", return_value={}),
patch("run_agent.OpenAI"),
):
from run_agent import AIAgent
agent = AIAgent(
model=model,
api_key="test-key-1234567890",
quiet_mode=True,
skip_context_files=True,
skip_memory=True,
)
return agent
def test_valid_integer_context_length_no_warning():
"""Plain integer context_length should work silently."""
with patch("run_agent.logger") as mock_logger:
agent = _build_agent({"default": "gpt5.4", "provider": "custom",
"base_url": "http://localhost:4000/v1",
"context_length": 256000})
assert agent._config_context_length == 256000
# No warning about invalid context_length
for c in mock_logger.warning.call_args_list:
assert "Invalid" not in str(c)
def test_string_k_suffix_context_length_warns():
"""context_length: '256K' should warn the user clearly."""
with patch("run_agent.logger") as mock_logger:
agent = _build_agent({"default": "gpt5.4", "provider": "custom",
"base_url": "http://localhost:4000/v1",
"context_length": "256K"})
assert agent._config_context_length is None
# Should have warned
warning_calls = [c for c in mock_logger.warning.call_args_list
if "Invalid" in str(c) and "256K" in str(c)]
assert len(warning_calls) == 1
assert "plain integer" in str(warning_calls[0])
def test_string_numeric_context_length_works():
"""context_length: '256000' (string) should parse fine via int()."""
with patch("run_agent.logger") as mock_logger:
agent = _build_agent({"default": "gpt5.4", "provider": "custom",
"base_url": "http://localhost:4000/v1",
"context_length": "256000"})
assert agent._config_context_length == 256000
for c in mock_logger.warning.call_args_list:
assert "Invalid" not in str(c)
def test_custom_providers_invalid_context_length_warns():
"""Invalid context_length in custom_providers should warn."""
custom_providers = [
{
"name": "LiteLLM",
"base_url": "http://localhost:4000/v1",
"models": {
"gpt5.4": {"context_length": "256K"}
},
}
]
with patch("run_agent.logger") as mock_logger:
agent = _build_agent(
{"default": "gpt5.4", "provider": "custom",
"base_url": "http://localhost:4000/v1"},
custom_providers=custom_providers,
model="gpt5.4",
)
warning_calls = [c for c in mock_logger.warning.call_args_list
if "Invalid" in str(c) and "256K" in str(c)]
assert len(warning_calls) == 1
assert "custom_providers" in str(warning_calls[0])
def test_custom_providers_valid_context_length():
"""Valid integer in custom_providers should work silently."""
custom_providers = [
{
"name": "LiteLLM",
"base_url": "http://localhost:4000/v1",
"models": {
"gpt5.4": {"context_length": 256000}
},
}
]
with patch("run_agent.logger") as mock_logger:
agent = _build_agent(
{"default": "gpt5.4", "provider": "custom",
"base_url": "http://localhost:4000/v1"},
custom_providers=custom_providers,
model="gpt5.4",
)
for c in mock_logger.warning.call_args_list:
assert "Invalid" not in str(c)

View File

@@ -1249,13 +1249,17 @@ def test_chat_messages_to_responses_input_deduplicates_reasoning_ids(monkeypatch
]
items = agent._chat_messages_to_responses_input(messages)
reasoning_ids = [it["id"] for it in items if it.get("type") == "reasoning"]
# rs_aaa should appear only once (first occurrence kept)
assert reasoning_ids.count("rs_aaa") == 1
# rs_bbb and rs_ccc should each appear once
assert reasoning_ids.count("rs_bbb") == 1
assert reasoning_ids.count("rs_ccc") == 1
assert len(reasoning_ids) == 3
reasoning_items = [it for it in items if it.get("type") == "reasoning"]
# Dedup: rs_aaa appears in both turns but should only be emitted once.
# 3 unique items total: enc_1 (from rs_aaa), enc_2 (rs_bbb), enc_3 (rs_ccc).
assert len(reasoning_items) == 3
encrypted = [it["encrypted_content"] for it in reasoning_items]
assert encrypted.count("enc_1") == 1
assert "enc_2" in encrypted
assert "enc_3" in encrypted
# IDs must be stripped — with store=False the API 404s on id lookups.
for it in reasoning_items:
assert "id" not in it
def test_preflight_codex_input_deduplicates_reasoning_ids(monkeypatch):
@@ -1272,7 +1276,11 @@ def test_preflight_codex_input_deduplicates_reasoning_ids(monkeypatch):
normalized = agent._preflight_codex_input_items(raw_input)
reasoning_items = [it for it in normalized if it.get("type") == "reasoning"]
reasoning_ids = [it["id"] for it in reasoning_items]
assert reasoning_ids.count("rs_xyz") == 1
assert reasoning_ids.count("rs_zzz") == 1
# rs_xyz duplicate should be collapsed to one item; rs_zzz kept.
assert len(reasoning_items) == 2
encrypted = [it["encrypted_content"] for it in reasoning_items]
assert encrypted.count("enc_a") == 1
assert "enc_b" in encrypted
# IDs must be stripped — with store=False the API 404s on id lookups.
for it in reasoning_items:
assert "id" not in it

View File

@@ -230,3 +230,67 @@ class TestSanitizeStructureNonAscii:
assert _sanitize_structure_non_ascii(payload) is True
assert payload["default_headers"]["X-Title"] == "Hermes Agent"
assert payload["default_headers"]["User-Agent"] == "Hermes/1.0 "
class TestApiKeyClientSync:
"""Verify that ASCII recovery updates the live OpenAI client's api_key.
The OpenAI SDK stores its own copy of api_key which auth_headers reads
dynamically. If only self.api_key is updated but self.client.api_key
is not, the next request still sends the corrupted key in the
Authorization header.
"""
def test_client_api_key_updated_on_sanitize(self):
"""Simulate the recovery path and verify client.api_key is synced."""
from unittest.mock import MagicMock
from run_agent import AIAgent
agent = AIAgent.__new__(AIAgent)
bad_key = "sk-proj-abc\u028bdef" # ʋ lookalike at position 11
agent.api_key = bad_key
agent._client_kwargs = {"api_key": bad_key}
agent.quiet_mode = True
# Mock client with its own api_key attribute (like the real OpenAI client)
mock_client = MagicMock()
mock_client.api_key = bad_key
agent.client = mock_client
# --- replicate the recovery logic from run_agent.py ---
_raw_key = agent.api_key
_clean_key = _strip_non_ascii(_raw_key)
assert _clean_key != _raw_key, "test precondition: key should have non-ASCII"
agent.api_key = _clean_key
agent._client_kwargs["api_key"] = _clean_key
if getattr(agent, "client", None) is not None and hasattr(agent.client, "api_key"):
agent.client.api_key = _clean_key
# All three locations should now hold the clean key
assert agent.api_key == "sk-proj-abcdef"
assert agent._client_kwargs["api_key"] == "sk-proj-abcdef"
assert agent.client.api_key == "sk-proj-abcdef"
# The bad char should be gone from all of them
assert "\u028b" not in agent.api_key
assert "\u028b" not in agent._client_kwargs["api_key"]
assert "\u028b" not in agent.client.api_key
def test_client_none_does_not_crash(self):
"""Recovery should not crash when client is None (pre-init)."""
from run_agent import AIAgent
agent = AIAgent.__new__(AIAgent)
bad_key = "sk-proj-\u028b"
agent.api_key = bad_key
agent._client_kwargs = {"api_key": bad_key}
agent.client = None
_clean_key = _strip_non_ascii(bad_key)
agent.api_key = _clean_key
agent._client_kwargs["api_key"] = _clean_key
if getattr(agent, "client", None) is not None and hasattr(agent.client, "api_key"):
agent.client.api_key = _clean_key
assert agent.api_key == "sk-proj-"
assert agent.client is None # should not have been touched

View File

@@ -752,6 +752,38 @@ class TestParseTargetRefDiscord:
assert is_explicit is True
class TestParseTargetRefMatrix:
"""_parse_target_ref correctly handles Matrix room IDs and user MXIDs."""
def test_matrix_room_id_is_explicit(self):
"""Matrix room IDs (!) are recognized as explicit targets."""
chat_id, thread_id, is_explicit = _parse_target_ref("matrix", "!HLOQwxYGgFPMPJUSNR:matrix.org")
assert chat_id == "!HLOQwxYGgFPMPJUSNR:matrix.org"
assert thread_id is None
assert is_explicit is True
def test_matrix_user_mxid_is_explicit(self):
"""Matrix user MXIDs (@) are recognized as explicit targets."""
chat_id, thread_id, is_explicit = _parse_target_ref("matrix", "@hermes:matrix.org")
assert chat_id == "@hermes:matrix.org"
assert thread_id is None
assert is_explicit is True
def test_matrix_alias_is_not_explicit(self):
"""Matrix room aliases (#) are NOT explicit — they need resolution."""
chat_id, thread_id, is_explicit = _parse_target_ref("matrix", "#general:matrix.org")
assert chat_id is None
assert is_explicit is False
def test_matrix_prefix_only_matches_matrix_platform(self):
"""! and @ prefixes are only treated as explicit for the matrix platform."""
chat_id, _, is_explicit = _parse_target_ref("telegram", "!something")
assert is_explicit is False
chat_id, _, is_explicit = _parse_target_ref("discord", "@someone")
assert is_explicit is False
class TestSendDiscordThreadId:
"""_send_discord uses thread_id when provided."""
@@ -854,3 +886,225 @@ class TestSendToPlatformDiscordThread:
send_mock.assert_awaited_once()
_, call_kwargs = send_mock.await_args
assert call_kwargs["thread_id"] is None
# ---------------------------------------------------------------------------
# Discord media attachment support
# ---------------------------------------------------------------------------
class TestSendDiscordMedia:
"""_send_discord uploads media files via multipart/form-data."""
@staticmethod
def _build_mock(response_status, response_data=None, response_text="error body"):
"""Build a properly-structured aiohttp mock chain."""
mock_resp = MagicMock()
mock_resp.status = response_status
mock_resp.json = AsyncMock(return_value=response_data or {"id": "msg123"})
mock_resp.text = AsyncMock(return_value=response_text)
mock_resp.__aenter__ = AsyncMock(return_value=mock_resp)
mock_resp.__aexit__ = AsyncMock(return_value=None)
mock_session = MagicMock()
mock_session.__aenter__ = AsyncMock(return_value=mock_session)
mock_session.__aexit__ = AsyncMock(return_value=None)
mock_session.post = MagicMock(return_value=mock_resp)
return mock_session, mock_resp
def test_text_and_media_sends_both(self, tmp_path):
"""Text message is sent first, then each media file as multipart."""
img = tmp_path / "photo.png"
img.write_bytes(b"\x89PNG fake image data")
mock_session, _ = self._build_mock(200, {"id": "msg999"})
with patch("aiohttp.ClientSession", return_value=mock_session):
result = asyncio.run(
_send_discord("tok", "111", "hello", media_files=[(str(img), False)])
)
assert result["success"] is True
assert result["message_id"] == "msg999"
# Two POSTs: one text JSON, one multipart upload
assert mock_session.post.call_count == 2
def test_media_only_skips_text_post(self, tmp_path):
"""When message is empty and media is present, text POST is skipped."""
img = tmp_path / "photo.png"
img.write_bytes(b"\x89PNG fake image data")
mock_session, _ = self._build_mock(200, {"id": "media_only"})
with patch("aiohttp.ClientSession", return_value=mock_session):
result = asyncio.run(
_send_discord("tok", "222", " ", media_files=[(str(img), False)])
)
assert result["success"] is True
# Only one POST: the media upload (text was whitespace-only)
assert mock_session.post.call_count == 1
def test_missing_media_file_collected_as_warning(self):
"""Non-existent media paths produce warnings but don't fail."""
mock_session, _ = self._build_mock(200, {"id": "txt_ok"})
with patch("aiohttp.ClientSession", return_value=mock_session):
result = asyncio.run(
_send_discord("tok", "333", "hello", media_files=[("/nonexistent/file.png", False)])
)
assert result["success"] is True
assert "warnings" in result
assert any("not found" in w for w in result["warnings"])
# Only the text POST was made, media was skipped
assert mock_session.post.call_count == 1
def test_media_upload_failure_collected_as_warning(self, tmp_path):
"""Failed media upload becomes a warning, text still succeeds."""
img = tmp_path / "photo.png"
img.write_bytes(b"\x89PNG fake image data")
# First call (text) succeeds, second call (media) returns 413
text_resp = MagicMock()
text_resp.status = 200
text_resp.json = AsyncMock(return_value={"id": "txt_ok"})
text_resp.__aenter__ = AsyncMock(return_value=text_resp)
text_resp.__aexit__ = AsyncMock(return_value=None)
media_resp = MagicMock()
media_resp.status = 413
media_resp.text = AsyncMock(return_value="Request Entity Too Large")
media_resp.__aenter__ = AsyncMock(return_value=media_resp)
media_resp.__aexit__ = AsyncMock(return_value=None)
mock_session = MagicMock()
mock_session.__aenter__ = AsyncMock(return_value=mock_session)
mock_session.__aexit__ = AsyncMock(return_value=None)
mock_session.post = MagicMock(side_effect=[text_resp, media_resp])
with patch("aiohttp.ClientSession", return_value=mock_session):
result = asyncio.run(
_send_discord("tok", "444", "hello", media_files=[(str(img), False)])
)
assert result["success"] is True
assert result["message_id"] == "txt_ok"
assert "warnings" in result
assert any("413" in w for w in result["warnings"])
def test_no_text_no_media_returns_error(self):
"""Empty text with no media returns error dict."""
mock_session, _ = self._build_mock(200)
with patch("aiohttp.ClientSession", return_value=mock_session):
result = asyncio.run(
_send_discord("tok", "555", "", media_files=[])
)
# Text is empty but media_files is empty, so text POST fires
# (the "skip text if media present" condition isn't met)
assert result["success"] is True
def test_multiple_media_files_uploaded_separately(self, tmp_path):
"""Each media file gets its own multipart POST."""
img1 = tmp_path / "a.png"
img1.write_bytes(b"img1")
img2 = tmp_path / "b.jpg"
img2.write_bytes(b"img2")
mock_session, _ = self._build_mock(200, {"id": "last"})
with patch("aiohttp.ClientSession", return_value=mock_session):
result = asyncio.run(
_send_discord("tok", "666", "hi", media_files=[
(str(img1), False), (str(img2), False)
])
)
assert result["success"] is True
# 1 text POST + 2 media POSTs = 3
assert mock_session.post.call_count == 3
class TestSendToPlatformDiscordMedia:
"""_send_to_platform routes Discord media correctly."""
def test_media_files_passed_on_last_chunk_only(self):
"""Discord media_files are only passed on the final chunk."""
call_log = []
async def mock_send_discord(token, chat_id, message, thread_id=None, media_files=None):
call_log.append({"message": message, "media_files": media_files or []})
return {"success": True, "platform": "discord", "chat_id": chat_id, "message_id": "1"}
# A message long enough to get chunked (Discord limit is 2000)
long_msg = "A" * 1900 + " " + "B" * 1900
with patch("tools.send_message_tool._send_discord", side_effect=mock_send_discord):
result = asyncio.run(
_send_to_platform(
Platform.DISCORD,
SimpleNamespace(enabled=True, token="tok", extra={}),
"999",
long_msg,
media_files=[("/fake/img.png", False)],
)
)
assert result["success"] is True
assert len(call_log) == 2 # Message was chunked
assert call_log[0]["media_files"] == [] # First chunk: no media
assert call_log[1]["media_files"] == [("/fake/img.png", False)] # Last chunk: media attached
def test_single_chunk_gets_media(self):
"""Short message (single chunk) gets media_files directly."""
send_mock = AsyncMock(return_value={"success": True, "message_id": "1"})
with patch("tools.send_message_tool._send_discord", send_mock):
result = asyncio.run(
_send_to_platform(
Platform.DISCORD,
SimpleNamespace(enabled=True, token="tok", extra={}),
"888",
"short message",
media_files=[("/fake/img.png", False)],
)
)
assert result["success"] is True
send_mock.assert_awaited_once()
call_kwargs = send_mock.await_args.kwargs
assert call_kwargs["media_files"] == [("/fake/img.png", False)]
class TestSendMatrixUrlEncoding:
"""_send_matrix URL-encodes Matrix room IDs in the API path."""
def test_room_id_is_percent_encoded_in_url(self):
"""Matrix room IDs with ! and : are percent-encoded in the PUT URL."""
import aiohttp
mock_resp = MagicMock()
mock_resp.status = 200
mock_resp.json = AsyncMock(return_value={"event_id": "$evt123"})
mock_resp.__aenter__ = AsyncMock(return_value=mock_resp)
mock_resp.__aexit__ = AsyncMock(return_value=None)
mock_session = MagicMock()
mock_session.put = MagicMock(return_value=mock_resp)
mock_session.__aenter__ = AsyncMock(return_value=mock_session)
mock_session.__aexit__ = AsyncMock(return_value=None)
with patch("aiohttp.ClientSession", return_value=mock_session):
from tools.send_message_tool import _send_matrix
result = asyncio.get_event_loop().run_until_complete(
_send_matrix(
"test_token",
{"homeserver": "https://matrix.example.org"},
"!HLOQwxYGgFPMPJUSNR:matrix.org",
"hello",
)
)
assert result["success"] is True
# Verify the URL was called with percent-encoded room ID
put_url = mock_session.put.call_args[0][0]
assert "%21HLOQwxYGgFPMPJUSNR%3Amatrix.org" in put_url
assert "!HLOQwxYGgFPMPJUSNR:matrix.org" not in put_url

View File

@@ -13,6 +13,8 @@ import sys
from pathlib import Path
from typing import Any, Dict, List, Optional
from hermes_constants import display_hermes_home
logger = logging.getLogger(__name__)
# Import from cron module (will be available when properly installed)
@@ -391,6 +393,8 @@ Use action='create' to schedule a new job from a prompt or one or more skills.
Use action='list' to inspect jobs.
Use action='update', 'pause', 'resume', 'remove', or 'run' to manage an existing job.
To stop a job the user no longer wants: first action='list' to find the job_id, then action='remove' with that job_id. Never guess job IDs — always list first.
Jobs run in a fresh session with no current-chat context, so prompts must be self-contained.
If skills are provided on create, the future cron run loads those skills in order, then follows the prompt as the task instruction.
On update, passing skills=[] clears attached skills.
@@ -453,7 +457,7 @@ Important safety rule: cron-run sessions should not recursively schedule more cr
},
"script": {
"type": "string",
"description": "Optional path to a Python script that runs before each cron job execution. Its stdout is injected into the prompt as context. Use for data collection and change detection. Relative paths resolve under ~/.hermes/scripts/. On update, pass empty string to clear."
"description": f"Optional path to a Python script that runs before each cron job execution. Its stdout is injected into the prompt as context. Use for data collection and change detection. Relative paths resolve under {display_hermes_home()}/scripts/. On update, pass empty string to clear."
},
},
"required": ["action"]

View File

@@ -68,7 +68,7 @@ SEND_MESSAGE_SCHEMA = {
},
"target": {
"type": "string",
"description": "Delivery target. Format: 'platform' (uses home channel), 'platform:#channel-name', 'platform:chat_id', or 'platform:chat_id:thread_id' for Telegram topics and Discord threads. Examples: 'telegram', 'telegram:-1001234567890:17585', 'discord:999888777:555444333', 'discord:#bot-home', 'slack:#engineering', 'signal:+155****4567'"
"description": "Delivery target. Format: 'platform' (uses home channel), 'platform:#channel-name', 'platform:chat_id', or 'platform:chat_id:thread_id' for Telegram topics and Discord threads. Examples: 'telegram', 'telegram:-1001234567890:17585', 'discord:999888777:555444333', 'discord:#bot-home', 'slack:#engineering', 'signal:+155****4567', 'matrix:!roomid:server.org', 'matrix:@user:server.org'"
},
"message": {
"type": "string",
@@ -248,6 +248,9 @@ def _parse_target_ref(platform_name: str, target_ref: str):
return match.group(1), None, True
if target_ref.lstrip("-").isdigit():
return target_ref, None, True
# Matrix room IDs (start with !) and user IDs (start with @) are explicit
if platform_name == "matrix" and (target_ref.startswith("!") or target_ref.startswith("@")):
return target_ref, None, True
return None, None, False
@@ -384,11 +387,28 @@ async def _send_to_platform(platform, pconfig, chat_id, message, thread_id=None,
if platform == Platform.WEIXIN:
return await _send_weixin(pconfig, chat_id, message, media_files=media_files)
# --- Non-Telegram platforms ---
# --- Discord: special handling for media attachments ---
if platform == Platform.DISCORD:
last_result = None
for i, chunk in enumerate(chunks):
is_last = (i == len(chunks) - 1)
result = await _send_discord(
pconfig.token,
chat_id,
chunk,
media_files=media_files if is_last else [],
thread_id=thread_id,
)
if isinstance(result, dict) and result.get("error"):
return result
last_result = result
return last_result
# --- Non-Telegram/Discord platforms ---
if media_files and not message.strip():
return {
"error": (
f"send_message MEDIA delivery is currently only supported for telegram; "
f"send_message MEDIA delivery is currently only supported for telegram, discord, and weixin; "
f"target {platform.value} had only media attachments"
)
}
@@ -396,14 +416,12 @@ async def _send_to_platform(platform, pconfig, chat_id, message, thread_id=None,
if media_files:
warning = (
f"MEDIA attachments were omitted for {platform.value}; "
"native send_message media delivery is currently only supported for telegram"
"native send_message media delivery is currently only supported for telegram, discord, and weixin"
)
last_result = None
for chunk in chunks:
if platform == Platform.DISCORD:
result = await _send_discord(pconfig.token, chat_id, chunk, thread_id=thread_id)
elif platform == Platform.SLACK:
if platform == Platform.SLACK:
result = await _send_slack(pconfig.token, chat_id, chunk)
elif platform == Platform.WHATSAPP:
result = await _send_whatsapp(pconfig.extra, chat_id, chunk)
@@ -568,13 +586,16 @@ async def _send_telegram(token, chat_id, message, media_files=None, thread_id=No
return _error(f"Telegram send failed: {e}")
async def _send_discord(token, chat_id, message, thread_id=None):
async def _send_discord(token, chat_id, message, thread_id=None, media_files=None):
"""Send a single message via Discord REST API (no websocket client needed).
Chunking is handled by _send_to_platform() before this is called.
When thread_id is provided, the message is sent directly to that thread
via the /channels/{thread_id}/messages endpoint.
Media files are uploaded one-by-one via multipart/form-data after the
text message is sent (same pattern as Telegram).
"""
try:
import aiohttp
@@ -589,14 +610,56 @@ async def _send_discord(token, chat_id, message, thread_id=None):
url = f"https://discord.com/api/v10/channels/{thread_id}/messages"
else:
url = f"https://discord.com/api/v10/channels/{chat_id}/messages"
headers = {"Authorization": f"Bot {token}", "Content-Type": "application/json"}
auth_headers = {"Authorization": f"Bot {token}"}
media_files = media_files or []
last_data = None
warnings = []
async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=30), **_sess_kw) as session:
async with session.post(url, headers=headers, json={"content": message}, **_req_kw) as resp:
if resp.status not in (200, 201):
body = await resp.text()
return _error(f"Discord API error ({resp.status}): {body}")
data = await resp.json()
return {"success": True, "platform": "discord", "chat_id": chat_id, "message_id": data.get("id")}
# Send text message (skip if empty and media is present)
if message.strip() or not media_files:
headers = {**auth_headers, "Content-Type": "application/json"}
async with session.post(url, headers=headers, json={"content": message}, **_req_kw) as resp:
if resp.status not in (200, 201):
body = await resp.text()
return _error(f"Discord API error ({resp.status}): {body}")
last_data = await resp.json()
# Send each media file as a separate multipart upload
for media_path, _is_voice in media_files:
if not os.path.exists(media_path):
warning = f"Media file not found, skipping: {media_path}"
logger.warning(warning)
warnings.append(warning)
continue
try:
form = aiohttp.FormData()
filename = os.path.basename(media_path)
with open(media_path, "rb") as f:
form.add_field("files[0]", f, filename=filename)
async with session.post(url, headers=auth_headers, data=form, **_req_kw) as resp:
if resp.status not in (200, 201):
body = await resp.text()
warning = _sanitize_error_text(f"Failed to send media {media_path}: Discord API error ({resp.status}): {body}")
logger.error(warning)
warnings.append(warning)
continue
last_data = await resp.json()
except Exception as e:
warning = _sanitize_error_text(f"Failed to send media {media_path}: {e}")
logger.error(warning)
warnings.append(warning)
if last_data is None:
error = "No deliverable text or media remained after processing"
if warnings:
return {"error": error, "warnings": warnings}
return {"error": error}
result = {"success": True, "platform": "discord", "chat_id": chat_id, "message_id": last_data.get("id")}
if warnings:
result["warnings"] = warnings
return result
except Exception as e:
return _error(f"Discord send failed: {e}")
@@ -816,7 +879,9 @@ async def _send_matrix(token, extra, chat_id, message):
if not homeserver or not token:
return {"error": "Matrix not configured (MATRIX_HOMESERVER, MATRIX_ACCESS_TOKEN required)"}
txn_id = f"hermes_{int(time.time() * 1000)}_{os.urandom(4).hex()}"
url = f"{homeserver}/_matrix/client/v3/rooms/{chat_id}/send/m.room.message/{txn_id}"
from urllib.parse import quote
encoded_room = quote(chat_id, safe="")
url = f"{homeserver}/_matrix/client/v3/rooms/{encoded_room}/send/m.room.message/{txn_id}"
headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
# Build message payload with optional HTML formatted_body.

View File

@@ -39,7 +39,7 @@ import re
import shutil
import tempfile
from pathlib import Path
from hermes_constants import get_hermes_home
from hermes_constants import get_hermes_home, display_hermes_home
from typing import Dict, Any, Optional, Tuple
logger = logging.getLogger(__name__)
@@ -655,7 +655,7 @@ SKILL_MANAGE_SCHEMA = {
"description": (
"Manage skills (create, update, delete). Skills are your procedural "
"memory — reusable approaches for recurring task types. "
"New skills go to ~/.hermes/skills/; existing skills can be modified wherever they live.\n\n"
f"New skills go to {display_hermes_home()}/skills/; existing skills can be modified wherever they live.\n\n"
"Actions: create (full SKILL.md + optional category), "
"patch (old_string/new_string — preferred for fixes), "
"edit (full SKILL.md rewrite — major overhauls only), "

View File

@@ -69,7 +69,7 @@ Usage:
import json
import logging
from hermes_constants import get_hermes_home
from hermes_constants import get_hermes_home, display_hermes_home
import os
import re
from enum import Enum
@@ -408,7 +408,7 @@ def _gateway_setup_hint() -> str:
return GATEWAY_SECRET_CAPTURE_UNSUPPORTED_MESSAGE
except Exception:
return "Secure secret entry is not available. Load this skill in the local CLI to be prompted, or add the key to ~/.hermes/.env manually."
return f"Secure secret entry is not available. Load this skill in the local CLI to be prompted, or add the key to {display_hermes_home()}/.env manually."
def _build_setup_note(
@@ -666,7 +666,7 @@ def skills_list(category: str = None, task_id: str = None) -> str:
"success": True,
"skills": [],
"categories": [],
"message": "No skills found. Skills directory created at ~/.hermes/skills/",
"message": f"No skills found. Skills directory created at {display_hermes_home()}/skills/",
},
ensure_ascii=False,
)

View File

@@ -40,6 +40,8 @@ from pathlib import Path
from typing import Callable, Dict, Any, Optional
from urllib.parse import urljoin
from hermes_constants import display_hermes_home
logger = logging.getLogger(__name__)
from tools.managed_tool_gateway import resolve_managed_tool_gateway
from tools.tool_backend_helpers import managed_nous_tools_enabled, resolve_openai_audio_api_key
@@ -1050,7 +1052,7 @@ TTS_SCHEMA = {
},
"output_path": {
"type": "string",
"description": "Optional custom file path to save the audio. Defaults to ~/.hermes/audio_cache/<timestamp>.mp3"
"description": f"Optional custom file path to save the audio. Defaults to {display_hermes_home()}/audio_cache/<timestamp>.mp3"
}
},
"required": ["text"]

View File

@@ -49,6 +49,17 @@ The OpenAI Codex provider authenticates via device code (open a URL, enter a cod
Even when using Nous Portal, Codex, or a custom endpoint, some tools (vision, web summarization, MoA) use a separate "auxiliary" model — by default Gemini Flash via OpenRouter. An `OPENROUTER_API_KEY` enables these tools automatically. You can also configure which model and provider these tools use — see [Auxiliary Models](/docs/user-guide/configuration#auxiliary-models).
:::
### Two Commands for Model Management
Hermes has **two** model commands that serve different purposes:
| Command | Where to run | What it does |
|---------|-------------|--------------|
| **`hermes model`** | Your terminal (outside any session) | Full setup wizard — add providers, run OAuth, enter API keys, configure endpoints |
| **`/model`** | Inside a Hermes chat session | Quick switch between **already-configured** providers and models |
If you're trying to switch to a provider you haven't set up yet (e.g. you only have OpenRouter configured and want to use Anthropic), you need `hermes model`, not `/model`. Exit your session first (`Ctrl+C` or `/quit`), run `hermes model`, complete the provider setup, then start a new session.
### Anthropic (Native)
Use Claude models directly through the Anthropic API — no OpenRouter proxy needed. Supports three auth methods:
@@ -252,7 +263,15 @@ Both approaches persist to `config.yaml`, which is the source of truth for model
### Switching Models with `/model`
Once a custom endpoint is configured, you can switch models mid-session:
:::warning hermes model vs /model
**`hermes model`** (run from your terminal, outside any chat session) is the **full provider setup wizard**. Use it to add new providers, run OAuth flows, enter API keys, and configure custom endpoints.
**`/model`** (typed inside an active Hermes chat session) can only **switch between providers and models you've already set up**. It cannot add new providers, run OAuth, or prompt for API keys. If you've only configured one provider (e.g. OpenRouter), `/model` will only show models for that provider.
**To add a new provider:** Exit your session (`Ctrl+C` or `/quit`), run `hermes model`, set up the new provider, then start a new session.
:::
Once you have at least one custom endpoint configured, you can switch models mid-session:
```
/model custom:qwen-2.5 # Switch to a model on your custom endpoint

View File

@@ -109,22 +109,31 @@ hermes chat --worktree -q "Review this repo and open a PR"
## `hermes model`
Interactive provider + model selector.
Interactive provider + model selector. **This is the command for adding new providers, setting up API keys, and running OAuth flows.** Run it from your terminal — not from inside an active Hermes chat session.
```bash
hermes model
```
Use this when you want to:
- switch default providers
- log into OAuth-backed providers during model selection
- **add a new provider** (OpenRouter, Anthropic, Copilot, DeepSeek, custom, etc.)
- log into OAuth-backed providers (Anthropic, Copilot, Codex, Nous Portal)
- enter or update API keys
- pick from provider-specific model lists
- configure a custom/self-hosted endpoint
- save the new default into config
:::warning hermes model vs /model — know the difference
**`hermes model`** (run from your terminal, outside any Hermes session) is the **full provider setup wizard**. It can add new providers, run OAuth flows, prompt for API keys, and configure endpoints.
**`/model`** (typed inside an active Hermes chat session) can only **switch between providers and models you've already set up**. It cannot add new providers, run OAuth, or prompt for API keys.
**If you need to add a new provider:** Exit your Hermes session first (`Ctrl+C` or `/quit`), then run `hermes model` from your terminal prompt.
:::
### `/model` slash command (mid-session)
Switch models without leaving a session:
Switch between already-configured models without leaving a session:
```
/model # Show current model and available options
@@ -136,6 +145,16 @@ Switch models without leaving a session:
/model openrouter:anthropic/claude-sonnet-4 # Switch back to cloud
```
By default, `/model` changes apply **to the current session only**. Add `--global` to persist the change to `config.yaml`:
```
/model claude-sonnet-4 --global # Switch and save as new default
```
:::info What if I only see OpenRouter models?
If you've only configured OpenRouter, `/model` will only show OpenRouter models. To add another provider (Anthropic, DeepSeek, Copilot, etc.), exit your session and run `hermes model` from the terminal.
:::
Provider and base URL changes are persisted to `config.yaml` automatically. When switching away from a custom endpoint, the stale base URL is cleared to prevent it leaking into other providers.
## `hermes gateway`

View File

@@ -187,6 +187,32 @@ curl -fsSL https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scri
### Provider & Model Issues
#### `/model` only shows one provider / can't switch providers
**Cause:** `/model` (inside a chat session) can only switch between providers you've **already configured**. If you've only set up OpenRouter, that's all `/model` will show.
**Solution:** Exit your session and use `hermes model` from your terminal to add new providers:
```bash
# Exit the Hermes chat session first (Ctrl+C or /quit)
# Run the full provider setup wizard
hermes model
# This lets you: add providers, run OAuth, enter API keys, configure endpoints
```
After adding a new provider via `hermes model`, start a new chat session — `/model` will now show all your configured providers.
:::tip Quick reference
| Want to... | Use |
|-----------|-----|
| Add a new provider | `hermes model` (from terminal) |
| Enter/change API keys | `hermes model` (from terminal) |
| Switch model mid-session | `/model <name>` (inside session) |
| Switch to different configured provider | `/model provider:model` (inside session) |
:::
#### API key not working
**Cause:** Key is missing, expired, incorrectly set, or for the wrong provider.

View File

@@ -46,7 +46,7 @@ Type `/` in the CLI to open the autocomplete menu. Built-in commands are case-in
| Command | Description |
|---------|-------------|
| `/config` | Show current configuration |
| `/model [model-name]` | Show or change the current model. Supports: `/model claude-sonnet-4`, `/model provider:model` (switch providers), `/model custom:model` (custom endpoint), `/model custom:name:model` (named custom provider), `/model custom` (auto-detect from endpoint). Use `--global` to persist the change to config.yaml. |
| `/model [model-name]` | Show or change the current model. Supports: `/model claude-sonnet-4`, `/model provider:model` (switch providers), `/model custom:model` (custom endpoint), `/model custom:name:model` (named custom provider), `/model custom` (auto-detect from endpoint). Use `--global` to persist the change to config.yaml. **Note:** `/model` can only switch between already-configured providers. To add a new provider, exit the session and run `hermes model` from your terminal. |
| `/provider` | Show available providers and current provider |
| `/personality` | Set a predefined personality |
| `/verbose` | Cycle tool progress display: off → new → all → verbose. Can be [enabled for messaging](#notes) via config. |
@@ -124,7 +124,7 @@ The messaging gateway supports the following built-in commands inside Telegram,
| `/reset` | Reset conversation history. |
| `/status` | Show session info. |
| `/stop` | Kill all running background processes and interrupt the running agent. |
| `/model [provider:model]` | Show or change the model. Supports provider switches (`/model zai:glm-5`), custom endpoints (`/model custom:model`), named custom providers (`/model custom:local:qwen`), and auto-detect (`/model custom`). Use `--global` to persist the change to config.yaml. |
| `/model [provider:model]` | Show or change the model. Supports provider switches (`/model zai:glm-5`), custom endpoints (`/model custom:model`), named custom providers (`/model custom:local:qwen`), and auto-detect (`/model custom`). Use `--global` to persist the change to config.yaml. **Note:** `/model` can only switch between already-configured providers. To add a new provider or set up API keys, use `hermes model` from your terminal (outside the chat session). |
| `/provider` | Show provider availability and auth status. |
| `/personality [name]` | Set a personality overlay for the session. |
| `/fast [normal\|fast\|status]` | Toggle fast mode — OpenAI Priority Processing / Anthropic Fast Mode. |