Merge branch 'main' into rewbs/tool-use-charge-to-subscription

This commit is contained in:
Ben Barclay
2026-04-02 11:00:35 +11:00
175 changed files with 18848 additions and 3772 deletions

View File

@@ -15,7 +15,7 @@ Setup::
npm install && npm start # downloads Camoufox (~300MB) on first run
# Option 2: Docker
docker run -p 9377:9377 jo-inc/camofox-browser
docker run -p 9377:9377 -e CAMOFOX_PORT=9377 jo-inc/camofox-browser
Then set ``CAMOFOX_URL=http://localhost:9377`` in ``~/.hermes/.env``.
"""
@@ -34,6 +34,9 @@ from typing import Any, Dict, Optional
import requests
from hermes_cli.config import load_config
from tools.browser_camofox_state import get_camofox_identity
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
@@ -42,6 +45,8 @@ logger = logging.getLogger(__name__)
_DEFAULT_TIMEOUT = 30 # seconds per HTTP request
_SNAPSHOT_MAX_CHARS = 80_000 # camofox paginates at this limit
_vnc_url: Optional[str] = None # cached from /health response
_vnc_url_checked = False # only probe once per process
def get_camofox_url() -> str:
@@ -56,16 +61,52 @@ def is_camofox_mode() -> bool:
def check_camofox_available() -> bool:
"""Verify the Camofox server is reachable."""
global _vnc_url, _vnc_url_checked
url = get_camofox_url()
if not url:
return False
try:
resp = requests.get(f"{url}/health", timeout=5)
if resp.status_code == 200 and not _vnc_url_checked:
try:
data = resp.json()
vnc_port = data.get("vncPort")
if isinstance(vnc_port, int) and 1 <= vnc_port <= 65535:
from urllib.parse import urlparse
parsed = urlparse(url)
host = parsed.hostname or "localhost"
_vnc_url = f"http://{host}:{vnc_port}"
except (ValueError, KeyError):
pass
_vnc_url_checked = True
return resp.status_code == 200
except Exception:
return False
def get_vnc_url() -> Optional[str]:
"""Return the VNC URL if the Camofox server exposes one, or None."""
if not _vnc_url_checked:
check_camofox_available()
return _vnc_url
def _managed_persistence_enabled() -> bool:
"""Return whether Hermes-managed persistence is enabled for Camofox.
When enabled, sessions use a stable profile-scoped userId so the
Camofox server can map it to a persistent browser profile directory.
When disabled (default), each session gets a random userId (ephemeral).
Controlled by ``browser.camofox.managed_persistence`` in config.yaml.
"""
try:
camofox_cfg = load_config().get("browser", {}).get("camofox", {})
except Exception:
return False
return bool(camofox_cfg.get("managed_persistence"))
# ---------------------------------------------------------------------------
# Session management
# ---------------------------------------------------------------------------
@@ -75,16 +116,31 @@ _sessions_lock = threading.Lock()
def _get_session(task_id: Optional[str]) -> Dict[str, Any]:
"""Get or create a camofox session for the given task."""
"""Get or create a camofox session for the given task.
When managed persistence is enabled, uses a deterministic userId
derived from the Hermes profile so the Camofox server can map it
to the same persistent browser profile across restarts.
"""
task_id = task_id or "default"
with _sessions_lock:
if task_id in _sessions:
return _sessions[task_id]
session = {
"user_id": f"hermes_{uuid.uuid4().hex[:10]}",
"tab_id": None,
"session_key": f"task_{task_id[:16]}",
}
if _managed_persistence_enabled():
identity = get_camofox_identity(task_id)
session = {
"user_id": identity["user_id"],
"tab_id": None,
"session_key": identity["session_key"],
"managed": True,
}
else:
session = {
"user_id": f"hermes_{uuid.uuid4().hex[:10]}",
"tab_id": None,
"session_key": f"task_{task_id[:16]}",
"managed": False,
}
_sessions[task_id] = session
return session
@@ -172,11 +228,19 @@ def camofox_navigate(url: str, task_id: Optional[str] = None) -> str:
{"userId": session["user_id"], "url": url},
timeout=60,
)
return json.dumps({
result = {
"success": True,
"url": data.get("url", url),
"title": data.get("title", ""),
})
}
vnc = get_vnc_url()
if vnc:
result["vnc_url"] = vnc
result["vnc_hint"] = (
"Browser is visible via VNC. "
"Share this link with the user so they can watch the browser live."
)
return json.dumps(result)
except requests.HTTPError as e:
return json.dumps({"success": False, "error": f"Navigation failed: {e}"})
except requests.ConnectionError:
@@ -184,7 +248,7 @@ def camofox_navigate(url: str, task_id: Optional[str] = None) -> str:
"success": False,
"error": f"Cannot connect to Camofox at {get_camofox_url()}. "
"Is the server running? Start with: npm start (in camofox-browser dir) "
"or: docker run -p 9377:9377 jo-inc/camofox-browser",
"or: docker run -p 9377:9377 -e CAMOFOX_PORT=9377 jo-inc/camofox-browser",
})
except Exception as e:
return json.dumps({"success": False, "error": str(e)})
@@ -421,6 +485,12 @@ def camofox_vision(question: str, annotate: bool = False,
except Exception:
pass
# Redact secrets from annotation context before sending to vision LLM.
# The screenshot image itself cannot be redacted, but at least the
# text-based accessibility tree snippet won't leak secret values.
from agent.redact import redact_sensitive_text
annotation_context = redact_sensitive_text(annotation_context)
# Send to vision LLM
from agent.auxiliary_client import call_llm
@@ -436,7 +506,7 @@ def camofox_vision(question: str, annotate: bool = False,
except Exception:
_vision_timeout = 120
analysis = call_llm(
response = call_llm(
messages=[{
"role": "user",
"content": [
@@ -452,6 +522,11 @@ def camofox_vision(question: str, annotate: bool = False,
task="vision",
timeout=_vision_timeout,
)
analysis = (response.choices[0].message.content or "").strip() if response.choices else ""
# Redact secrets the vision LLM may have read from the screenshot.
from agent.redact import redact_sensitive_text
analysis = redact_sensitive_text(analysis)
return json.dumps({
"success": True,

View File

@@ -0,0 +1,47 @@
"""Hermes-managed Camofox state helpers.
Provides profile-scoped identity and state directory paths for Camofox
persistent browser profiles. When managed persistence is enabled, Hermes
sends a deterministic userId derived from the active profile so that
Camofox can map it to the same persistent browser profile directory
across restarts.
"""
from __future__ import annotations
import uuid
from pathlib import Path
from typing import Dict, Optional
from hermes_constants import get_hermes_home
CAMOFOX_STATE_DIR_NAME = "browser_auth"
CAMOFOX_STATE_SUBDIR = "camofox"
def get_camofox_state_dir() -> Path:
"""Return the profile-scoped root directory for Camofox persistence."""
return get_hermes_home() / CAMOFOX_STATE_DIR_NAME / CAMOFOX_STATE_SUBDIR
def get_camofox_identity(task_id: Optional[str] = None) -> Dict[str, str]:
"""Return the stable Hermes-managed Camofox identity for this profile.
The user identity is profile-scoped (same Hermes profile = same userId).
The session key is scoped to the logical browser task so newly created
tabs within the same profile reuse the same identity contract.
"""
scope_root = str(get_camofox_state_dir())
logical_scope = task_id or "default"
user_digest = uuid.uuid5(
uuid.NAMESPACE_URL,
f"camofox-user:{scope_root}",
).hex[:10]
session_digest = uuid.uuid5(
uuid.NAMESPACE_URL,
f"camofox-session:{scope_root}:{logical_scope}",
).hex[:16]
return {
"user_id": f"hermes_{user_digest}",
"session_key": f"task_{session_digest}",
}

View File

@@ -238,6 +238,8 @@ _PROVIDER_REGISTRY: Dict[str, type] = {
_cached_cloud_provider: Optional[CloudBrowserProvider] = None
_cloud_provider_resolved = False
_allow_private_urls_resolved = False
_cached_allow_private_urls: Optional[bool] = None
def _get_cloud_provider() -> Optional[CloudBrowserProvider]:
@@ -299,6 +301,44 @@ def _is_local_mode() -> bool:
return _get_cloud_provider() is None
def _is_local_backend() -> bool:
"""Return True when the browser runs locally (no cloud provider).
SSRF protection is only meaningful for cloud backends (Browserbase,
BrowserUse) where the agent could reach internal resources on a remote
machine. For local backends — Camofox, or the built-in headless
Chromium without a cloud provider — the user already has full terminal
and network access on the same machine, so the check adds no security
value.
"""
return _is_camofox_mode() or _get_cloud_provider() is None
def _allow_private_urls() -> bool:
"""Return whether the browser is allowed to navigate to private/internal addresses.
Reads ``config["browser"]["allow_private_urls"]`` once and caches the result
for the process lifetime. Defaults to ``False`` (SSRF protection active).
"""
global _cached_allow_private_urls, _allow_private_urls_resolved
if _allow_private_urls_resolved:
return _cached_allow_private_urls
_allow_private_urls_resolved = True
_cached_allow_private_urls = False # safe default
try:
hermes_home = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes"))
config_path = hermes_home / "config.yaml"
if config_path.exists():
import yaml
with open(config_path) as f:
cfg = yaml.safe_load(f) or {}
_cached_allow_private_urls = bool(cfg.get("browser", {}).get("allow_private_urls"))
except Exception as e:
logger.debug("Could not read allow_private_urls from config: %s", e)
return _cached_allow_private_urls
def _socket_safe_tmpdir() -> str:
"""Return a short temp directory path suitable for Unix domain sockets.
@@ -1024,6 +1064,13 @@ def _extract_relevant_content(
f"Provide a concise summary focused on interactive elements and key content."
)
# Redact secrets from snapshot before sending to auxiliary LLM.
# Without this, a page displaying env vars or API keys would leak
# secrets to the extraction model before run_agent.py's general
# redaction layer ever sees the tool result.
from agent.redact import redact_sensitive_text
extraction_prompt = redact_sensitive_text(extraction_prompt)
try:
call_kwargs = {
"task": "web_extract",
@@ -1035,7 +1082,9 @@ def _extract_relevant_content(
if model:
call_kwargs["model"] = model
response = call_llm(**call_kwargs)
return (response.choices[0].message.content or "").strip() or _truncate_snapshot(snapshot_text)
extracted = (response.choices[0].message.content or "").strip() or _truncate_snapshot(snapshot_text)
# Redact any secrets the auxiliary LLM may have echoed back.
return redact_sensitive_text(extracted)
except Exception:
return _truncate_snapshot(snapshot_text)
@@ -1072,8 +1121,23 @@ def browser_navigate(url: str, task_id: Optional[str] = None) -> str:
Returns:
JSON string with navigation result (includes stealth features info on first nav)
"""
# SSRF protection — block private/internal addresses before navigating
if not _is_safe_url(url):
# Secret exfiltration protection — block URLs that embed API keys or
# tokens in query parameters. A prompt injection could trick the agent
# into navigating to https://evil.com/steal?key=sk-ant-... to exfil secrets.
from agent.redact import _PREFIX_RE
if _PREFIX_RE.search(url):
return json.dumps({
"success": False,
"error": "Blocked: URL contains what appears to be an API key or token. "
"Secrets must not be sent in URLs.",
})
# SSRF protection — block private/internal addresses before navigating.
# Skipped for local backends (Camofox, headless Chromium without a cloud
# provider) because the agent already has full local network access via
# the terminal tool. Can also be opted out for cloud mode via
# ``browser.allow_private_urls`` in config.
if not _is_local_backend() and not _allow_private_urls() and not _is_safe_url(url):
return json.dumps({
"success": False,
"error": "Blocked: URL targets a private or internal address",
@@ -1115,7 +1179,8 @@ def browser_navigate(url: str, task_id: Optional[str] = None) -> str:
# Post-redirect SSRF check — if the browser followed a redirect to a
# private/internal address, block the result so the model can't read
# internal content via subsequent browser_snapshot calls.
if final_url and final_url != url and not _is_safe_url(final_url):
# Skipped for local backends (same rationale as the pre-nav check).
if not _is_local_backend() and not _allow_private_urls() and final_url and final_url != url and not _is_safe_url(final_url):
# Navigate away to a blank page to prevent snapshot leaks
_run_browser_command(effective_task_id, "open", ["about:blank"], timeout=10)
return json.dumps({
@@ -1711,6 +1776,9 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]
response = call_llm(**call_kwargs)
analysis = (response.choices[0].message.content or "").strip()
# Redact secrets the vision LLM may have read from the screenshot.
from agent.redact import redact_sensitive_text
analysis = redact_sensitive_text(analysis)
response_data = {
"success": True,
"analysis": analysis or "Vision analysis returned no content.",

View File

@@ -596,6 +596,14 @@ def execute_code(
stdout_text = strip_ansi(stdout_text)
stderr_text = strip_ansi(stderr_text)
# Redact secrets (API keys, tokens, etc.) from sandbox output.
# The sandbox env-var filter (lines 434-454) blocks os.environ access,
# but scripts can still read secrets from disk (e.g. open('~/.hermes/.env')).
# This ensures leaked secrets never enter the model context.
from agent.redact import redact_sensitive_text
stdout_text = redact_sensitive_text(stdout_text)
stderr_text = redact_sensitive_text(stderr_text)
# Build response
result: Dict[str, Any] = {
"status": status,

View File

@@ -55,16 +55,47 @@ def register_credential_file(
*relative_path* is relative to ``HERMES_HOME`` (e.g. ``google_token.json``).
Returns True if the file exists on the host and was registered.
Security: rejects absolute paths and path traversal sequences (``..``).
The resolved host path must remain inside HERMES_HOME so that a malicious
skill cannot declare ``required_credential_files: ['../../.ssh/id_rsa']``
and exfiltrate sensitive host files into a container sandbox.
"""
hermes_home = _resolve_hermes_home()
# Reject absolute paths — they bypass the HERMES_HOME sandbox entirely.
if os.path.isabs(relative_path):
logger.warning(
"credential_files: rejected absolute path %r (must be relative to HERMES_HOME)",
relative_path,
)
return False
host_path = hermes_home / relative_path
if not host_path.is_file():
logger.debug("credential_files: skipping %s (not found)", host_path)
# Resolve symlinks and normalise ``..`` before the containment check so
# that traversal like ``../. ssh/id_rsa`` cannot escape HERMES_HOME.
try:
resolved = host_path.resolve()
hermes_home_resolved = hermes_home.resolve()
resolved.relative_to(hermes_home_resolved) # raises ValueError if outside
except ValueError:
logger.warning(
"credential_files: rejected path traversal %r "
"(resolves to %s, outside HERMES_HOME %s)",
relative_path,
resolved,
hermes_home_resolved,
)
return False
if not resolved.is_file():
logger.debug("credential_files: skipping %s (not found)", resolved)
return False
container_path = f"{container_base.rstrip('/')}/{relative_path}"
_registered_files[container_path] = str(host_path)
logger.debug("credential_files: registered %s -> %s", host_path, container_path)
_registered_files[container_path] = str(resolved)
logger.debug("credential_files: registered %s -> %s", resolved, container_path)
return True
@@ -110,11 +141,27 @@ def _load_config_files() -> List[Dict[str, str]]:
cfg = yaml.safe_load(f) or {}
cred_files = cfg.get("terminal", {}).get("credential_files")
if isinstance(cred_files, list):
hermes_home_resolved = hermes_home.resolve()
for item in cred_files:
if isinstance(item, str) and item.strip():
host_path = hermes_home / item.strip()
rel = item.strip()
if os.path.isabs(rel):
logger.warning(
"credential_files: rejected absolute config path %r", rel,
)
continue
host_path = (hermes_home / rel).resolve()
try:
host_path.relative_to(hermes_home_resolved)
except ValueError:
logger.warning(
"credential_files: rejected config path traversal %r "
"(resolves to %s, outside HERMES_HOME %s)",
rel, host_path, hermes_home_resolved,
)
continue
if host_path.is_file():
container_path = f"/root/.hermes/{item.strip()}"
container_path = f"/root/.hermes/{rel}"
result.append({
"host_path": str(host_path),
"container_path": container_path,

View File

@@ -71,6 +71,9 @@ WRITE_DENIED_PREFIXES = [
os.path.join(_HOME, ".kube"),
"/etc/sudoers.d",
"/etc/systemd",
os.path.join(_HOME, ".docker"),
os.path.join(_HOME, ".azure"),
os.path.join(_HOME, ".config", "gh"),
]
]

View File

@@ -15,6 +15,80 @@ logger = logging.getLogger(__name__)
_EXPECTED_WRITE_ERRNOS = {errno.EACCES, errno.EPERM, errno.EROFS}
# ---------------------------------------------------------------------------
# Read-size guard: cap the character count returned to the model.
# We're model-agnostic so we can't count tokens; characters are a safe proxy.
# 100K chars ≈ 2535K tokens across typical tokenisers. Files larger than
# this in a single read are a context-window hazard — the model should use
# offset+limit to read the relevant section.
#
# Configurable via config.yaml: file_read_max_chars: 200000
# ---------------------------------------------------------------------------
_DEFAULT_MAX_READ_CHARS = 100_000
_max_read_chars_cached: int | None = None
def _get_max_read_chars() -> int:
"""Return the configured max characters per file read.
Reads ``file_read_max_chars`` from config.yaml on first call, caches
the result for the lifetime of the process. Falls back to the
built-in default if the config is missing or invalid.
"""
global _max_read_chars_cached
if _max_read_chars_cached is not None:
return _max_read_chars_cached
try:
from hermes_cli.config import load_config
cfg = load_config()
val = cfg.get("file_read_max_chars")
if isinstance(val, (int, float)) and val > 0:
_max_read_chars_cached = int(val)
return _max_read_chars_cached
except Exception:
pass
_max_read_chars_cached = _DEFAULT_MAX_READ_CHARS
return _max_read_chars_cached
# If the total file size exceeds this AND the caller didn't specify a narrow
# range (limit <= 200), we include a hint encouraging targeted reads.
_LARGE_FILE_HINT_BYTES = 512_000 # 512 KB
# ---------------------------------------------------------------------------
# Device path blocklist — reading these hangs the process (infinite output
# or blocking on input). Checked by path only (no I/O).
# ---------------------------------------------------------------------------
_BLOCKED_DEVICE_PATHS = frozenset({
# Infinite output — never reach EOF
"/dev/zero", "/dev/random", "/dev/urandom", "/dev/full",
# Blocks waiting for input
"/dev/stdin", "/dev/tty", "/dev/console",
# Nonsensical to read
"/dev/stdout", "/dev/stderr",
# fd aliases
"/dev/fd/0", "/dev/fd/1", "/dev/fd/2",
})
def _is_blocked_device(filepath: str) -> bool:
"""Return True if the path would hang the process (infinite output or blocking input).
Uses the *literal* path — no symlink resolution — because the model
specifies paths directly and realpath follows symlinks all the way
through (e.g. /dev/stdin → /proc/self/fd/0 → /dev/pts/0), defeating
the check.
"""
normalized = os.path.expanduser(filepath)
if normalized in _BLOCKED_DEVICE_PATHS:
return True
# /proc/self/fd/0-2 and /proc/<pid>/fd/0-2 are Linux aliases for stdio
if normalized.startswith("/proc/") and normalized.endswith(
("/fd/0", "/fd/1", "/fd/2")
):
return True
return False
# Paths that file tools should refuse to write to without going through the
# terminal tool's approval system. These match prefixes after os.path.realpath.
_SENSITIVE_PATH_PREFIXES = ("/etc/", "/boot/", "/usr/lib/systemd/")
@@ -53,11 +127,21 @@ def _is_expected_write_exception(exc: Exception) -> bool:
_file_ops_lock = threading.Lock()
_file_ops_cache: dict = {}
# Track files read per task to detect re-read loops after context compression.
# Track files read per task to detect re-read loops and deduplicate reads.
# Per task_id we store:
# "last_key": the key of the most recent read/search call (or None)
# "consecutive": how many times that exact call has been repeated in a row
# "read_history": set of (path, offset, limit) tuples for get_read_files_summary
# "dedup": dict mapping (resolved_path, offset, limit) → mtime float
# Used to skip re-reads of unchanged files. Reset on
# context compression (the original content is summarised
# away so the model needs the full content again).
# "read_timestamps": dict mapping resolved_path → modification-time float
# recorded when the file was last read (or written) by
# this task. Used by write_file and patch to detect
# external changes between the agent's read and write.
# Updated after successful writes so consecutive edits
# by the same task don't trigger false warnings.
_read_tracker_lock = threading.Lock()
_read_tracker: dict = {}
@@ -195,8 +279,19 @@ def clear_file_ops_cache(task_id: str = None):
def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str = "default") -> str:
"""Read a file with pagination and line numbers."""
try:
# Security: block direct reads of internal Hermes cache/index files
# to prevent prompt injection via catalog or hub metadata files.
# ── Device path guard ─────────────────────────────────────────
# Block paths that would hang the process (infinite output,
# blocking on input). Pure path check — no I/O.
if _is_blocked_device(path):
return json.dumps({
"error": (
f"Cannot read '{path}': this is a device file that would "
"block or produce infinite output."
),
})
# ── Hermes internal path guard ────────────────────────────────
# Prevent prompt injection via catalog or hub metadata files.
import pathlib as _pathlib
from hermes_constants import get_hermes_home as _get_hh
_resolved = _pathlib.Path(path).expanduser().resolve()
@@ -217,20 +312,83 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str =
})
except ValueError:
pass
# ── Dedup check ───────────────────────────────────────────────
# If we already read this exact (path, offset, limit) and the
# file hasn't been modified since, return a lightweight stub
# instead of re-sending the same content. Saves context tokens.
resolved_str = str(_resolved)
dedup_key = (resolved_str, offset, limit)
with _read_tracker_lock:
task_data = _read_tracker.setdefault(task_id, {
"last_key": None, "consecutive": 0,
"read_history": set(), "dedup": {},
})
cached_mtime = task_data.get("dedup", {}).get(dedup_key)
if cached_mtime is not None:
try:
current_mtime = os.path.getmtime(resolved_str)
if current_mtime == cached_mtime:
return json.dumps({
"content": (
"File unchanged since last read. The content from "
"the earlier read_file result in this conversation is "
"still current — refer to that instead of re-reading."
),
"path": path,
"dedup": True,
}, ensure_ascii=False)
except OSError:
pass # stat failed — fall through to full read
# ── Perform the read ──────────────────────────────────────────
file_ops = _get_file_ops(task_id)
result = file_ops.read_file(path, offset, limit)
if result.content:
result.content = redact_sensitive_text(result.content)
result_dict = result.to_dict()
# Track reads to detect *consecutive* re-read loops.
# The counter resets whenever any other tool is called in between,
# so only truly back-to-back identical reads trigger warnings/blocks.
# ── Character-count guard ─────────────────────────────────────
# We're model-agnostic so we can't count tokens; characters are
# the best proxy we have. If the read produced an unreasonable
# amount of content, reject it and tell the model to narrow down.
# Note: we check the formatted content (with line-number prefixes),
# not the raw file size, because that's what actually enters context.
content_len = len(result.content or "")
file_size = result_dict.get("file_size", 0)
max_chars = _get_max_read_chars()
if content_len > max_chars:
total_lines = result_dict.get("total_lines", "unknown")
return json.dumps({
"error": (
f"Read produced {content_len:,} characters which exceeds "
f"the safety limit ({max_chars:,} chars). "
"Use offset and limit to read a smaller range. "
f"The file has {total_lines} lines total."
),
"path": path,
"total_lines": total_lines,
"file_size": file_size,
}, ensure_ascii=False)
# Large-file hint: if the file is big and the caller didn't ask
# for a narrow window, nudge toward targeted reads.
if (file_size and file_size > _LARGE_FILE_HINT_BYTES
and limit > 200
and result_dict.get("truncated")):
result_dict.setdefault("_hint", (
f"This file is large ({file_size:,} bytes). "
"Consider reading only the section you need with offset and limit "
"to keep context usage efficient."
))
# ── Track for consecutive-loop detection ──────────────────────
read_key = ("read", path, offset, limit)
with _read_tracker_lock:
task_data = _read_tracker.setdefault(task_id, {
"last_key": None, "consecutive": 0, "read_history": set(),
})
# Ensure "dedup" key exists (backward compat with old tracker state)
if "dedup" not in task_data:
task_data["dedup"] = {}
task_data["read_history"].add((path, offset, limit))
if task_data["last_key"] == read_key:
task_data["consecutive"] += 1
@@ -239,6 +397,17 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str =
task_data["consecutive"] = 1
count = task_data["consecutive"]
# Store mtime at read time for two purposes:
# 1. Dedup: skip identical re-reads of unchanged files.
# 2. Staleness: warn on write/patch if the file changed since
# the agent last read it (external edit, concurrent agent, etc.).
try:
_mtime_now = os.path.getmtime(resolved_str)
task_data["dedup"][dedup_key] = _mtime_now
task_data.setdefault("read_timestamps", {})[resolved_str] = _mtime_now
except OSError:
pass # Can't stat — skip tracking for this entry
if count >= 4:
# Hard block: stop returning content to break the loop
return json.dumps({
@@ -296,6 +465,28 @@ def clear_read_tracker(task_id: str = None):
_read_tracker.clear()
def reset_file_dedup(task_id: str = None):
"""Clear the deduplication cache for file reads.
Called after context compression — the original read content has been
summarised away, so the model needs the full content if it reads the
same file again. Without this, reads after compression would return
a "file unchanged" stub pointing at content that no longer exists in
context.
Call with a task_id to clear just that task, or without to clear all.
"""
with _read_tracker_lock:
if task_id:
task_data = _read_tracker.get(task_id)
if task_data and "dedup" in task_data:
task_data["dedup"].clear()
else:
for task_data in _read_tracker.values():
if "dedup" in task_data:
task_data["dedup"].clear()
def notify_other_tool_call(task_id: str = "default"):
"""Reset consecutive read/search counter for a task.
@@ -312,15 +503,71 @@ def notify_other_tool_call(task_id: str = "default"):
task_data["consecutive"] = 0
def _update_read_timestamp(filepath: str, task_id: str) -> None:
"""Record the file's current modification time after a successful write.
Called after write_file and patch so that consecutive edits by the
same task don't trigger false staleness warnings — each write
refreshes the stored timestamp to match the file's new state.
"""
try:
resolved = str(Path(filepath).expanduser().resolve())
current_mtime = os.path.getmtime(resolved)
except (OSError, ValueError):
return
with _read_tracker_lock:
task_data = _read_tracker.get(task_id)
if task_data is not None:
task_data.setdefault("read_timestamps", {})[resolved] = current_mtime
def _check_file_staleness(filepath: str, task_id: str) -> str | None:
"""Check whether a file was modified since the agent last read it.
Returns a warning string if the file is stale (mtime changed since
the last read_file call for this task), or None if the file is fresh
or was never read. Does not block — the write still proceeds.
"""
try:
resolved = str(Path(filepath).expanduser().resolve())
except (OSError, ValueError):
return None
with _read_tracker_lock:
task_data = _read_tracker.get(task_id)
if not task_data:
return None
read_mtime = task_data.get("read_timestamps", {}).get(resolved)
if read_mtime is None:
return None # File was never read — nothing to compare against
try:
current_mtime = os.path.getmtime(resolved)
except OSError:
return None # Can't stat — file may have been deleted, let write handle it
if current_mtime != read_mtime:
return (
f"Warning: {filepath} was modified since you last read it "
"(external edit or concurrent agent). The content you read may be "
"stale. Consider re-reading the file to verify before writing."
)
return None
def write_file_tool(path: str, content: str, task_id: str = "default") -> str:
"""Write content to a file."""
sensitive_err = _check_sensitive_path(path)
if sensitive_err:
return json.dumps({"error": sensitive_err}, ensure_ascii=False)
try:
stale_warning = _check_file_staleness(path, task_id)
file_ops = _get_file_ops(task_id)
result = file_ops.write_file(path, content)
return json.dumps(result.to_dict(), ensure_ascii=False)
result_dict = result.to_dict()
if stale_warning:
result_dict["_warning"] = stale_warning
# Refresh the stored timestamp so consecutive writes by this
# task don't trigger false staleness warnings.
_update_read_timestamp(path, task_id)
return json.dumps(result_dict, ensure_ascii=False)
except Exception as e:
if _is_expected_write_exception(e):
logger.debug("write_file expected denial: %s: %s", type(e).__name__, e)
@@ -346,6 +593,13 @@ def patch_tool(mode: str = "replace", path: str = None, old_string: str = None,
if sensitive_err:
return json.dumps({"error": sensitive_err}, ensure_ascii=False)
try:
# Check staleness for all files this patch will touch.
stale_warnings = []
for _p in _paths_to_check:
_sw = _check_file_staleness(_p, task_id)
if _sw:
stale_warnings.append(_sw)
file_ops = _get_file_ops(task_id)
if mode == "replace":
@@ -362,6 +616,13 @@ def patch_tool(mode: str = "replace", path: str = None, old_string: str = None,
return json.dumps({"error": f"Unknown mode: {mode}"})
result_dict = result.to_dict()
if stale_warnings:
result_dict["_warning"] = stale_warnings[0] if len(stale_warnings) == 1 else " | ".join(stale_warnings)
# Refresh stored timestamps for all successfully-patched paths so
# consecutive edits by this task don't trigger false warnings.
if not result_dict.get("error"):
for _p in _paths_to_check:
_update_read_timestamp(_p, task_id)
result_json = json.dumps(result_dict, ensure_ascii=False)
# Hint when old_string not found — saves iterations where the agent
# retries with stale content instead of re-reading the file.
@@ -466,7 +727,7 @@ def _check_file_reqs():
READ_FILE_SCHEMA = {
"name": "read_file",
"description": "Read a text file with line numbers and pagination. Use this instead of cat/head/tail in terminal. Output format: 'LINE_NUM|CONTENT'. Suggests similar filenames if not found. Use offset and limit for large files. NOTE: Cannot read images or binary files — use vision_analyze for images.",
"description": "Read a text file with line numbers and pagination. Use this instead of cat/head/tail in terminal. Output format: 'LINE_NUM|CONTENT'. Suggests similar filenames if not found. Use offset and limit for large files. Reads exceeding ~100K characters are rejected; use offset and limit to read specific sections of large files. NOTE: Cannot read images or binary files — use vision_analyze for images.",
"parameters": {
"type": "object",
"properties": {

View File

@@ -82,6 +82,8 @@ SKILLS_DIR = HERMES_HOME / "skills"
MAX_NAME_LENGTH = 64
MAX_DESCRIPTION_LENGTH = 1024
MAX_SKILL_CONTENT_CHARS = 100_000 # ~36k tokens at 2.75 chars/token
MAX_SKILL_FILE_BYTES = 1_048_576 # 1 MiB per supporting file
# Characters allowed in skill names (filesystem-safe, URL-friendly)
VALID_NAME_RE = re.compile(r'^[a-z0-9][a-z0-9._-]*$')
@@ -177,6 +179,21 @@ def _validate_frontmatter(content: str) -> Optional[str]:
return None
def _validate_content_size(content: str, label: str = "SKILL.md") -> Optional[str]:
"""Check that content doesn't exceed the character limit for agent writes.
Returns an error message or None if within bounds.
"""
if len(content) > MAX_SKILL_CONTENT_CHARS:
return (
f"{label} content is {len(content):,} characters "
f"(limit: {MAX_SKILL_CONTENT_CHARS:,}). "
f"Consider splitting into a smaller SKILL.md with supporting files "
f"in references/ or templates/."
)
return None
def _resolve_skill_dir(name: str, category: str = None) -> Path:
"""Build the directory path for a new skill, optionally under a category."""
if category:
@@ -275,6 +292,10 @@ def _create_skill(name: str, content: str, category: str = None) -> Dict[str, An
if err:
return {"success": False, "error": err}
err = _validate_content_size(content)
if err:
return {"success": False, "error": err}
# Check for name collisions across all directories
existing = _find_skill(name)
if existing:
@@ -318,6 +339,10 @@ def _edit_skill(name: str, content: str) -> Dict[str, Any]:
if err:
return {"success": False, "error": err}
err = _validate_content_size(content)
if err:
return {"success": False, "error": err}
existing = _find_skill(name)
if not existing:
return {"success": False, "error": f"Skill '{name}' not found. Use skills_list() to see available skills."}
@@ -379,27 +404,29 @@ def _patch_skill(
content = target.read_text(encoding="utf-8")
count = content.count(old_string)
if count == 0:
# Use the same fuzzy matching engine as the file patch tool.
# This handles whitespace normalization, indentation differences,
# escape sequences, and block-anchor matching — saving the agent
# from exact-match failures on minor formatting mismatches.
from tools.fuzzy_match import fuzzy_find_and_replace
new_content, match_count, match_error = fuzzy_find_and_replace(
content, old_string, new_string, replace_all
)
if match_error:
# Show a short preview of the file so the model can self-correct
preview = content[:500] + ("..." if len(content) > 500 else "")
return {
"success": False,
"error": "old_string not found in the file.",
"error": match_error,
"file_preview": preview,
}
if count > 1 and not replace_all:
return {
"success": False,
"error": (
f"old_string matched {count} times. Provide more surrounding context "
f"to make the match unique, or set replace_all=true to replace all occurrences."
),
"match_count": count,
}
new_content = content.replace(old_string, new_string) if replace_all else content.replace(old_string, new_string, 1)
# Check size limit on the result
target_label = "SKILL.md" if not file_path else file_path
err = _validate_content_size(new_content, label=target_label)
if err:
return {"success": False, "error": err}
# If patching SKILL.md, validate frontmatter is still intact
if not file_path:
@@ -419,10 +446,9 @@ def _patch_skill(
_atomic_write_text(target, original_content)
return {"success": False, "error": scan_error}
replacements = count if replace_all else 1
return {
"success": True,
"message": f"Patched {'SKILL.md' if not file_path else file_path} in skill '{name}' ({replacements} replacement{'s' if replacements > 1 else ''}).",
"message": f"Patched {'SKILL.md' if not file_path else file_path} in skill '{name}' ({match_count} replacement{'s' if match_count > 1 else ''}).",
}
@@ -455,6 +481,21 @@ def _write_file(name: str, file_path: str, file_content: str) -> Dict[str, Any]:
if not file_content and file_content != "":
return {"success": False, "error": "file_content is required."}
# Check size limits
content_bytes = len(file_content.encode("utf-8"))
if content_bytes > MAX_SKILL_FILE_BYTES:
return {
"success": False,
"error": (
f"File content is {content_bytes:,} bytes "
f"(limit: {MAX_SKILL_FILE_BYTES:,} bytes / 1 MiB). "
f"Consider splitting into smaller files."
),
}
err = _validate_content_size(file_content, label=file_path)
if err:
return {"success": False, "error": err}
existing = _find_skill(name)
if not existing:
return {"success": False, "error": f"Skill '{name}' not found. Create it first with action='create'."}

View File

@@ -2115,7 +2115,11 @@ class OptionalSkillSource(SkillSource):
"""
def __init__(self):
self._optional_dir = Path(__file__).parent.parent / "optional-skills"
from hermes_constants import get_optional_skills_dir
self._optional_dir = get_optional_skills_dir(
Path(__file__).parent.parent / "optional-skills"
)
def source_id(self) -> str:
return "official"
@@ -2521,6 +2525,22 @@ def install_from_quarantine(
if install_dir.exists():
shutil.rmtree(install_dir)
# Warn (but don't block) if SKILL.md is very large
skill_md = quarantine_path / "SKILL.md"
if skill_md.exists():
try:
skill_size = skill_md.stat().st_size
if skill_size > 100_000:
logger.warning(
"Skill '%s' has a large SKILL.md (%s chars). "
"Large skills consume significant context when loaded. "
"Consider asking the author to split it into smaller files.",
safe_skill_name,
f"{skill_size:,}",
)
except OSError:
pass
install_dir.parent.mkdir(parents=True, exist_ok=True)
shutil.move(str(quarantine_path), str(install_dir))

View File

@@ -51,9 +51,12 @@ def _audio_available() -> bool:
def detect_audio_environment() -> dict:
"""Detect if the current environment supports audio I/O.
Returns dict with 'available' (bool) and 'warnings' (list of strings).
Returns dict with 'available' (bool), 'warnings' (list of hard-fail
reasons that block voice mode), and 'notices' (list of informational
messages that do NOT block voice mode).
"""
warnings = []
warnings = [] # hard-fail: these block voice mode
notices = [] # informational: logged but don't block
# SSH detection
if any(os.environ.get(v) for v in ('SSH_CLIENT', 'SSH_TTY', 'SSH_CONNECTION')):
@@ -63,11 +66,20 @@ def detect_audio_environment() -> dict:
if os.path.exists('/.dockerenv'):
warnings.append("Running inside Docker container -- no audio devices")
# WSL detection
# WSL detection — PulseAudio bridge makes audio work in WSL.
# Only block if PULSE_SERVER is not configured.
try:
with open('/proc/version', 'r') as f:
if 'microsoft' in f.read().lower():
warnings.append("Running in WSL -- audio requires PulseAudio bridge to Windows")
if os.environ.get('PULSE_SERVER'):
notices.append("Running in WSL with PulseAudio bridge")
else:
warnings.append(
"Running in WSL -- audio requires PulseAudio bridge.\n"
" 1. Set PULSE_SERVER=unix:/mnt/wslg/PulseServer\n"
" 2. Create ~/.asoundrc pointing ALSA at PulseAudio\n"
" 3. Verify with: arecord -d 3 /tmp/test.wav && aplay /tmp/test.wav"
)
except (FileNotFoundError, PermissionError, OSError):
pass
@@ -79,7 +91,12 @@ def detect_audio_environment() -> dict:
if not devices:
warnings.append("No audio input/output devices detected")
except Exception:
warnings.append("Audio subsystem error (PortAudio cannot query devices)")
# In WSL with PulseAudio, device queries can fail even though
# recording/playback works fine. Don't block if PULSE_SERVER is set.
if os.environ.get('PULSE_SERVER'):
notices.append("Audio device query failed but PULSE_SERVER is set -- continuing")
else:
warnings.append("Audio subsystem error (PortAudio cannot query devices)")
except ImportError:
warnings.append("Audio libraries not installed (pip install sounddevice numpy)")
except OSError:
@@ -93,6 +110,7 @@ def detect_audio_environment() -> dict:
return {
"available": len(warnings) == 0,
"warnings": warnings,
"notices": notices,
}
# ---------------------------------------------------------------------------
@@ -748,6 +766,8 @@ def check_voice_requirements() -> Dict[str, Any]:
for warning in env_check["warnings"]:
details_parts.append(f"Environment: {warning}")
for notice in env_check.get("notices", []):
details_parts.append(f"Environment: {notice}")
return {
"available": available,

View File

@@ -1130,24 +1130,26 @@ def web_search_tool(query: str, limit: int = 5) -> str:
async def web_extract_tool(
urls: List[str],
format: str = None,
urls: List[str],
format: str = None,
use_llm_processing: bool = True,
model: Optional[str] = None,
min_length: int = DEFAULT_MIN_LENGTH_FOR_SUMMARIZATION
) -> str:
"""
Extract content from specific web pages using available extraction API backend.
This function provides a generic interface for web content extraction that
can work with multiple backends. Currently uses Firecrawl.
Args:
urls (List[str]): List of URLs to extract content from
format (str): Desired output format ("markdown" or "html", optional)
use_llm_processing (bool): Whether to process content with LLM for summarization (default: True)
model (Optional[str]): The model to use for LLM processing (defaults to current auxiliary backend model)
min_length (int): Minimum content length to trigger LLM processing (default: 5000)
Security: URLs are checked for embedded secrets before fetching.
Returns:
str: JSON string containing extracted content. If LLM processing is enabled and successful,
@@ -1156,6 +1158,16 @@ async def web_extract_tool(
Raises:
Exception: If extraction fails or API key is not set
"""
# Block URLs containing embedded secrets (exfiltration prevention)
from agent.redact import _PREFIX_RE
for _url in urls:
if _PREFIX_RE.search(_url):
return json.dumps({
"success": False,
"error": "Blocked: URL contains what appears to be an API key or token. "
"Secrets must not be sent in URLs.",
})
debug_call_data = {
"parameters": {
"urls": urls,