Merge branch 'main' into rewbs/tool-use-charge-to-subscription

2026-03-31 08:48:54 +09:00
parent 1cbb1b99cc ce2841f3c9
commit 6e4598ce1e
269 changed files with 33678 additions and 2273 deletions
--- a/agent/anthropic_adapter.py
+++ b/agent/anthropic_adapter.py
@@ -35,6 +35,54 @@ ADAPTIVE_EFFORT_MAP = {
    "minimal": "low",
 }

+# ── Max output token limits per Anthropic model ───────────────────────
+# Source: Anthropic docs + Cline model catalog.  Anthropic's API requires
+# max_tokens as a mandatory field.  Previously we hardcoded 16384, which
+# starves thinking-enabled models (thinking tokens count toward the limit).
+_ANTHROPIC_OUTPUT_LIMITS = {
+    # Claude 4.6
+    "claude-opus-4-6":   128_000,
+    "claude-sonnet-4-6":  64_000,
+    # Claude 4.5
+    "claude-opus-4-5":    64_000,
+    "claude-sonnet-4-5":  64_000,
+    "claude-haiku-4-5":   64_000,
+    # Claude 4
+    "claude-opus-4":      32_000,
+    "claude-sonnet-4":    64_000,
+    # Claude 3.7
+    "claude-3-7-sonnet": 128_000,
+    # Claude 3.5
+    "claude-3-5-sonnet":   8_192,
+    "claude-3-5-haiku":    8_192,
+    # Claude 3
+    "claude-3-opus":       4_096,
+    "claude-3-sonnet":     4_096,
+    "claude-3-haiku":      4_096,
+}
+
+# For any model not in the table, assume the highest current limit.
+# Future Anthropic models are unlikely to have *less* output capacity.
+_ANTHROPIC_DEFAULT_OUTPUT_LIMIT = 128_000
+
+
+def _get_anthropic_max_output(model: str) -> int:
+    """Look up the max output token limit for an Anthropic model.
+
+    The model ID is lower-cased, dots are folded to dashes, and every key of
+    ``_ANTHROPIC_OUTPUT_LIMITS`` is tested as a *substring*, so date-stamped
+    IDs (claude-sonnet-4-5-20250929), variant suffixes (:1m, :fast) and
+    provider prefixes all resolve.  The longest matching key wins, so
+    "claude-opus-4-6" is preferred over the shorter "claude-opus-4".
+
+    Dots are folded because a dotted ID such as "claude-opus-4.6" would
+    otherwise only match "claude-opus-4" (32K instead of 128K), and
+    "claude-3.5-sonnet" would match nothing and receive the 128K default.
+
+    Args:
+        model: Model identifier; ``None`` or "" is treated as unknown.
+
+    Returns:
+        The table value for the longest matching key, or
+        ``_ANTHROPIC_DEFAULT_OUTPUT_LIMIT`` when no key matches.
+    """
+    normalized = (model or "").lower().replace(".", "-")
+    matches = [key for key in _ANTHROPIC_OUTPUT_LIMITS if key in normalized]
+    if not matches:
+        return _ANTHROPIC_DEFAULT_OUTPUT_LIMIT
+    # On equal length, max() keeps the earliest key in table order.
+    return _ANTHROPIC_OUTPUT_LIMITS[max(matches, key=len)]
+

 def _supports_adaptive_thinking(model: str) -> bool:
    """Return True for Claude 4.6 models that support adaptive thinking."""
@@ -59,6 +107,7 @@ _OAUTH_ONLY_BETAS = [
 # The version must stay reasonably current — Anthropic rejects OAuth requests
 # when the spoofed user-agent version is too far behind the actual release.
 _CLAUDE_CODE_VERSION_FALLBACK = "2.1.74"
+_claude_code_version_cache: Optional[str] = None


 def _detect_claude_code_version() -> str:
@@ -86,11 +135,18 @@ def _detect_claude_code_version() -> str:
    return _CLAUDE_CODE_VERSION_FALLBACK


-_CLAUDE_CODE_VERSION = _detect_claude_code_version()
 _CLAUDE_CODE_SYSTEM_PREFIX = "You are Claude Code, Anthropic's official CLI for Claude."
 _MCP_TOOL_PREFIX = "mcp_"


+def _get_claude_code_version() -> str:
+    """Return the Claude Code version string, detecting it on first use.
+
+    The result of ``_detect_claude_code_version()`` is stored in the
+    module-level ``_claude_code_version_cache`` so detection runs at most
+    once per process, and only when a caller actually needs the value.
+    """
+    global _claude_code_version_cache
+    cached = _claude_code_version_cache
+    if cached is not None:
+        return cached
+    # First call: run detection and remember the answer for later callers.
+    cached = _detect_claude_code_version()
+    _claude_code_version_cache = cached
+    return cached
+
+
 def _is_oauth_token(key: str) -> bool:
    """Check if the key is an OAuth/setup token (not a regular Console API key).

@@ -132,7 +188,7 @@ def build_anthropic_client(api_key: str, base_url: str = None):
        kwargs["auth_token"] = api_key
        kwargs["default_headers"] = {
            "anthropic-beta": ",".join(all_betas),
-            "user-agent": f"claude-cli/{_CLAUDE_CODE_VERSION} (external, cli)",
+            "user-agent": f"claude-cli/{_get_claude_code_version()} (external, cli)",
            "x-app": "cli",
        }
    else:
@@ -241,7 +297,7 @@ def _refresh_oauth_token(creds: Dict[str, Any]) -> Optional[str]:

    headers = {
        "Content-Type": "application/json",
-        "User-Agent": f"claude-cli/{_CLAUDE_CODE_VERSION} (external, cli)",
+        "User-Agent": f"claude-cli/{_get_claude_code_version()} (external, cli)",
    }

    for endpoint in token_endpoints:
@@ -706,14 +762,21 @@ def convert_messages_to_anthropic(
                result.append({"role": "user", "content": [tool_result]})
            continue

-        # Regular user message
+        # Regular user message — validate non-empty content (Anthropic rejects empty)
        if isinstance(content, list):
            converted_blocks = _convert_content_to_anthropic(content)
-            result.append({
-                "role": "user",
-                "content": converted_blocks or [{"type": "text", "text": ""}],
-            })
+            # Check if all text blocks are empty
+            if not converted_blocks or all(
+                b.get("text", "").strip() == ""
+                for b in converted_blocks
+                if isinstance(b, dict) and b.get("type") == "text"
+            ):
+                converted_blocks = [{"type": "text", "text": "(empty message)"}]
+            result.append({"role": "user", "content": converted_blocks})
        else:
+            # Validate string content is non-empty
+            if not content or (isinstance(content, str) and not content.strip()):
+                content = "(empty message)"
            result.append({"role": "user", "content": content})

    # Strip orphaned tool_use blocks (no matching tool_result follows)
@@ -803,9 +866,15 @@ def build_anthropic_kwargs(
    tool_choice: Optional[str] = None,
    is_oauth: bool = False,
    preserve_dots: bool = False,
+    context_length: Optional[int] = None,
 ) -> Dict[str, Any]:
    """Build kwargs for anthropic.messages.create().

+    When *max_tokens* is None, the model's native output limit is used
+    (e.g. 128K for Opus 4.6, 64K for Sonnet 4.6).  If *context_length*
+    is provided, the effective limit is clamped so it doesn't exceed
+    the context window.
+
    When *is_oauth* is True, applies Claude Code compatibility transforms:
    system prompt prefix, tool name prefixing, and prompt sanitization.

@@ -816,7 +885,12 @@ def build_anthropic_kwargs(
    anthropic_tools = convert_tools_to_anthropic(tools) if tools else []

    model = normalize_model_name(model, preserve_dots=preserve_dots)
-    effective_max_tokens = max_tokens or 16384
+    effective_max_tokens = max_tokens or _get_anthropic_max_output(model)
+
+    # Clamp to context window if the user set a lower context_length
+    # (e.g. custom endpoint with limited capacity).
+    if context_length and effective_max_tokens > context_length:
+        effective_max_tokens = max(context_length - 1, 1)

    # ── OAuth: Claude Code identity ──────────────────────────────────
    if is_oauth:
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -47,8 +47,7 @@ from typing import Any, Dict, List, Optional, Tuple

 from openai import OpenAI

-from hermes_cli.config import get_hermes_home
-from hermes_constants import OPENROUTER_BASE_URL
+from hermes_constants import OPENROUTER_BASE_URL, get_hermes_home

 logger = logging.getLogger(__name__)

@@ -627,8 +626,6 @@ def _resolve_custom_runtime() -> Tuple[Optional[str], Optional[str]]:
    custom_key = runtime.get("api_key")
    if not isinstance(custom_base, str) or not custom_base.strip():
        return None, None
-    if not isinstance(custom_key, str) or not custom_key.strip():
-        return None, None

    custom_base = custom_base.strip().rstrip("/")
    if "openrouter.ai" in custom_base.lower():
@@ -636,6 +633,13 @@ def _resolve_custom_runtime() -> Tuple[Optional[str], Optional[str]]:
        # configured. Treat that as "no custom endpoint" for auxiliary routing.
        return None, None

+    # Local servers (Ollama, llama.cpp, vLLM, LM Studio) don't require auth.
+    # Use a placeholder key — the OpenAI SDK requires a non-empty string but
+    # local servers ignore the Authorization header.  Same fix as cli.py
+    # _ensure_runtime_credentials() (PR #2556).
+    if not isinstance(custom_key, str) or not custom_key.strip():
+        custom_key = "no-key-required"
+
    return custom_base, custom_key.strip()


@@ -693,7 +697,13 @@ def _try_anthropic() -> Tuple[Optional[Any], Optional[str]]:
    is_oauth = _is_oauth_token(token)
    model = _API_KEY_PROVIDER_AUX_MODELS.get("anthropic", "claude-haiku-4-5-20251001")
    logger.debug("Auxiliary client: Anthropic native (%s) at %s (oauth=%s)", model, base_url, is_oauth)
-    real_client = build_anthropic_client(token, base_url)
+    try:
+        real_client = build_anthropic_client(token, base_url)
+    except ImportError:
+        # The anthropic_adapter module imports fine but the SDK itself is
+        # missing — build_anthropic_client raises ImportError at call time
+        # when _anthropic_sdk is None.  Treat as unavailable.
+        return None, None
    return AnthropicAuxiliaryClient(real_client, model, token, base_url, is_oauth=is_oauth), model


@@ -731,16 +741,37 @@ def _resolve_forced_provider(forced: str) -> Tuple[Optional[OpenAI], Optional[st
    return None, None


+# Human-readable provider names for the auto-detection log messages, keyed
+# by the ``__name__`` of each resolver function that ``_resolve_auto``
+# iterates over.  A function missing from this map is logged under its raw
+# function name.
+_AUTO_PROVIDER_LABELS = {
+    "_try_openrouter": "openrouter",
+    "_try_nous": "nous",
+    "_try_custom_endpoint": "local/custom",
+    "_try_codex": "openai-codex",
+    "_resolve_api_key_provider": "api-key",
+}
+
+
 def _resolve_auto() -> Tuple[Optional[OpenAI], Optional[str]]:
    """Full auto-detection chain: OpenRouter → Nous → custom → Codex → API-key → None."""
    global auxiliary_is_nous
    auxiliary_is_nous = False  # Reset — _try_nous() will set True if it wins
+    tried = []
    for try_fn in (_try_openrouter, _try_nous, _try_custom_endpoint,
                   _try_codex, _resolve_api_key_provider):
+        fn_name = getattr(try_fn, "__name__", "unknown")
+        label = _AUTO_PROVIDER_LABELS.get(fn_name, fn_name)
        client, model = try_fn()
        if client is not None:
+            if tried:
+                logger.info("Auxiliary auto-detect: using %s (%s) — skipped: %s",
+                            label, model or "default", ", ".join(tried))
+            else:
+                logger.info("Auxiliary auto-detect: using %s (%s)", label, model or "default")
            return client, model
-    logger.debug("Auxiliary client: none available")
+        tried.append(label)
+    logger.warning("Auxiliary auto-detect: no provider available (tried: %s). "
+                   "Compression, summarization, and memory flush will not work. "
+                   "Set OPENROUTER_API_KEY or configure a local model in config.yaml.",
+                   ", ".join(tried))
    return None, None


@@ -891,11 +922,12 @@ def resolve_provider_client(
            custom_key = (
                (explicit_api_key or "").strip()
                or os.getenv("OPENAI_API_KEY", "").strip()
+                or "no-key-required"  # local servers don't need auth
            )
-            if not custom_base or not custom_key:
+            if not custom_base:
                logger.warning(
                    "resolve_provider_client: explicit custom endpoint requested "
-                    "but no API key was found (set explicit_api_key or OPENAI_API_KEY)"
+                    "but base_url is empty"
                )
                return None, None
            final_model = model or _read_main_model() or "gpt-4o-mini"
@@ -1131,7 +1163,13 @@ def resolve_vision_provider_client(
        return "custom", client, final_model

    if requested == "auto":
-        for candidate in get_available_vision_backends():
+        ordered = list(_VISION_AUTO_PROVIDER_ORDER)
+        preferred = _preferred_main_vision_provider()
+        if preferred in ordered:
+            ordered.remove(preferred)
+            ordered.insert(0, preferred)
+
+        for candidate in ordered:
            sync_client, default_model = _resolve_strict_vision_backend(candidate)
            if sync_client is not None:
                return _finalize(candidate, sync_client, default_model)
@@ -1204,6 +1242,39 @@ _client_cache: Dict[tuple, tuple] = {}
 _client_cache_lock = threading.Lock()


+def neuter_async_httpx_del() -> None:
+    """Turn ``AsyncHttpxClientWrapper.__del__`` into a do-nothing method.
+
+    Background: the OpenAI SDK's ``AsyncHttpxClientWrapper.__del__``
+    schedules ``self.aclose()`` with
+    ``asyncio.get_running_loop().create_task()``.  If an ``AsyncOpenAI``
+    client is garbage-collected while prompt_toolkit's event loop is
+    running (the usual idle state of the CLI), that ``aclose()`` task runs
+    on prompt_toolkit's loop even though the TCP transport is bound to a
+    *different* loop: the worker thread's loop the client was created on.
+    When that loop is closed or its thread is dead, the transport's
+    ``self._loop.call_soon()`` raises ``RuntimeError("Event loop is
+    closed")``, and prompt_toolkit shows it as "Unhandled exception in
+    event loop ... Press ENTER to continue...".
+
+    Disabling the finalizer is considered safe because:
+    - cached clients are closed explicitly, by ``_force_close_async_httpx``
+      when a stale loop is detected and by ``shutdown_cached_clients`` on
+      exit;
+    - TCP connections of uncached clients are reclaimed by the OS when the
+      process exits;
+    - the SDK itself flags this code path as a TODO (``# TODO(someday):
+      support non asyncio runtimes here``).
+
+    Call once at CLI startup, before the first ``AsyncOpenAI`` client is
+    created.  If the SDK is missing or its internals have moved, the
+    function silently does nothing.
+    """
+    def _noop_del(self) -> None:
+        return None
+
+    try:
+        from openai._base_client import AsyncHttpxClientWrapper
+        AsyncHttpxClientWrapper.__del__ = _noop_del  # type: ignore[assignment]
+    except (ImportError, AttributeError):
+        # Graceful degradation if the SDK changes its internals
+        return
+
+
 def _force_close_async_httpx(client: Any) -> None:
    """Mark the httpx AsyncClient inside an AsyncOpenAI client as closed.

@@ -1251,6 +1322,25 @@ def shutdown_cached_clients() -> None:
        _client_cache.clear()


+def cleanup_stale_async_clients() -> None:
+    """Evict cached async clients whose recorded event loop is closed.
+
+    Intended to run after each agent turn so stale clients are closed
+    before garbage collection can reach ``AsyncHttpxClientWrapper.__del__``
+    on them.  This is defense-in-depth; the primary fix is
+    ``neuter_async_httpx_del``, which disables ``__del__`` entirely.
+
+    Every cache entry is a ``(client, default, loop)`` triple.  Entries
+    whose loop is ``None`` or still open are left alone.
+    """
+    with _client_cache_lock:
+        doomed = []
+        for cache_key, (client, _default, loop) in _client_cache.items():
+            if loop is None or not loop.is_closed():
+                continue
+            _force_close_async_httpx(client)
+            doomed.append(cache_key)
+        # Remove only after the scan: a dict cannot shrink while iterated.
+        for cache_key in doomed:
+            del _client_cache[cache_key]
+
+
 def _get_cached_client(
    provider: str,
    model: str = None,
@@ -1394,6 +1484,29 @@ def _resolve_task_provider_model(
    return "auto", resolved_model, None, None


+_DEFAULT_AUX_TIMEOUT = 30.0
+
+
+def _get_task_timeout(task: str, default: float = _DEFAULT_AUX_TIMEOUT) -> float:
+    """Read timeout from auxiliary.{task}.timeout in config, falling back to *default*.
+
+    Args:
+        task: Auxiliary task name used as the key under ``auxiliary``.
+            An empty/None task short-circuits to *default* without
+            touching the config.
+        default: Value returned whenever no usable timeout is configured.
+
+    Returns:
+        ``float(auxiliary.{task}.timeout)`` when that value exists and is
+        convertible; otherwise *default*.  *default* is also returned when
+        ``hermes_cli.config`` cannot be imported, or when any level of the
+        path is not a mapping.
+    """
+    if not task:
+        return default
+    try:
+        from hermes_cli.config import load_config
+        config = load_config()
+    except ImportError:
+        return default
+    # Descend config -> auxiliary -> task one level at a time.  Any level
+    # that is not a dict (e.g. a bare ``compression:`` key, presumably
+    # loaded as None) ends the lookup instead of raising AttributeError.
+    node = config
+    for key in ("auxiliary", task):
+        if not isinstance(node, dict):
+            return default
+        node = node.get(key)
+    if not isinstance(node, dict):
+        return default
+    raw = node.get("timeout")
+    if raw is None:
+        return default
+    try:
+        return float(raw)
+    except (ValueError, TypeError):
+        # Unparseable value in config: ignore it rather than fail the call.
+        return default
+
+
 def _build_call_kwargs(
    provider: str,
    model: str,
@@ -1451,7 +1564,7 @@ def call_llm(
    temperature: float = None,
    max_tokens: int = None,
    tools: list = None,
-    timeout: float = 30.0,
+    timeout: float = None,
    extra_body: dict = None,
 ) -> Any:
    """Centralized synchronous LLM call.
@@ -1469,7 +1582,7 @@ def call_llm(
        temperature: Sampling temperature (None = provider default).
        max_tokens: Max output tokens (handles max_tokens vs max_completion_tokens).
        tools: Tool definitions (for function calling).
-        timeout: Request timeout in seconds.
+        timeout: Request timeout in seconds (None = read from auxiliary.{task}.timeout config).
        extra_body: Additional request body fields.

    Returns:
@@ -1525,8 +1638,8 @@ def call_llm(
                )
            # For auto/custom, fall back to OpenRouter
            if not resolved_base_url:
-                logger.warning("Provider %s unavailable, falling back to openrouter",
-                               resolved_provider)
+                logger.info("Auxiliary %s: provider %s unavailable, falling back to openrouter",
+                            task or "call", resolved_provider)
                client, final_model = _get_cached_client(
                    "openrouter", resolved_model or _OPENROUTER_MODEL)
        if client is None:
@@ -1534,10 +1647,19 @@ def call_llm(
                f"No LLM provider configured for task={task} provider={resolved_provider}. "
                f"Run: hermes setup")

+    effective_timeout = timeout if timeout is not None else _get_task_timeout(task)
+
+    # Log what we're about to do — makes auxiliary operations visible
+    _base_info = str(getattr(client, "base_url", resolved_base_url) or "")
+    if task:
+        logger.info("Auxiliary %s: using %s (%s)%s",
+                     task, resolved_provider or "auto", final_model or "default",
+                     f" at {_base_info}" if _base_info and "openrouter" not in _base_info else "")
+
    kwargs = _build_call_kwargs(
        resolved_provider, final_model, messages,
        temperature=temperature, max_tokens=max_tokens,
-        tools=tools, timeout=timeout, extra_body=extra_body,
+        tools=tools, timeout=effective_timeout, extra_body=extra_body,
        base_url=resolved_base_url)

    # Handle max_tokens vs max_completion_tokens retry
@@ -1552,6 +1674,62 @@ def call_llm(
        raise


+def extract_content_or_reasoning(response) -> str:
+    """Extract content from an LLM response, falling back to reasoning fields.
+
+    Mirrors the main agent loop's behavior when a reasoning model (DeepSeek-R1,
+    Qwen-QwQ, etc.) returns ``content=None`` with reasoning in structured fields.
+
+    Resolution order:
+      1. ``message.content`` — strip inline think/reasoning blocks, check for
+         remaining non-whitespace text.
+      2. ``message.reasoning`` / ``message.reasoning_content`` — direct
+         structured reasoning fields (DeepSeek, Moonshot, Novita, etc.).
+      3. ``message.reasoning_details`` — OpenRouter unified array format.
+
+    Reasoning fragments are whitespace-stripped *before* they are compared
+    for duplicates, so the same text arriving through two fields is emitted
+    once, and whitespace-only fragments are dropped.
+
+    Args:
+        response: Chat-completion style object; ``response.choices[0].message``
+            is read.
+
+    Returns:
+        The best available text (reasoning fragments joined by blank lines),
+        or ``""`` if nothing found.
+    """
+    import re
+
+    msg = response.choices[0].message
+    content = (msg.content or "").strip()
+
+    if content:
+        # Strip inline think/reasoning blocks (mirrors _strip_think_blocks)
+        cleaned = re.sub(
+            r"<(?:think|thinking|reasoning|REASONING_SCRATCHPAD)>"
+            r".*?"
+            r"</(?:think|thinking|reasoning|REASONING_SCRATCHPAD)>",
+            "", content, flags=re.DOTALL | re.IGNORECASE,
+        ).strip()
+        if cleaned:
+            return cleaned
+
+    # Content is empty or reasoning-only — try structured reasoning fields
+    reasoning_parts: list[str] = []
+
+    def _collect(value) -> None:
+        # Normalize first, then de-duplicate against already-normalized parts.
+        text = (value if isinstance(value, str) else str(value)).strip()
+        if text and text not in reasoning_parts:
+            reasoning_parts.append(text)
+
+    for field in ("reasoning", "reasoning_content"):
+        val = getattr(msg, field, None)
+        if isinstance(val, str):
+            _collect(val)
+
+    details = getattr(msg, "reasoning_details", None)
+    if isinstance(details, list):
+        for detail in details:
+            if not isinstance(detail, dict):
+                continue
+            summary = (
+                detail.get("summary")
+                or detail.get("content")
+                or detail.get("text")
+            )
+            if summary:
+                _collect(summary)
+
+    # Joining an empty list yields "", the documented "nothing found" value.
+    return "\n\n".join(reasoning_parts)
+
+
 async def async_call_llm(
    task: str = None,
    *,
@@ -1563,7 +1741,7 @@ async def async_call_llm(
    temperature: float = None,
    max_tokens: int = None,
    tools: list = None,
-    timeout: float = 30.0,
+    timeout: float = None,
    extra_body: dict = None,
 ) -> Any:
    """Centralized asynchronous LLM call.
@@ -1624,10 +1802,12 @@ async def async_call_llm(
                f"No LLM provider configured for task={task} provider={resolved_provider}. "
                f"Run: hermes setup")

+    effective_timeout = timeout if timeout is not None else _get_task_timeout(task)
+
    kwargs = _build_call_kwargs(
        resolved_provider, final_model, messages,
        temperature=temperature, max_tokens=max_tokens,
-        tools=tools, timeout=timeout, extra_body=extra_body,
+        tools=tools, timeout=effective_timeout, extra_body=extra_body,
        base_url=resolved_base_url)

    try:
--- a/agent/context_compressor.py
+++ b/agent/context_compressor.py
@@ -141,7 +141,7 @@ class ContextCompressor:
            "last_prompt_tokens": self.last_prompt_tokens,
            "threshold_tokens": self.threshold_tokens,
            "context_length": self.context_length,
-            "usage_percent": (self.last_prompt_tokens / self.context_length * 100) if self.context_length else 0,
+            "usage_percent": min(100, (self.last_prompt_tokens / self.context_length * 100)) if self.context_length else 0,
            "compression_count": self.compression_count,
        }

@@ -347,7 +347,7 @@ Write only the summary body. Do not include any preamble or prefix."""
                "messages": [{"role": "user", "content": prompt}],
                "temperature": 0.3,
                "max_tokens": summary_budget * 2,
-                "timeout": 45.0,
+                # timeout resolved from auxiliary.compression.timeout config by call_llm
            }
            if self.summary_model:
                call_kwargs["model"] = self.summary_model
--- a/agent/context_references.py
+++ b/agent/context_references.py
@@ -286,12 +286,16 @@ def _expand_git_reference(
    args: list[str],
    label: str,
 ) -> tuple[str | None, str | None]:
-    result = subprocess.run(
-        ["git", *args],
-        cwd=cwd,
-        capture_output=True,
-        text=True,
-    )
+    try:
+        result = subprocess.run(
+            ["git", *args],
+            cwd=cwd,
+            capture_output=True,
+            text=True,
+            timeout=30,
+        )
+    except subprocess.TimeoutExpired:
+        return f"{ref.raw}: git command timed out (30s)", None
    if result.returncode != 0:
        stderr = (result.stderr or "").strip() or "git command failed"
        return f"{ref.raw}: {stderr}", None
@@ -449,9 +453,12 @@ def _rg_files(path: Path, cwd: Path, limit: int) -> list[Path] | None:
            cwd=cwd,
            capture_output=True,
            text=True,
+            timeout=10,
        )
    except FileNotFoundError:
        return None
+    except subprocess.TimeoutExpired:
+        return None
    if result.returncode != 0:
        return None
    files = [Path(line.strip()) for line in result.stdout.splitlines() if line.strip()]
--- a/agent/display.py
+++ b/agent/display.py
@@ -17,6 +17,23 @@ _RESET = "\033[0m"

 logger = logging.getLogger(__name__)

+# =========================================================================
+# Configurable tool preview length (0 = no limit)
+# Set once at startup by CLI or gateway from display.tool_preview_length config.
+# =========================================================================
+_tool_preview_max_len: int = 0  # 0 = unlimited
+
+
+def set_tool_preview_max_len(n: int) -> None:
+    """Store the global cap on tool call preview length.
+
+    Falsy input (``None``, ``0``, ``""``) and negative numbers both store
+    ``0``, which means "no limit".  Anything else is passed through
+    ``int()`` first.
+    """
+    global _tool_preview_max_len
+    if not n:
+        _tool_preview_max_len = 0
+        return
+    limit = int(n)
+    # Clamp negatives to 0 so the stored value is never below "unlimited".
+    _tool_preview_max_len = limit if limit > 0 else 0
+
+
+def get_tool_preview_max_len() -> int:
+    """Return the currently stored preview cap; ``0`` means unlimited."""
+    current = _tool_preview_max_len
+    return current
+

 # =========================================================================
 # Skin-aware helpers (lazy import to avoid circular deps)
@@ -94,8 +111,14 @@ def _oneline(text: str) -> str:
    return " ".join(text.split())


-def build_tool_preview(tool_name: str, args: dict, max_len: int = 40) -> str | None:
-    """Build a short preview of a tool call's primary argument for display."""
+def build_tool_preview(tool_name: str, args: dict, max_len: int | None = None) -> str | None:
+    """Build a short preview of a tool call's primary argument for display.
+
+    *max_len* controls truncation.  ``None`` (default) defers to the global
+    ``_tool_preview_max_len`` set via config; ``0`` means unlimited.
+    """
+    if max_len is None:
+        max_len = _tool_preview_max_len
    if not args:
        return None
    primary_args = {
@@ -190,7 +213,7 @@ def build_tool_preview(tool_name: str, args: dict, max_len: int = 40) -> str | N
    preview = _oneline(str(value))
    if not preview:
        return None
-    if len(preview) > max_len:
+    if max_len > 0 and len(preview) > max_len:
        preview = preview[:max_len - 3] + "..."
    return preview

@@ -231,7 +254,7 @@ class KawaiiSpinner:
        "analyzing", "computing", "synthesizing", "formulating", "brainstorming",
    ]

-    def __init__(self, message: str = "", spinner_type: str = 'dots'):
+    def __init__(self, message: str = "", spinner_type: str = 'dots', print_fn=None):
        self.message = message
        self.spinner_frames = self.SPINNERS.get(spinner_type, self.SPINNERS['dots'])
        self.running = False
@@ -239,12 +262,26 @@ class KawaiiSpinner:
        self.frame_idx = 0
        self.start_time = None
        self.last_line_len = 0
+        # Optional callable to route all output through (e.g. a no-op for silent
+        # background agents).  When set, bypasses self._out entirely so that
+        # agents with _print_fn overridden remain fully silent.
+        self._print_fn = print_fn
        # Capture stdout NOW, before any redirect_stdout(devnull) from
        # child agents can replace sys.stdout with a black hole.
        self._out = sys.stdout

    def _write(self, text: str, end: str = '\n', flush: bool = False):
-        """Write to the stdout captured at spinner creation time."""
+        """Write to the stdout captured at spinner creation time.
+
+        If a print_fn was supplied at construction, all output is routed through
+        it instead — allowing callers to silence the spinner with a no-op lambda.
+        """
+        if self._print_fn is not None:
+            try:
+                self._print_fn(text)
+            except Exception:
+                pass
+            return
        try:
            self._out.write(text + end)
            if flush:
@@ -270,11 +307,11 @@ class KawaiiSpinner:
        The CLI already drives a TUI widget (_spinner_text) for spinner display,
        so KawaiiSpinner's \\r-based animation is redundant under StdoutProxy.
        """
-        out = self._out
-        # StdoutProxy has a 'raw' attribute (bool) that plain file objects lack.
-        if hasattr(out, 'raw') and type(out).__name__ == 'StdoutProxy':
-            return True
-        return False
+        try:
+            from prompt_toolkit.patch_stdout import StdoutProxy
+            return isinstance(self._out, StdoutProxy)
+        except ImportError:
+            return False

    def _animate(self):
        # When stdout is not a real terminal (e.g. Docker, systemd, pipe),
@@ -470,10 +507,14 @@ def get_cute_tool_message(

    def _trunc(s, n=40):
        s = str(s)
+        if _tool_preview_max_len == 0:
+            return s  # no limit
        return (s[:n-3] + "...") if len(s) > n else s

    def _path(p, n=35):
        p = str(p)
+        if _tool_preview_max_len == 0:
+            return p  # no limit
        return ("..." + p[-(n-3):]) if len(p) > n else p

    def _wrap(line: str) -> str:
@@ -685,7 +726,7 @@ def format_context_pressure(
        threshold_percent: Compaction threshold as a fraction of context window.
        compression_enabled: Whether auto-compression is active.
    """
-    pct_int = int(compaction_progress * 100)
+    pct_int = min(int(compaction_progress * 100), 100)
    filled = min(int(compaction_progress * _BAR_WIDTH), _BAR_WIDTH)
    bar = _BAR_FILLED * filled + _BAR_EMPTY * (_BAR_WIDTH - filled)

@@ -715,7 +756,7 @@ def format_context_pressure_gateway(
    No ANSI — just Unicode and plain text suitable for Telegram/Discord/etc.
    The percentage shows progress toward the compaction threshold.
    """
-    pct_int = int(compaction_progress * 100)
+    pct_int = min(int(compaction_progress * 100), 100)
    filled = min(int(compaction_progress * _BAR_WIDTH), _BAR_WIDTH)
    bar = _BAR_FILLED * filled + _BAR_EMPTY * (_BAR_WIDTH - filled)

--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@@ -113,6 +113,15 @@ DEFAULT_CONTEXT_LENGTHS = {
    "glm": 202752,
    # Kimi
    "kimi": 262144,
+    # Hugging Face Inference Providers — model IDs use org/name format
+    "Qwen/Qwen3.5-397B-A17B": 131072,
+    "Qwen/Qwen3.5-35B-A3B": 131072,
+    "deepseek-ai/DeepSeek-V3.2": 65536,
+    "moonshotai/Kimi-K2.5": 262144,
+    "moonshotai/Kimi-K2-Thinking": 262144,
+    "MiniMaxAI/MiniMax-M2.5": 204800,
+    "XiaomiMiMo/MiMo-V2-Flash": 32768,
+    "zai-org/GLM-5": 202752,
 }

 _CONTEXT_LENGTH_KEYS = (
--- a/agent/models_dev.py
+++ b/agent/models_dev.py
@@ -15,6 +15,8 @@ import time
 from pathlib import Path
 from typing import Any, Dict, Optional

+from utils import atomic_json_write
+
 import requests

 logger = logging.getLogger(__name__)
@@ -64,12 +66,10 @@ def _load_disk_cache() -> Dict[str, Any]:


 def _save_disk_cache(data: Dict[str, Any]) -> None:
-    """Save models.dev data to disk cache."""
+    """Save models.dev data to disk cache atomically."""
    try:
        cache_path = _get_cache_path()
-        cache_path.parent.mkdir(parents=True, exist_ok=True)
-        with open(cache_path, "w", encoding="utf-8") as f:
-            json.dump(data, f, separators=(",", ":"))
+        atomic_json_write(cache_path, data, indent=None, separators=(",", ":"))
    except Exception as e:
        logger.debug("Failed to save models.dev disk cache: %s", e)

--- a/agent/prompt_builder.py
+++ b/agent/prompt_builder.py
@@ -4,14 +4,28 @@ All functions are stateless. AIAgent._build_system_prompt() calls these to
 assemble pieces, then combines them with memory and ephemeral prompts.
 """

+import json
 import logging
 import os
 import re
+import threading
+from collections import OrderedDict
 from pathlib import Path

 from hermes_constants import get_hermes_home
 from typing import Optional

+from agent.skill_utils import (
+    extract_skill_conditions,
+    extract_skill_description,
+    get_all_skills_dirs,
+    get_disabled_skill_names,
+    iter_skill_index_files,
+    parse_frontmatter,
+    skill_matches_platform,
+)
+from utils import atomic_json_write
+
 logger = logging.getLogger(__name__)

 # ---------------------------------------------------------------------------
@@ -156,6 +170,25 @@ SKILLS_GUIDANCE = (
    "Skills that aren't maintained become liabilities."
 )

+TOOL_USE_ENFORCEMENT_GUIDANCE = (
+    "# Tool-use enforcement\n"
+    "You MUST use your tools to take action — do not describe what you would do "
+    "or plan to do without actually doing it. When you say you will perform an "
+    "action (e.g. 'I will run the tests', 'Let me check the file', 'I will create "
+    "the project'), you MUST immediately make the corresponding tool call in the same "
+    "response. Never end your turn with a promise of future action — execute it now.\n"
+    "Keep working until the task is actually complete. Do not stop with a summary of "
+    "what you plan to do next time. If you have tools available that can accomplish "
+    "the task, use them instead of telling the user what you would do.\n"
+    "Every response should either (a) contain tool calls that make progress, or "
+    "(b) deliver a final result to the user. Responses that only describe intentions "
+    "without acting are not acceptable."
+)
+
+# Model name substrings that trigger tool-use enforcement guidance.
+# Add new patterns here when a model family needs explicit steering.
+TOOL_USE_ENFORCEMENT_MODELS = ("gpt", "codex")
+
 PLATFORM_HINTS = {
    "whatsapp": (
        "You are on a text messaging communication platform, WhatsApp. "
@@ -230,6 +263,111 @@ CONTEXT_TRUNCATE_HEAD_RATIO = 0.7
 CONTEXT_TRUNCATE_TAIL_RATIO = 0.2


+# =========================================================================
+# Skills prompt cache
+# =========================================================================
+
+# Upper bound on the number of entries kept in _SKILLS_PROMPT_CACHE.
+_SKILLS_PROMPT_CACHE_MAX = 8
+# Maps a cache-key tuple to a rendered skills prompt string.
+_SKILLS_PROMPT_CACHE: OrderedDict[tuple, str] = OrderedDict()
+# Held around every read and write of _SKILLS_PROMPT_CACHE.
+_SKILLS_PROMPT_CACHE_LOCK = threading.Lock()
+# Disk snapshots whose "version" differs from this value are ignored on load.
+_SKILLS_SNAPSHOT_VERSION = 1
+
+
+def _skills_prompt_snapshot_path() -> Path:
+    """Return the path of ``.skills_prompt_snapshot.json`` under the Hermes home."""
+    return get_hermes_home() / ".skills_prompt_snapshot.json"
+
+
+def clear_skills_system_prompt_cache(*, clear_snapshot: bool = False) -> None:
+    """Empty the in-process skills prompt cache.
+
+    Args:
+        clear_snapshot: When true, also delete the on-disk snapshot file.
+            An OSError while deleting is logged at debug level, not raised.
+    """
+    with _SKILLS_PROMPT_CACHE_LOCK:
+        _SKILLS_PROMPT_CACHE.clear()
+    if not clear_snapshot:
+        return
+    try:
+        snapshot_file = _skills_prompt_snapshot_path()
+        snapshot_file.unlink(missing_ok=True)
+    except OSError as e:
+        logger.debug("Could not remove skills prompt snapshot: %s", e)
+
+
+def _build_skills_manifest(skills_dir: Path) -> dict[str, list[int]]:
+    """Fingerprint every SKILL.md and DESCRIPTION.md found under *skills_dir*.
+
+    Returns a mapping from each file's path relative to *skills_dir* to
+    ``[st_mtime_ns, st_size]``.  Files that cannot be stat'ed are left out.
+    """
+    fingerprints: dict[str, list[int]] = {}
+    for index_name in ("SKILL.md", "DESCRIPTION.md"):
+        for index_file in iter_skill_index_files(skills_dir, index_name):
+            try:
+                info = index_file.stat()
+            except OSError:
+                continue
+            rel_key = str(index_file.relative_to(skills_dir))
+            fingerprints[rel_key] = [info.st_mtime_ns, info.st_size]
+    return fingerprints
+
+
+def _load_skills_snapshot(skills_dir: Path) -> Optional[dict]:
+    """Return the on-disk snapshot, or None when it cannot be trusted.
+
+    The snapshot is rejected when the file is missing or unreadable, is not a
+    JSON object, carries a different ``version``, or records a manifest that
+    no longer equals the one computed from *skills_dir*.
+    """
+    snapshot_file = _skills_prompt_snapshot_path()
+    if not snapshot_file.exists():
+        return None
+    try:
+        loaded = json.loads(snapshot_file.read_text(encoding="utf-8"))
+    except Exception:
+        return None
+    # Checks short-circuit in this order, so the manifest (a filesystem walk)
+    # is only rebuilt for a structurally valid, current-version snapshot.
+    is_current = (
+        isinstance(loaded, dict)
+        and loaded.get("version") == _SKILLS_SNAPSHOT_VERSION
+        and loaded.get("manifest") == _build_skills_manifest(skills_dir)
+    )
+    return loaded if is_current else None
+
+
+def _write_skills_snapshot(
+    skills_dir: Path,
+    manifest: dict[str, list[int]],
+    skill_entries: list[dict],
+    category_descriptions: dict[str, str],
+) -> None:
+    """Persist skill metadata to disk for fast cold-start reuse.
+
+    The payload bundles the snapshot version, *manifest*, *skill_entries* and
+    *category_descriptions*.  Any failure while writing is logged at debug
+    level and swallowed.  *skills_dir* is accepted but not read here.
+    """
+    payload = {
+        "version": _SKILLS_SNAPSHOT_VERSION,
+        "manifest": manifest,
+        "skills": skill_entries,
+        "category_descriptions": category_descriptions,
+    }
+    try:
+        atomic_json_write(_skills_prompt_snapshot_path(), payload)
+    except Exception as e:
+        logger.debug("Could not write skills prompt snapshot: %s", e)
+
+
+def _build_snapshot_entry(
+    skill_file: Path,
+    skills_dir: Path,
+    frontmatter: dict,
+    description: str,
+) -> dict:
+    """Build a JSON-serialisable metadata dict for one skill.
+
+    The skill name is the directory holding *skill_file*; the category is the
+    path above that directory relative to *skills_dir*.  For a skill directly
+    under *skills_dir* the category equals the skill name, and a SKILL.md at
+    the root of *skills_dir* is filed under ``"general"``.
+
+    Returns:
+        A dict with keys ``skill_name``, ``category``, ``frontmatter_name``,
+        ``description``, ``platforms`` and ``conditions``.
+    """
+    rel_path = skill_file.relative_to(skills_dir)
+    parts = rel_path.parts
+    if len(parts) >= 2:
+        skill_name = parts[-2]
+        category = "/".join(parts[:-2]) if len(parts) > 2 else parts[0]
+    else:
+        category = "general"
+        skill_name = skill_file.parent.name
+
+    # Wrap any non-list value (not only str), matching what
+    # skill_matches_platform() does, so a scalar cannot break the iteration.
+    platforms = frontmatter.get("platforms") or []
+    if not isinstance(platforms, list):
+        platforms = [platforms]
+    platform_names = [text for text in (str(p).strip() for p in platforms) if text]
+
+    # An empty ``name:`` key parses as None; fall back to the directory name
+    # instead of recording the literal string "None".
+    frontmatter_name = frontmatter.get("name") or skill_name
+
+    return {
+        "skill_name": skill_name,
+        "category": category,
+        "frontmatter_name": str(frontmatter_name),
+        "description": description,
+        "platforms": platform_names,
+        "conditions": extract_skill_conditions(frontmatter),
+    }
+
+
 # =========================================================================
 # Skills index
 # =========================================================================
@@ -241,22 +379,13 @@ def _parse_skill_file(skill_file: Path) -> tuple[bool, dict, str]:
    (True, {}, "") to err on the side of showing the skill.
    """
    try:
-        from tools.skills_tool import _parse_frontmatter, skill_matches_platform
-
        raw = skill_file.read_text(encoding="utf-8")[:2000]
-        frontmatter, _ = _parse_frontmatter(raw)
+        frontmatter, _ = parse_frontmatter(raw)

        if not skill_matches_platform(frontmatter):
-            return False, {}, ""
+            return False, frontmatter, ""

-        desc = ""
-        raw_desc = frontmatter.get("description", "")
-        if raw_desc:
-            desc = str(raw_desc).strip().strip("'\"")
-            if len(desc) > 60:
-                desc = desc[:57] + "..."
-
-        return True, frontmatter, desc
+        return True, frontmatter, extract_skill_description(frontmatter)
    except Exception as e:
        logger.debug("Failed to parse skill file %s: %s", skill_file, e)
        return True, {}, ""
@@ -265,16 +394,9 @@ def _parse_skill_file(skill_file: Path) -> tuple[bool, dict, str]:
 def _read_skill_conditions(skill_file: Path) -> dict:
    """Extract conditional activation fields from SKILL.md frontmatter."""
    try:
-        from tools.skills_tool import _parse_frontmatter
        raw = skill_file.read_text(encoding="utf-8")[:2000]
-        frontmatter, _ = _parse_frontmatter(raw)
-        hermes = frontmatter.get("metadata", {}).get("hermes", {})
-        return {
-            "fallback_for_toolsets": hermes.get("fallback_for_toolsets", []),
-            "requires_toolsets": hermes.get("requires_toolsets", []),
-            "fallback_for_tools": hermes.get("fallback_for_tools", []),
-            "requires_tools": hermes.get("requires_tools", []),
-        }
+        frontmatter, _ = parse_frontmatter(raw)
+        return extract_skill_conditions(frontmatter)
    except Exception as e:
        logger.debug("Failed to read skill conditions from %s: %s", skill_file, e)
        return {}
@@ -317,109 +439,210 @@ def build_skills_system_prompt(
 ) -> str:
    """Build a compact skill index for the system prompt.

-    Scans ~/.hermes/skills/ for SKILL.md files grouped by category.
-    Includes per-skill descriptions from frontmatter so the model can
-    match skills by meaning, not just name.
-    Filters out skills incompatible with the current OS platform.
+    Two-layer cache:
+      1. In-process LRU dict keyed by (skills_dir, tools, toolsets)
+      2. Disk snapshot (``.skills_prompt_snapshot.json``) validated by
+         mtime/size manifest — survives process restarts
+
+    Falls back to a full filesystem scan when both layers miss.
+
+    External skill directories (``skills.external_dirs`` in config.yaml) are
+    scanned alongside the local ``~/.hermes/skills/`` directory.  External dirs
+    are read-only — they appear in the index but new skills are always created
+    in the local dir.  Local skills take precedence when names collide.
    """
    hermes_home = get_hermes_home()
    skills_dir = hermes_home / "skills"
+    external_dirs = get_all_skills_dirs()[1:]  # skip local (index 0)

-    if not skills_dir.exists():
+    if not skills_dir.exists() and not external_dirs:
        return ""

-    # Collect skills with descriptions, grouped by category.
-    # Each entry: (skill_name, description)
-    # Supports sub-categories: skills/mlops/training/axolotl/SKILL.md
-    # -> category "mlops/training", skill "axolotl"
-    # Load disabled skill names once for the entire scan
-    try:
-        from tools.skills_tool import _get_disabled_skill_names
-        disabled = _get_disabled_skill_names()
-    except Exception:
-        disabled = set()
+    # ── Layer 1: in-process LRU cache ─────────────────────────────────
+    cache_key = (
+        str(skills_dir.resolve()),
+        tuple(str(d) for d in external_dirs),
+        tuple(sorted(str(t) for t in (available_tools or set()))),
+        tuple(sorted(str(ts) for ts in (available_toolsets or set()))),
+    )
+    with _SKILLS_PROMPT_CACHE_LOCK:
+        cached = _SKILLS_PROMPT_CACHE.get(cache_key)
+        if cached is not None:
+            _SKILLS_PROMPT_CACHE.move_to_end(cache_key)
+            return cached
+
+    disabled = get_disabled_skill_names()
+
+    # ── Layer 2: disk snapshot ────────────────────────────────────────
+    snapshot = _load_skills_snapshot(skills_dir)

    skills_by_category: dict[str, list[tuple[str, str]]] = {}
-    for skill_file in skills_dir.rglob("SKILL.md"):
-        is_compatible, frontmatter, desc = _parse_skill_file(skill_file)
-        if not is_compatible:
-            continue
-        rel_path = skill_file.relative_to(skills_dir)
-        parts = rel_path.parts
-        if len(parts) >= 2:
-            skill_name = parts[-2]
-            category = "/".join(parts[:-2]) if len(parts) > 2 else parts[0]
-        else:
-            category = "general"
-            skill_name = skill_file.parent.name
-        # Respect user's disabled skills config
-        fm_name = frontmatter.get("name", skill_name)
-        if fm_name in disabled or skill_name in disabled:
-            continue
-        # Extract conditions inline from already-parsed frontmatter
-        # (avoids redundant file re-read that _read_skill_conditions would do)
-        hermes_meta = (frontmatter.get("metadata") or {}).get("hermes") or {}
-        conditions = {
-            "fallback_for_toolsets": hermes_meta.get("fallback_for_toolsets", []),
-            "requires_toolsets": hermes_meta.get("requires_toolsets", []),
-            "fallback_for_tools": hermes_meta.get("fallback_for_tools", []),
-            "requires_tools": hermes_meta.get("requires_tools", []),
+    category_descriptions: dict[str, str] = {}
+
+    if snapshot is not None:
+        # Fast path: use pre-parsed metadata from disk
+        for entry in snapshot.get("skills", []):
+            if not isinstance(entry, dict):
+                continue
+            skill_name = entry.get("skill_name") or ""
+            category = entry.get("category") or "general"
+            frontmatter_name = entry.get("frontmatter_name") or skill_name
+            platforms = entry.get("platforms") or []
+            if not skill_matches_platform({"platforms": platforms}):
+                continue
+            if frontmatter_name in disabled or skill_name in disabled:
+                continue
+            if not _skill_should_show(
+                entry.get("conditions") or {},
+                available_tools,
+                available_toolsets,
+            ):
+                continue
+            skills_by_category.setdefault(category, []).append(
+                (skill_name, entry.get("description", ""))
+            )
+        category_descriptions = {
+            str(k): str(v)
+            for k, v in (snapshot.get("category_descriptions") or {}).items()
        }
-        if not _skill_should_show(conditions, available_tools, available_toolsets):
-            continue
-        skills_by_category.setdefault(category, []).append((skill_name, desc))
+    else:
+        # Cold path: full filesystem scan + write snapshot for next time
+        skill_entries: list[dict] = []
+        for skill_file in iter_skill_index_files(skills_dir, "SKILL.md"):
+            is_compatible, frontmatter, desc = _parse_skill_file(skill_file)
+            entry = _build_snapshot_entry(skill_file, skills_dir, frontmatter, desc)
+            skill_entries.append(entry)
+            if not is_compatible:
+                continue
+            skill_name = entry["skill_name"]
+            if entry["frontmatter_name"] in disabled or skill_name in disabled:
+                continue
+            if not _skill_should_show(
+                extract_skill_conditions(frontmatter),
+                available_tools,
+                available_toolsets,
+            ):
+                continue
+            skills_by_category.setdefault(entry["category"], []).append(
+                (skill_name, entry["description"])
+            )

-    if not skills_by_category:
-        return ""
-
-    # Read category-level descriptions from DESCRIPTION.md
-    # Checks both the exact category path and parent directories
-    category_descriptions = {}
-    for category in skills_by_category:
-        cat_path = Path(category)
-        desc_file = skills_dir / cat_path / "DESCRIPTION.md"
-        if desc_file.exists():
+        # Read category-level DESCRIPTION.md files
+        for desc_file in iter_skill_index_files(skills_dir, "DESCRIPTION.md"):
            try:
                content = desc_file.read_text(encoding="utf-8")
-                match = re.search(r"^---\s*\n.*?description:\s*(.+?)\s*\n.*?^---", content, re.MULTILINE | re.DOTALL)
-                if match:
-                    category_descriptions[category] = match.group(1).strip()
+                fm, _ = parse_frontmatter(content)
+                cat_desc = fm.get("description")
+                if not cat_desc:
+                    continue
+                rel = desc_file.relative_to(skills_dir)
+                cat = "/".join(rel.parts[:-1]) if len(rel.parts) > 1 else "general"
+                category_descriptions[cat] = str(cat_desc).strip().strip("'\"")
            except Exception as e:
                logger.debug("Could not read skill description %s: %s", desc_file, e)

-    index_lines = []
-    for category in sorted(skills_by_category.keys()):
-        cat_desc = category_descriptions.get(category, "")
-        if cat_desc:
-            index_lines.append(f"  {category}: {cat_desc}")
-        else:
-            index_lines.append(f"  {category}:")
-        # Deduplicate and sort skills within each category
-        seen = set()
-        for name, desc in sorted(skills_by_category[category], key=lambda x: x[0]):
-            if name in seen:
-                continue
-            seen.add(name)
-            if desc:
-                index_lines.append(f"    - {name}: {desc}")
-            else:
-                index_lines.append(f"    - {name}")
+        _write_skills_snapshot(
+            skills_dir,
+            _build_skills_manifest(skills_dir),
+            skill_entries,
+            category_descriptions,
+        )

-    return (
-        "## Skills (mandatory)\n"
-        "Before replying, scan the skills below. If one clearly matches your task, "
-        "load it with skill_view(name) and follow its instructions. "
-        "If a skill has issues, fix it with skill_manage(action='patch').\n"
-        "After difficult/iterative tasks, offer to save as a skill. "
-        "If a skill you loaded was missing steps, had wrong commands, or needed "
-        "pitfalls you discovered, update it before finishing.\n"
-        "\n"
-        "<available_skills>\n"
-        + "\n".join(index_lines) + "\n"
-        "</available_skills>\n"
-        "\n"
-        "If none match, proceed normally without loading a skill."
-    )
+    # ── External skill directories ─────────────────────────────────────
+    # Scan external dirs directly (no snapshot caching — they're read-only
+    # and typically small).  Local skills already in skills_by_category take
+    # precedence: we track seen names and skip duplicates from external dirs.
+    seen_skill_names: set[str] = set()
+    for cat_skills in skills_by_category.values():
+        for name, _desc in cat_skills:
+            seen_skill_names.add(name)
+
+    for ext_dir in external_dirs:
+        if not ext_dir.exists():
+            continue
+        for skill_file in iter_skill_index_files(ext_dir, "SKILL.md"):
+            try:
+                is_compatible, frontmatter, desc = _parse_skill_file(skill_file)
+                if not is_compatible:
+                    continue
+                entry = _build_snapshot_entry(skill_file, ext_dir, frontmatter, desc)
+                skill_name = entry["skill_name"]
+                if skill_name in seen_skill_names:
+                    continue
+                if entry["frontmatter_name"] in disabled or skill_name in disabled:
+                    continue
+                if not _skill_should_show(
+                    extract_skill_conditions(frontmatter),
+                    available_tools,
+                    available_toolsets,
+                ):
+                    continue
+                seen_skill_names.add(skill_name)
+                skills_by_category.setdefault(entry["category"], []).append(
+                    (skill_name, entry["description"])
+                )
+            except Exception as e:
+                logger.debug("Error reading external skill %s: %s", skill_file, e)
+
+        # External category descriptions
+        for desc_file in iter_skill_index_files(ext_dir, "DESCRIPTION.md"):
+            try:
+                content = desc_file.read_text(encoding="utf-8")
+                fm, _ = parse_frontmatter(content)
+                cat_desc = fm.get("description")
+                if not cat_desc:
+                    continue
+                rel = desc_file.relative_to(ext_dir)
+                cat = "/".join(rel.parts[:-1]) if len(rel.parts) > 1 else "general"
+                category_descriptions.setdefault(cat, str(cat_desc).strip().strip("'\""))
+            except Exception as e:
+                logger.debug("Could not read external skill description %s: %s", desc_file, e)
+
+    if not skills_by_category:
+        result = ""
+    else:
+        index_lines = []
+        for category in sorted(skills_by_category.keys()):
+            cat_desc = category_descriptions.get(category, "")
+            if cat_desc:
+                index_lines.append(f"  {category}: {cat_desc}")
+            else:
+                index_lines.append(f"  {category}:")
+            # Deduplicate and sort skills within each category
+            seen = set()
+            for name, desc in sorted(skills_by_category[category], key=lambda x: x[0]):
+                if name in seen:
+                    continue
+                seen.add(name)
+                if desc:
+                    index_lines.append(f"    - {name}: {desc}")
+                else:
+                    index_lines.append(f"    - {name}")
+
+        result = (
+            "## Skills (mandatory)\n"
+            "Before replying, scan the skills below. If one clearly matches your task, "
+            "load it with skill_view(name) and follow its instructions. "
+            "If a skill has issues, fix it with skill_manage(action='patch').\n"
+            "After difficult/iterative tasks, offer to save as a skill. "
+            "If a skill you loaded was missing steps, had wrong commands, or needed "
+            "pitfalls you discovered, update it before finishing.\n"
+            "\n"
+            "<available_skills>\n"
+            + "\n".join(index_lines) + "\n"
+            "</available_skills>\n"
+            "\n"
+            "If none match, proceed normally without loading a skill."
+        )
+
+    # ── Store in LRU cache ────────────────────────────────────────────
+    with _SKILLS_PROMPT_CACHE_LOCK:
+        _SKILLS_PROMPT_CACHE[cache_key] = result
+        _SKILLS_PROMPT_CACHE.move_to_end(cache_key)
+        while len(_SKILLS_PROMPT_CACHE) > _SKILLS_PROMPT_CACHE_MAX:
+            _SKILLS_PROMPT_CACHE.popitem(last=False)
+
+    return result


 def build_nous_subscription_prompt(valid_tool_names: "set[str] | None" = None) -> str:
--- a/agent/skill_commands.py
+++ b/agent/skill_commands.py
@@ -128,7 +128,11 @@ def _build_skill_message(
                        supporting.append(rel)

    if supporting and skill_dir:
-        skill_view_target = str(skill_dir.relative_to(SKILLS_DIR))
+        try:
+            skill_view_target = str(skill_dir.relative_to(SKILLS_DIR))
+        except ValueError:
+            # Skill is from an external dir — use the skill name instead
+            skill_view_target = skill_dir.name
        parts.append("")
        parts.append("[This skill has supporting files you can load with the skill_view tool:]")
        for sf in supporting:
@@ -158,38 +162,49 @@ def scan_skill_commands() -> Dict[str, Dict[str, Any]]:
    _skill_commands = {}
    try:
        from tools.skills_tool import SKILLS_DIR, _parse_frontmatter, skill_matches_platform, _get_disabled_skill_names
-        if not SKILLS_DIR.exists():
-            return _skill_commands
+        from agent.skill_utils import get_external_skills_dirs
        disabled = _get_disabled_skill_names()
-        for skill_md in SKILLS_DIR.rglob("SKILL.md"):
-            if any(part in ('.git', '.github', '.hub') for part in skill_md.parts):
-                continue
-            try:
-                content = skill_md.read_text(encoding='utf-8')
-                frontmatter, body = _parse_frontmatter(content)
-                # Skip skills incompatible with the current OS platform
-                if not skill_matches_platform(frontmatter):
+        seen_names: set = set()
+
+        # Scan local dir first, then external dirs
+        dirs_to_scan = []
+        if SKILLS_DIR.exists():
+            dirs_to_scan.append(SKILLS_DIR)
+        dirs_to_scan.extend(get_external_skills_dirs())
+
+        for scan_dir in dirs_to_scan:
+            for skill_md in scan_dir.rglob("SKILL.md"):
+                if any(part in ('.git', '.github', '.hub') for part in skill_md.parts):
                    continue
-                name = frontmatter.get('name', skill_md.parent.name)
-                # Respect user's disabled skills config
-                if name in disabled:
+                try:
+                    content = skill_md.read_text(encoding='utf-8')
+                    frontmatter, body = _parse_frontmatter(content)
+                    # Skip skills incompatible with the current OS platform
+                    if not skill_matches_platform(frontmatter):
+                        continue
+                    name = frontmatter.get('name', skill_md.parent.name)
+                    if name in seen_names:
+                        continue
+                    # Respect user's disabled skills config
+                    if name in disabled:
+                        continue
+                    description = frontmatter.get('description', '')
+                    if not description:
+                        for line in body.strip().split('\n'):
+                            line = line.strip()
+                            if line and not line.startswith('#'):
+                                description = line[:80]
+                                break
+                    seen_names.add(name)
+                    cmd_name = name.lower().replace(' ', '-').replace('_', '-')
+                    _skill_commands[f"/{cmd_name}"] = {
+                        "name": name,
+                        "description": description or f"Invoke the {name} skill",
+                        "skill_md_path": str(skill_md),
+                        "skill_dir": str(skill_md.parent),
+                    }
+                except Exception:
                    continue
-                description = frontmatter.get('description', '')
-                if not description:
-                    for line in body.strip().split('\n'):
-                        line = line.strip()
-                        if line and not line.startswith('#'):
-                            description = line[:80]
-                            break
-                cmd_name = name.lower().replace(' ', '-').replace('_', '-')
-                _skill_commands[f"/{cmd_name}"] = {
-                    "name": name,
-                    "description": description or f"Invoke the {name} skill",
-                    "skill_md_path": str(skill_md),
-                    "skill_dir": str(skill_md.parent),
-                }
-            except Exception:
-                continue
    except Exception:
        pass
    return _skill_commands
--- a/agent/skill_utils.py
+++ b/agent/skill_utils.py
@@ -0,0 +1,270 @@
+"""Lightweight skill metadata utilities shared by prompt_builder and skills_tool.
+
+This module intentionally avoids importing the tool registry, CLI config, or any
+heavy dependency chain.  It is safe to import at module level without triggering
+tool registration or provider resolution.
+"""
+
+import logging
+import os
+import re
+import sys
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Set, Tuple
+
+from hermes_constants import get_hermes_home
+
+logger = logging.getLogger(__name__)
+
+# ── Platform mapping ──────────────────────────────────────────────────────
+
+# Frontmatter platform names mapped to the ``sys.platform`` prefix they match.
+PLATFORM_MAP = {
+    "macos": "darwin",
+    "linux": "linux",
+    "windows": "win32",
+}
+
+# Directory names pruned while walking a skills tree for index files.
+EXCLUDED_SKILL_DIRS = frozenset((".git", ".github", ".hub"))
+
+# ── Lazy YAML loader ─────────────────────────────────────────────────────
+
+_yaml_load_fn = None
+
+
+def yaml_load(content: str):
+    """Parse YAML with lazy import and CSafeLoader preference.
+
+    The ``yaml`` import and loader selection happen on the first call only;
+    the resulting parser closure is stored in the module-level
+    ``_yaml_load_fn`` and reused afterwards.
+    """
+    global _yaml_load_fn
+    if _yaml_load_fn is None:
+        import yaml
+
+        # Use CSafeLoader when the installed yaml exposes it, else SafeLoader.
+        loader = getattr(yaml, "CSafeLoader", None) or yaml.SafeLoader
+
+        def _load(value: str):
+            return yaml.load(value, Loader=loader)
+
+        _yaml_load_fn = _load
+    return _yaml_load_fn(content)
+
+
+# ── Frontmatter parsing ──────────────────────────────────────────────────
+
+
+def parse_frontmatter(content: str) -> Tuple[Dict[str, Any], str]:
+    """Split a markdown string into its YAML frontmatter and the remaining body.
+
+    The frontmatter is parsed with ``yaml_load``.  If that raises, each line is
+    split on its first ``:`` instead, giving string values only.  Content with
+    no leading ``---`` fence, or with no closing fence, is returned unchanged
+    alongside an empty dict.
+
+    Returns:
+        (frontmatter_dict, remaining_body)
+    """
+    if not content.startswith("---"):
+        return {}, content
+
+    # The search runs past the opening fence, so match offsets are shifted
+    # back by 3 when slicing the original string.
+    closing = re.search(r"\n---\s*\n", content[3:])
+    if closing is None:
+        return {}, content
+
+    header_text = content[3 : closing.start() + 3]
+    remainder = content[closing.end() + 3 :]
+
+    fields: Dict[str, Any] = {}
+    try:
+        loaded = yaml_load(header_text)
+    except Exception:
+        # Fallback: simple key:value parsing for malformed YAML
+        for raw_line in header_text.strip().split("\n"):
+            key, sep, value = raw_line.partition(":")
+            if sep:
+                fields[key.strip()] = value.strip()
+    else:
+        if isinstance(loaded, dict):
+            fields = loaded
+
+    return fields, remainder
+
+
+# ── Platform matching ─────────────────────────────────────────────────────
+
+
+def skill_matches_platform(frontmatter: Dict[str, Any]) -> bool:
+    """Tell whether a skill may be used on the operating system we run on.
+
+    A skill restricts itself through a top-level ``platforms`` entry in its
+    YAML frontmatter::
+
+        platforms: [macos]          # macOS only
+        platforms: [macos, linux]   # macOS and Linux
+
+    A missing or empty entry means the skill works **everywhere**, which keeps
+    older skills without the field usable.  A single non-list value is treated
+    as a one-element list.
+    """
+    declared = frontmatter.get("platforms")
+    if not declared:
+        return True
+    if not isinstance(declared, list):
+        declared = [declared]
+    host = sys.platform
+    wanted = (str(item).lower().strip() for item in declared)
+    # Names missing from PLATFORM_MAP are compared against sys.platform as-is.
+    return any(host.startswith(PLATFORM_MAP.get(name, name)) for name in wanted)
+
+
+# ── Disabled skills ───────────────────────────────────────────────────────
+
+
+def get_disabled_skill_names() -> Set[str]:
+    """Read disabled skill names from config.yaml.
+
+    When the ``HERMES_PLATFORM`` env var is set and
+    ``skills.platform_disabled`` has an entry for it, that entry is returned;
+    otherwise the global ``skills.disabled`` list is used.  Reads the config
+    file directly (no CLI config imports) to stay lightweight.
+
+    Returns:
+        The set of disabled names; empty when config.yaml is missing,
+        unreadable, or has no usable ``skills`` mapping.
+    """
+    config_path = get_hermes_home() / "config.yaml"
+    if not config_path.exists():
+        return set()
+    try:
+        parsed = yaml_load(config_path.read_text(encoding="utf-8"))
+    except Exception as e:
+        logger.debug("Could not read skill config %s: %s", config_path, e)
+        return set()
+    if not isinstance(parsed, dict):
+        return set()
+
+    skills_cfg = parsed.get("skills")
+    if not isinstance(skills_cfg, dict):
+        return set()
+
+    resolved_platform = os.getenv("HERMES_PLATFORM")
+    per_platform = skills_cfg.get("platform_disabled")
+    # ``platform_disabled`` is user-edited YAML; when it is not a mapping,
+    # ignore it and use the global list rather than raising on ``.get()``.
+    if resolved_platform and isinstance(per_platform, dict):
+        platform_disabled = per_platform.get(resolved_platform)
+        if platform_disabled is not None:
+            return _normalize_string_set(platform_disabled)
+    return _normalize_string_set(skills_cfg.get("disabled"))
+
+
+def _normalize_string_set(values) -> Set[str]:
+    """Coerce a YAML scalar or iterable into a set of stripped, non-blank strings.
+
+    ``None`` yields an empty set.  A string or any non-iterable scalar (for
+    example ``disabled: 42``) is treated as a single entry instead of raising.
+    """
+    if values is None:
+        return set()
+    if isinstance(values, str) or not hasattr(values, "__iter__"):
+        values = [values]
+    return {text for text in (str(item).strip() for item in values) if text}
+
+
+# ── External skills directories ──────────────────────────────────────────
+
+
+def get_external_skills_dirs() -> List[Path]:
+    """Read ``skills.external_dirs`` from config.yaml and return validated paths.
+
+    Each entry is expanded (``~`` and ``${VAR}``) and resolved to an absolute
+    path.  Only directories that actually exist are returned.  Duplicates and
+    paths that resolve to the local ``~/.hermes/skills/`` are silently skipped.
+
+    Returns an empty list when config.yaml is missing, cannot be read or
+    parsed, or holds no usable ``skills.external_dirs`` value.
+    """
+    config_path = get_hermes_home() / "config.yaml"
+    if not config_path.exists():
+        return []
+    try:
+        parsed = yaml_load(config_path.read_text(encoding="utf-8"))
+    except Exception:
+        return []
+    if not isinstance(parsed, dict):
+        return []
+
+    skills_cfg = parsed.get("skills")
+    if not isinstance(skills_cfg, dict):
+        return []
+
+    raw_dirs = skills_cfg.get("external_dirs")
+    if not raw_dirs:
+        return []
+    # A single path may be written as a bare string instead of a list.
+    if isinstance(raw_dirs, str):
+        raw_dirs = [raw_dirs]
+    if not isinstance(raw_dirs, list):
+        return []
+
+    # Resolved once so every candidate can be compared against the local dir.
+    local_skills = (get_hermes_home() / "skills").resolve()
+    seen: Set[Path] = set()
+    result: List[Path] = []
+
+    for entry in raw_dirs:
+        entry = str(entry).strip()
+        if not entry:
+            continue
+        # Expand ~ and environment variables
+        expanded = os.path.expanduser(os.path.expandvars(entry))
+        p = Path(expanded).resolve()
+        if p == local_skills:
+            continue
+        if p in seen:
+            continue
+        if p.is_dir():
+            seen.add(p)
+            result.append(p)
+        else:
+            logger.debug("External skills dir does not exist, skipping: %s", p)
+
+    return result
+
+
+def get_all_skills_dirs() -> List[Path]:
+    """List every skills directory, local ``~/.hermes/skills/`` first.
+
+    The local directory always occupies index 0, whether or not it exists on
+    disk yet; callers deal with a missing directory themselves.  The external
+    directories come after it, in the order the config lists them.
+    """
+    local_dir = get_hermes_home() / "skills"
+    return [local_dir, *get_external_skills_dirs()]
+
+
+# ── Condition extraction ──────────────────────────────────────────────────
+
+
+def extract_skill_conditions(frontmatter: Dict[str, Any]) -> Dict[str, List]:
+    """Extract conditional activation fields from parsed frontmatter.
+
+    Reads ``metadata.hermes`` and returns its ``fallback_for_toolsets``,
+    ``requires_toolsets``, ``fallback_for_tools`` and ``requires_tools``
+    values, each defaulting to an empty list.  When ``metadata`` or
+    ``metadata.hermes`` is absent or not a mapping, every field is empty.
+    """
+    # Frontmatter is hand-written YAML, so ``metadata`` or ``hermes`` may be a
+    # scalar or list; treat that as "no conditions" instead of raising.
+    metadata = frontmatter.get("metadata")
+    hermes = metadata.get("hermes") if isinstance(metadata, dict) else None
+    if not isinstance(hermes, dict):
+        hermes = {}
+    return {
+        "fallback_for_toolsets": hermes.get("fallback_for_toolsets", []),
+        "requires_toolsets": hermes.get("requires_toolsets", []),
+        "fallback_for_tools": hermes.get("fallback_for_tools", []),
+        "requires_tools": hermes.get("requires_tools", []),
+    }
+
+
+# ── Description extraction ────────────────────────────────────────────────
+
+
+def extract_skill_description(frontmatter: Dict[str, Any]) -> str:
+    """Return the frontmatter ``description``, cleaned and capped in length.
+
+    Surrounding whitespace and then surrounding quote characters are removed.
+    Text longer than 60 characters is cut to its first 57 plus ``"..."``.
+    A missing or falsy description gives an empty string.
+    """
+    value = frontmatter.get("description", "")
+    if not value:
+        return ""
+    cleaned = str(value).strip().strip("'\"")
+    return cleaned if len(cleaned) <= 60 else cleaned[:57] + "..."
+
+
+# ── File iteration ────────────────────────────────────────────────────────
+
+
+def iter_skill_index_files(skills_dir: Path, filename: str):
+    """Yield every file named *filename* under *skills_dir*, in sorted order.
+
+    Paths are ordered by their string form relative to *skills_dir*.  The
+    ``.git``, ``.github`` and ``.hub`` directories are never entered.
+    """
+    found = []
+    for current_dir, subdirs, filenames in os.walk(skills_dir):
+        # Prune in place so os.walk does not descend into excluded directories.
+        subdirs[:] = [name for name in subdirs if name not in EXCLUDED_SKILL_DIRS]
+        if filename in filenames:
+            found.append(Path(current_dir) / filename)
+    found.sort(key=lambda p: str(p.relative_to(skills_dir)))
+    yield from found
--- a/agent/title_generator.py
+++ b/agent/title_generator.py
@@ -19,7 +19,7 @@ _TITLE_PROMPT = (
 )


-def generate_title(user_message: str, assistant_response: str, timeout: float = 15.0) -> Optional[str]:
+def generate_title(user_message: str, assistant_response: str, timeout: float = 30.0) -> Optional[str]:
    """Generate a session title from the first exchange.

    Uses the auxiliary LLM client (cheapest/fastest available model).