Merge branch 'main' of github.com:NousResearch/hermes-agent into feat/ink-refactor
This commit is contained in:
@@ -1327,8 +1327,7 @@ def build_execute_code_schema(enabled_sandbox_tools: set = None) -> dict:
|
||||
f"Available via `from hermes_tools import ...`:\n\n"
|
||||
f"{tool_lines}\n\n"
|
||||
"Limits: 5-minute timeout, 50KB stdout cap, max 50 tool calls per script. "
|
||||
"terminal() is foreground-only (no background or pty). "
|
||||
"If the session uses a cloud sandbox backend, treat it as resumable task state rather than a durable always-on machine.\n\n"
|
||||
"terminal() is foreground-only (no background or pty).\n\n"
|
||||
"Print your final result to stdout. Use Python stdlib (json, re, math, csv, "
|
||||
"datetime, collections, etc.) for processing between tool calls.\n\n"
|
||||
"Also available (no import needed — built into hermes_tools):\n"
|
||||
|
||||
@@ -465,7 +465,7 @@ Important safety rule: cron-run sessions should not recursively schedule more cr
|
||||
},
|
||||
"deliver": {
|
||||
"type": "string",
|
||||
"description": "Omit this parameter to auto-deliver back to the current chat and topic (recommended). Auto-detection preserves thread/topic context. Only set explicitly when the user asks to deliver somewhere OTHER than the current conversation. Values: 'origin' (same as omitting), 'local' (no delivery, save only), or platform:chat_id:thread_id for a specific destination. Examples: 'telegram:-1001234567890:17585', 'discord:#engineering'. WARNING: 'platform:chat_id' without :thread_id loses topic targeting."
|
||||
"description": "Omit this parameter to auto-deliver back to the current chat and topic (recommended). Auto-detection preserves thread/topic context. Only set explicitly when the user asks to deliver somewhere OTHER than the current conversation. Values: 'origin' (same as omitting), 'local' (no delivery, save only), or platform:chat_id:thread_id for a specific destination. Examples: 'telegram:-1001234567890:17585', 'discord:#engineering', 'sms:+15551234567'. WARNING: 'platform:chat_id' without :thread_id loses topic targeting."
|
||||
},
|
||||
"skills": {
|
||||
"type": "array",
|
||||
|
||||
@@ -92,7 +92,10 @@ def _is_blocked_device(filepath: str) -> bool:
|
||||
|
||||
# Paths that file tools should refuse to write to without going through the
|
||||
# terminal tool's approval system. These match prefixes after os.path.realpath.
|
||||
_SENSITIVE_PATH_PREFIXES = ("/etc/", "/boot/", "/usr/lib/systemd/")
|
||||
_SENSITIVE_PATH_PREFIXES = (
|
||||
"/etc/", "/boot/", "/usr/lib/systemd/",
|
||||
"/private/etc/", "/private/var/",
|
||||
)
|
||||
_SENSITIVE_EXACT_PATHS = {"/var/run/docker.sock", "/run/docker.sock"}
|
||||
|
||||
|
||||
@@ -102,17 +105,16 @@ def _check_sensitive_path(filepath: str) -> str | None:
|
||||
resolved = os.path.realpath(os.path.expanduser(filepath))
|
||||
except (OSError, ValueError):
|
||||
resolved = filepath
|
||||
normalized = os.path.normpath(os.path.expanduser(filepath))
|
||||
_err = (
|
||||
f"Refusing to write to sensitive system path: {filepath}\n"
|
||||
"Use the terminal tool with sudo if you need to modify system files."
|
||||
)
|
||||
for prefix in _SENSITIVE_PATH_PREFIXES:
|
||||
if resolved.startswith(prefix):
|
||||
return (
|
||||
f"Refusing to write to sensitive system path: {filepath}\n"
|
||||
"Use the terminal tool with sudo if you need to modify system files."
|
||||
)
|
||||
if resolved in _SENSITIVE_EXACT_PATHS:
|
||||
return (
|
||||
f"Refusing to write to sensitive system path: {filepath}\n"
|
||||
"Use the terminal tool with sudo if you need to modify system files."
|
||||
)
|
||||
if resolved.startswith(prefix) or normalized.startswith(prefix):
|
||||
return _err
|
||||
if resolved in _SENSITIVE_EXACT_PATHS or normalized in _SENSITIVE_EXACT_PATHS:
|
||||
return _err
|
||||
return None
|
||||
|
||||
|
||||
|
||||
@@ -38,6 +38,15 @@ def _get_config():
|
||||
# Regex for valid HA entity_id format (e.g. "light.living_room", "sensor.temperature_1")
|
||||
_ENTITY_ID_RE = re.compile(r"^[a-z_][a-z0-9_]*\.[a-z0-9_]+$")
|
||||
|
||||
# Regex for valid HA service/domain names (e.g. "light", "turn_on", "shell_command").
|
||||
# Only lowercase ASCII letters, digits, and underscores — no slashes, dots, or
|
||||
# other characters that could allow path traversal in URL construction.
|
||||
# The domain and service are interpolated into /api/services/{domain}/{service},
|
||||
# so allowing arbitrary strings would enable SSRF via path traversal
|
||||
# (e.g. domain="../../api/config") or blocked-domain bypass
|
||||
# (e.g. domain="shell_command/../light").
|
||||
_SERVICE_NAME_RE = re.compile(r"^[a-z][a-z0-9_]*$")
|
||||
|
||||
# Service domains blocked for security -- these allow arbitrary code/command
|
||||
# execution on the HA host or enable SSRF attacks on the local network.
|
||||
# HA provides zero service-level access control; all safety must be in our layer.
|
||||
@@ -246,6 +255,14 @@ def _handle_call_service(args: dict, **kw) -> str:
|
||||
if not domain or not service:
|
||||
return tool_error("Missing required parameters: domain and service")
|
||||
|
||||
# Validate domain/service format BEFORE the blocklist check — prevents
|
||||
# path traversal in /api/services/{domain}/{service} and blocklist bypass
|
||||
# via payloads like "shell_command/../light".
|
||||
if not _SERVICE_NAME_RE.match(domain):
|
||||
return tool_error(f"Invalid domain format: {domain!r}")
|
||||
if not _SERVICE_NAME_RE.match(service):
|
||||
return tool_error(f"Invalid service format: {service!r}")
|
||||
|
||||
if domain in _BLOCKED_DOMAINS:
|
||||
return json.dumps({
|
||||
"error": f"Service domain '{domain}' is blocked for security. "
|
||||
@@ -257,6 +274,12 @@ def _handle_call_service(args: dict, **kw) -> str:
|
||||
return tool_error(f"Invalid entity_id format: {entity_id}")
|
||||
|
||||
data = args.get("data")
|
||||
if isinstance(data, str):
|
||||
try:
|
||||
data = json.loads(data) if data.strip() else None
|
||||
except json.JSONDecodeError as e:
|
||||
return tool_error(f"Invalid JSON string in 'data' parameter: {e}")
|
||||
|
||||
try:
|
||||
result = _run_async(_async_call_service(domain, service, entity_id, data))
|
||||
return json.dumps({"result": result})
|
||||
@@ -433,9 +456,9 @@ HA_CALL_SERVICE_SCHEMA = {
|
||||
),
|
||||
},
|
||||
"data": {
|
||||
"type": "object",
|
||||
"type": "string",
|
||||
"description": (
|
||||
"Additional service data. Examples: "
|
||||
"Additional service data as a JSON string. Examples: "
|
||||
'{"brightness": 255, "color_name": "blue"} for lights, '
|
||||
'{"temperature": 22, "hvac_mode": "heat"} for climate, '
|
||||
'{"volume_level": 0.5} for media players.'
|
||||
|
||||
@@ -322,7 +322,7 @@ async def _send_to_platform(platform, pconfig, chat_id, message, thread_id=None,
|
||||
(preserves code-block boundaries, adds part indicators).
|
||||
"""
|
||||
from gateway.config import Platform
|
||||
from gateway.platforms.base import BasePlatformAdapter
|
||||
from gateway.platforms.base import BasePlatformAdapter, utf16_len
|
||||
from gateway.platforms.telegram import TelegramAdapter
|
||||
from gateway.platforms.discord import DiscordAdapter
|
||||
from gateway.platforms.slack import SlackAdapter
|
||||
@@ -354,9 +354,11 @@ async def _send_to_platform(platform, pconfig, chat_id, message, thread_id=None,
|
||||
|
||||
# Smart-chunk the message to fit within platform limits.
|
||||
# For short messages or platforms without a known limit this is a no-op.
|
||||
# Telegram measures length in UTF-16 code units, not Unicode codepoints.
|
||||
max_len = _MAX_LENGTHS.get(platform)
|
||||
if max_len:
|
||||
chunks = BasePlatformAdapter.truncate_message(message, max_len)
|
||||
_len_fn = utf16_len if platform == Platform.TELEGRAM else None
|
||||
chunks = BasePlatformAdapter.truncate_message(message, max_len, len_fn=_len_fn)
|
||||
else:
|
||||
chunks = [message]
|
||||
|
||||
|
||||
@@ -19,6 +19,7 @@ import asyncio
|
||||
import concurrent.futures
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
from typing import Dict, Any, List, Optional, Union
|
||||
|
||||
from agent.auxiliary_client import async_call_llm, extract_content_or_reasoning
|
||||
@@ -90,31 +91,80 @@ def _truncate_around_matches(
|
||||
full_text: str, query: str, max_chars: int = MAX_SESSION_CHARS
|
||||
) -> str:
|
||||
"""
|
||||
Truncate a conversation transcript to max_chars, centered around
|
||||
where the query terms appear. Keeps content near matches, trims the edges.
|
||||
Truncate a conversation transcript to *max_chars*, choosing a window
|
||||
that maximises coverage of positions where the *query* actually appears.
|
||||
|
||||
Strategy (in priority order):
|
||||
1. Try to find the full query as a phrase (case-insensitive).
|
||||
2. If no phrase hit, look for positions where all query terms appear
|
||||
within a 200-char proximity window (co-occurrence).
|
||||
3. Fall back to individual term positions.
|
||||
|
||||
Once candidate positions are collected the function picks the window
|
||||
start that covers the most of them.
|
||||
"""
|
||||
if len(full_text) <= max_chars:
|
||||
return full_text
|
||||
|
||||
# Find the first occurrence of any query term
|
||||
query_terms = query.lower().split()
|
||||
text_lower = full_text.lower()
|
||||
first_match = len(full_text)
|
||||
for term in query_terms:
|
||||
pos = text_lower.find(term)
|
||||
if pos != -1 and pos < first_match:
|
||||
first_match = pos
|
||||
query_lower = query.lower().strip()
|
||||
match_positions: list[int] = []
|
||||
|
||||
if first_match == len(full_text):
|
||||
# No match found, take from the start
|
||||
first_match = 0
|
||||
# --- 1. Full-phrase search ------------------------------------------------
|
||||
phrase_pat = re.compile(re.escape(query_lower))
|
||||
match_positions = [m.start() for m in phrase_pat.finditer(text_lower)]
|
||||
|
||||
# Center the window around the first match
|
||||
half = max_chars // 2
|
||||
start = max(0, first_match - half)
|
||||
# --- 2. Proximity co-occurrence of all terms (within 200 chars) -----------
|
||||
if not match_positions:
|
||||
terms = query_lower.split()
|
||||
if len(terms) > 1:
|
||||
# Collect every occurrence of each term
|
||||
term_positions: dict[str, list[int]] = {}
|
||||
for t in terms:
|
||||
term_positions[t] = [
|
||||
m.start() for m in re.finditer(re.escape(t), text_lower)
|
||||
]
|
||||
# Slide through positions of the rarest term and check proximity
|
||||
rarest = min(terms, key=lambda t: len(term_positions.get(t, [])))
|
||||
for pos in term_positions.get(rarest, []):
|
||||
if all(
|
||||
any(abs(p - pos) < 200 for p in term_positions.get(t, []))
|
||||
for t in terms
|
||||
if t != rarest
|
||||
):
|
||||
match_positions.append(pos)
|
||||
|
||||
# --- 3. Individual term positions (last resort) ---------------------------
|
||||
if not match_positions:
|
||||
terms = query_lower.split()
|
||||
for t in terms:
|
||||
for m in re.finditer(re.escape(t), text_lower):
|
||||
match_positions.append(m.start())
|
||||
|
||||
if not match_positions:
|
||||
# Nothing at all — take from the start
|
||||
truncated = full_text[:max_chars]
|
||||
suffix = "\n\n...[later conversation truncated]..." if max_chars < len(full_text) else ""
|
||||
return truncated + suffix
|
||||
|
||||
# --- Pick window that covers the most match positions ---------------------
|
||||
match_positions.sort()
|
||||
|
||||
best_start = 0
|
||||
best_count = 0
|
||||
for candidate in match_positions:
|
||||
ws = max(0, candidate - max_chars // 4) # bias: 25% before, 75% after
|
||||
we = ws + max_chars
|
||||
if we > len(full_text):
|
||||
ws = max(0, len(full_text) - max_chars)
|
||||
we = len(full_text)
|
||||
count = sum(1 for p in match_positions if ws <= p < we)
|
||||
if count > best_count:
|
||||
best_count = count
|
||||
best_start = ws
|
||||
|
||||
start = best_start
|
||||
end = min(len(full_text), start + max_chars)
|
||||
if end - start < max_chars:
|
||||
start = max(0, end - max_chars)
|
||||
|
||||
truncated = full_text[start:end]
|
||||
prefix = "...[earlier conversation truncated]...\n\n" if start > 0 else ""
|
||||
|
||||
@@ -296,10 +296,20 @@ class GitHubSource(SkillSource):
|
||||
self.taps = list(self.DEFAULT_TAPS)
|
||||
if extra_taps:
|
||||
self.taps.extend(extra_taps)
|
||||
# Per-instance cache: repo -> (default_branch, tree_entries)
|
||||
# Survives within a single search/install flow, avoiding redundant API calls.
|
||||
self._tree_cache: Dict[str, Tuple[str, List[dict]]] = {}
|
||||
# Set when GitHub returns 403 with rate limit exhausted
|
||||
self._rate_limited: bool = False
|
||||
|
||||
def source_id(self) -> str:
|
||||
return "github"
|
||||
|
||||
@property
|
||||
def is_rate_limited(self) -> bool:
|
||||
"""Whether GitHub API rate limit was hit during operations."""
|
||||
return self._rate_limited
|
||||
|
||||
def trust_level_for(self, identifier: str) -> str:
|
||||
# identifier format: "owner/repo/path/to/skill"
|
||||
parts = identifier.split("/", 2)
|
||||
@@ -443,6 +453,69 @@ class GitHubSource(SkillSource):
|
||||
self._write_cache(cache_key, [self._meta_to_dict(s) for s in skills])
|
||||
return skills
|
||||
|
||||
# -- Repo tree cache (avoids redundant API calls) --
|
||||
|
||||
def _get_repo_tree(self, repo: str) -> Optional[Tuple[str, List[dict]]]:
|
||||
"""Get cached or fresh repo tree.
|
||||
|
||||
Returns ``(default_branch, tree_entries)`` or ``None``.
|
||||
A single install can call ``_download_directory_via_tree`` and
|
||||
``_find_skill_in_repo_tree`` multiple times for the same repo — this
|
||||
cache eliminates the redundant ``GET /repos/{repo}`` +
|
||||
``GET /repos/{repo}/git/trees/{branch}`` round-trips (previously up to
|
||||
6 duplicated pairs per install, consuming ~12 of the 60/hr
|
||||
unauthenticated rate limit for nothing).
|
||||
"""
|
||||
if repo in self._tree_cache:
|
||||
return self._tree_cache[repo]
|
||||
|
||||
headers = self.auth.get_headers()
|
||||
|
||||
# Resolve default branch
|
||||
try:
|
||||
resp = httpx.get(
|
||||
f"https://api.github.com/repos/{repo}",
|
||||
headers=headers, timeout=15, follow_redirects=True,
|
||||
)
|
||||
if resp.status_code != 200:
|
||||
self._check_rate_limit_response(resp)
|
||||
return None
|
||||
default_branch = resp.json().get("default_branch", "main")
|
||||
except (httpx.HTTPError, ValueError):
|
||||
return None
|
||||
|
||||
# Fetch recursive tree
|
||||
try:
|
||||
resp = httpx.get(
|
||||
f"https://api.github.com/repos/{repo}/git/trees/{default_branch}",
|
||||
params={"recursive": "1"},
|
||||
headers=headers, timeout=30, follow_redirects=True,
|
||||
)
|
||||
if resp.status_code != 200:
|
||||
self._check_rate_limit_response(resp)
|
||||
return None
|
||||
tree_data = resp.json()
|
||||
if tree_data.get("truncated"):
|
||||
logger.debug("Git tree truncated for %s, cannot cache", repo)
|
||||
return None
|
||||
except (httpx.HTTPError, ValueError):
|
||||
return None
|
||||
|
||||
entries = tree_data.get("tree", [])
|
||||
self._tree_cache[repo] = (default_branch, entries)
|
||||
return (default_branch, entries)
|
||||
|
||||
def _check_rate_limit_response(self, resp: "httpx.Response") -> None:
|
||||
"""Flag the instance as rate-limited when GitHub returns 403 + exhausted quota."""
|
||||
if resp.status_code == 403:
|
||||
remaining = resp.headers.get("X-RateLimit-Remaining", "")
|
||||
if remaining == "0":
|
||||
self._rate_limited = True
|
||||
logger.warning(
|
||||
"GitHub API rate limit exhausted (unauthenticated: 60 req/hr). "
|
||||
"Set GITHUB_TOKEN or install the gh CLI to raise the limit to 5,000/hr."
|
||||
)
|
||||
|
||||
def _download_directory(self, repo: str, path: str) -> Dict[str, str]:
|
||||
"""Recursively download all text files from a GitHub directory.
|
||||
|
||||
@@ -458,40 +531,34 @@ class GitHubSource(SkillSource):
|
||||
return self._download_directory_recursive(repo, path)
|
||||
|
||||
def _download_directory_via_tree(self, repo: str, path: str) -> Optional[Dict[str, str]]:
|
||||
"""Download an entire directory using the Git Trees API (single request)."""
|
||||
"""Download an entire directory using the Git Trees API (single request).
|
||||
|
||||
Returns:
|
||||
dict of files if the path exists and has content,
|
||||
empty dict ``{}`` if the tree is cached but the path doesn't exist
|
||||
(prevents unnecessary Contents API fallback),
|
||||
``None`` if the tree couldn't be fetched (triggers Contents API fallback).
|
||||
"""
|
||||
path = path.rstrip("/")
|
||||
headers = self.auth.get_headers()
|
||||
|
||||
# Resolve the default branch via the repo endpoint
|
||||
try:
|
||||
repo_url = f"https://api.github.com/repos/{repo}"
|
||||
resp = httpx.get(repo_url, headers=headers, timeout=15, follow_redirects=True)
|
||||
if resp.status_code != 200:
|
||||
return None
|
||||
default_branch = resp.json().get("default_branch", "main")
|
||||
except (httpx.HTTPError, ValueError):
|
||||
cached = self._get_repo_tree(repo)
|
||||
if cached is None:
|
||||
return None
|
||||
_default_branch, tree_entries = cached
|
||||
|
||||
# Fetch the full recursive tree (branch name works as tree-ish)
|
||||
try:
|
||||
tree_url = f"https://api.github.com/repos/{repo}/git/trees/{default_branch}"
|
||||
resp = httpx.get(
|
||||
tree_url, params={"recursive": "1"},
|
||||
headers=headers, timeout=30, follow_redirects=True,
|
||||
)
|
||||
if resp.status_code != 200:
|
||||
return None
|
||||
tree_data = resp.json()
|
||||
if tree_data.get("truncated"):
|
||||
logger.debug("Git tree truncated for %s, falling back to Contents API", repo)
|
||||
return None
|
||||
except (httpx.HTTPError, ValueError):
|
||||
return None
|
||||
# Check if ANY entry lives under the target path
|
||||
prefix = f"{path}/"
|
||||
has_entries = any(
|
||||
item.get("path", "").startswith(prefix) for item in tree_entries
|
||||
)
|
||||
if not has_entries:
|
||||
# Path definitively doesn't exist in the repo — return empty
|
||||
# instead of None to skip the Contents API fallback.
|
||||
return {}
|
||||
|
||||
# Filter to blobs under our target path and fetch content
|
||||
prefix = f"{path}/"
|
||||
files: Dict[str, str] = {}
|
||||
for item in tree_data.get("tree", []):
|
||||
for item in tree_entries:
|
||||
if item.get("type") != "blob":
|
||||
continue
|
||||
item_path = item.get("path", "")
|
||||
@@ -548,38 +615,14 @@ class GitHubSource(SkillSource):
|
||||
handles deeply nested directory structures like
|
||||
``cli-tool/components/skills/development/<skill>/SKILL.md``.
|
||||
"""
|
||||
# Get default branch
|
||||
try:
|
||||
resp = httpx.get(
|
||||
f"https://api.github.com/repos/{repo}",
|
||||
headers=self.auth.get_headers(),
|
||||
timeout=15,
|
||||
follow_redirects=True,
|
||||
)
|
||||
if resp.status_code != 200:
|
||||
return None
|
||||
default_branch = resp.json().get("default_branch", "main")
|
||||
except (httpx.HTTPError, json.JSONDecodeError):
|
||||
return None
|
||||
|
||||
# Get recursive tree (single API call for the entire repo)
|
||||
try:
|
||||
resp = httpx.get(
|
||||
f"https://api.github.com/repos/{repo}/git/trees/{default_branch}",
|
||||
params={"recursive": "1"},
|
||||
headers=self.auth.get_headers(),
|
||||
timeout=30,
|
||||
follow_redirects=True,
|
||||
)
|
||||
if resp.status_code != 200:
|
||||
return None
|
||||
tree_data = resp.json()
|
||||
except (httpx.HTTPError, json.JSONDecodeError):
|
||||
cached = self._get_repo_tree(repo)
|
||||
if cached is None:
|
||||
return None
|
||||
_default_branch, tree_entries = cached
|
||||
|
||||
# Look for SKILL.md files inside directories named <skill_name>
|
||||
skill_md_suffix = f"/{skill_name}/SKILL.md"
|
||||
for entry in tree_data.get("tree", []):
|
||||
for entry in tree_entries:
|
||||
if entry.get("type") != "blob":
|
||||
continue
|
||||
path = entry.get("path", "")
|
||||
@@ -601,6 +644,7 @@ class GitHubSource(SkillSource):
|
||||
)
|
||||
if resp.status_code == 200:
|
||||
return resp.text
|
||||
self._check_rate_limit_response(resp)
|
||||
except httpx.HTTPError as e:
|
||||
logger.debug("GitHub contents API fetch failed: %s", e)
|
||||
return None
|
||||
@@ -2654,6 +2698,222 @@ def check_for_skill_updates(
|
||||
return results
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Hermes centralized index source
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
HERMES_INDEX_URL = "https://hermes-agent.nousresearch.com/docs/api/skills-index.json"
|
||||
HERMES_INDEX_CACHE_FILE = INDEX_CACHE_DIR / "hermes-index.json"
|
||||
HERMES_INDEX_TTL = 6 * 3600 # 6 hours
|
||||
|
||||
|
||||
def _load_hermes_index() -> Optional[dict]:
|
||||
"""Fetch the centralized skills index, with local cache.
|
||||
|
||||
The index is a JSON file hosted on the docs site, rebuilt daily by CI.
|
||||
We cache it locally for HERMES_INDEX_TTL seconds to avoid repeated
|
||||
downloads within a session.
|
||||
"""
|
||||
# Check local cache
|
||||
if HERMES_INDEX_CACHE_FILE.exists():
|
||||
try:
|
||||
age = time.time() - HERMES_INDEX_CACHE_FILE.stat().st_mtime
|
||||
if age < HERMES_INDEX_TTL:
|
||||
return json.loads(HERMES_INDEX_CACHE_FILE.read_text())
|
||||
except (OSError, json.JSONDecodeError):
|
||||
pass
|
||||
|
||||
# Fetch from docs site
|
||||
try:
|
||||
resp = httpx.get(HERMES_INDEX_URL, timeout=15, follow_redirects=True)
|
||||
if resp.status_code != 200:
|
||||
logger.debug("Hermes index fetch returned %d", resp.status_code)
|
||||
return _load_stale_index_cache()
|
||||
data = resp.json()
|
||||
except (httpx.HTTPError, json.JSONDecodeError) as e:
|
||||
logger.debug("Hermes index fetch failed: %s", e)
|
||||
return _load_stale_index_cache()
|
||||
|
||||
# Validate structure
|
||||
if not isinstance(data, dict) or "skills" not in data:
|
||||
return _load_stale_index_cache()
|
||||
|
||||
# Cache locally
|
||||
try:
|
||||
HERMES_INDEX_CACHE_FILE.parent.mkdir(parents=True, exist_ok=True)
|
||||
HERMES_INDEX_CACHE_FILE.write_text(json.dumps(data))
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
return data
|
||||
|
||||
|
||||
def _load_stale_index_cache() -> Optional[dict]:
|
||||
"""Fall back to stale cache when the network fetch fails."""
|
||||
if HERMES_INDEX_CACHE_FILE.exists():
|
||||
try:
|
||||
return json.loads(HERMES_INDEX_CACHE_FILE.read_text())
|
||||
except (OSError, json.JSONDecodeError):
|
||||
pass
|
||||
return None
|
||||
|
||||
|
||||
class HermesIndexSource(SkillSource):
    """Skill source backed by the centralized Hermes Skills Index.

    The index is a JSON catalog loaded through ``_load_hermes_index()``.
    ``search``, ``inspect`` and ``trust_level_for`` are answered purely from
    that catalog; only ``fetch`` talks to GitHub, through a lazily created
    ``GitHubSource``.

    When the index is unavailable, all methods return empty / ``None`` so
    downstream sources take over transparently.
    """

    # Source prefixes ignored when comparing identifiers in ``_find_entry``.
    _SOURCE_PREFIXES = ("skills-sh/", "skills.sh/", "official/", "github/", "clawhub/")

    def __init__(self, auth: GitHubAuth):
        self._index: Optional[dict] = None
        # Tracks whether a load was attempted, so a failed load (None) is
        # not retried on every call.
        self._loaded = False
        self.auth = auth
        # Lazily create GitHubSource for fetch — only used when actually
        # downloading files, which requires real GitHub API calls.
        self._github: Optional[GitHubSource] = None

    def _ensure_loaded(self) -> dict:
        """Load the index once and return it (``{}`` when unavailable)."""
        if not self._loaded:
            self._index = _load_hermes_index()
            self._loaded = True
        return self._index or {}

    def _get_github(self) -> GitHubSource:
        """Return the shared ``GitHubSource``, creating it on first use."""
        if self._github is None:
            self._github = GitHubSource(auth=self.auth)
        return self._github

    def source_id(self) -> str:
        return "hermes-index"

    @property
    def is_available(self) -> bool:
        """Whether the index is loaded and has skills."""
        index = self._ensure_loaded()
        return bool(index.get("skills"))

    def trust_level_for(self, identifier: str) -> str:
        """Return the indexed trust level for an exact identifier match.

        Falls back to ``"community"`` when the skill is not in the index or
        carries no ``trust_level``.
        """
        index = self._ensure_loaded()
        for skill in index.get("skills", []):
            if skill.get("identifier") == identifier:
                return skill.get("trust_level", "community")
        return "community"

    def search(self, query: str, limit: int = 10) -> List[SkillMeta]:
        """Search the loaded index by case-insensitive substring.

        A blank query returns the first *limit* entries in index order.
        Otherwise an entry matches when the query appears in its name,
        description, or space-joined tags.
        """
        index = self._ensure_loaded()
        skills = index.get("skills", [])
        if not skills:
            return []

        if not query.strip():
            return [self._to_meta(s) for s in skills[:limit]]

        query_lower = query.lower()
        results: List[SkillMeta] = []
        for s in skills:
            # The index is external JSON: treat explicit nulls as empty so
            # a null tag list cannot crash the search and a null
            # name/description is not searched as the literal text "None".
            name = s.get("name") or ""
            description = s.get("description") or ""
            tags = s.get("tags") or []
            tag_text = " ".join(str(t) for t in tags)
            searchable = f"{name} {description} {tag_text}".lower()
            if query_lower in searchable:
                results.append(self._to_meta(s))
                if len(results) >= limit:
                    break
        return results

    def fetch(self, identifier: str) -> Optional[SkillBundle]:
        """Fetch a skill bundle from GitHub using paths recorded in the index.

        Tries the entry's ``resolved_github_id`` first, then
        ``"{repo}/{path}"``. The first successful bundle is relabelled with
        the entry's ``source`` and the caller's *identifier*.

        Returns:
            The bundle, or ``None`` when the skill is not indexed or every
            candidate fetch fails.
        """
        index = self._ensure_loaded()
        entry = self._find_entry(identifier, index)
        if not entry:
            return None

        candidates: List[str] = []
        resolved = entry.get("resolved_github_id")
        if resolved:
            candidates.append(resolved)

        repo = entry.get("repo", "")
        path = entry.get("path", "")
        if repo and path:
            github_id = f"{repo}/{path}"
            # Skip the fallback when it is the ID that was just tried, so
            # the identical GitHub requests are not issued twice.
            if github_id not in candidates:
                candidates.append(github_id)

        for github_id in candidates:
            bundle = self._get_github().fetch(github_id)
            if bundle:
                bundle.source = entry.get("source", "hermes-index")
                bundle.identifier = identifier
                return bundle

        return None

    def inspect(self, identifier: str) -> Optional[SkillMeta]:
        """Return metadata for *identifier* from the index, or ``None``."""
        index = self._ensure_loaded()
        entry = self._find_entry(identifier, index)
        if entry:
            return self._to_meta(entry)
        return None

    @classmethod
    def _strip_source_prefix(cls, identifier: str) -> str:
        """Drop the first matching source prefix (e.g. ``skills-sh/``), if any."""
        for prefix in cls._SOURCE_PREFIXES:
            if identifier.startswith(prefix):
                return identifier[len(prefix):]
        return identifier

    def _find_entry(self, identifier: str, index: dict) -> Optional[dict]:
        """Look up a skill in the index by identifier.

        An exact ``identifier`` match wins. Otherwise the requested and
        stored identifiers are compared with their source prefix removed.
        Skill names are not consulted.
        """
        skills = index.get("skills", [])

        # Pass 1: exact identifier match.
        for s in skills:
            if s.get("identifier") == identifier:
                return s

        # Pass 2: compare with the source prefix stripped on both sides.
        normalized = self._strip_source_prefix(identifier)
        for s in skills:
            if self._strip_source_prefix(s.get("identifier", "")) == normalized:
                return s

        return None

    @staticmethod
    def _to_meta(entry: dict) -> SkillMeta:
        """Convert a raw index entry into a ``SkillMeta``."""
        return SkillMeta(
            name=entry.get("name", ""),
            description=entry.get("description", ""),
            source=entry.get("source", "hermes-index"),
            identifier=entry.get("identifier", ""),
            trust_level=entry.get("trust_level", "community"),
            repo=entry.get("repo"),
            path=entry.get("path"),
            tags=entry.get("tags", []),
            extra=entry.get("extra", {}),
        )
|
||||
|
||||
|
||||
def create_source_router(auth: Optional[GitHubAuth] = None) -> List[SkillSource]:
|
||||
"""
|
||||
Create all configured source adapters.
|
||||
@@ -2667,6 +2927,7 @@ def create_source_router(auth: Optional[GitHubAuth] = None) -> List[SkillSource]
|
||||
|
||||
sources: List[SkillSource] = [
|
||||
OptionalSkillSource(), # Official optional skills (highest priority)
|
||||
HermesIndexSource(auth=auth), # Centralized index (search + resolved install paths)
|
||||
SkillsShSource(auth=auth),
|
||||
WellKnownSkillSource(),
|
||||
GitHubSource(auth=auth, extra_taps=extra_taps),
|
||||
@@ -2709,10 +2970,27 @@ def parallel_search_sources(
|
||||
per_source_limits = per_source_limits or {}
|
||||
|
||||
active: List[SkillSource] = []
|
||||
# When the centralized index is available and the user hasn't filtered
|
||||
# to a specific source, skip external API sources (github, skills-sh,
|
||||
# clawhub, etc.) — the index already has their data. This avoids
|
||||
# ~70 GitHub API calls per search for unauthenticated users.
|
||||
_index_available = False
|
||||
_api_source_ids = frozenset({"github", "skills-sh", "clawhub",
|
||||
"claude-marketplace", "lobehub", "well-known"})
|
||||
if source_filter == "all":
|
||||
for src in sources:
|
||||
if (src.source_id() == "hermes-index"
|
||||
and getattr(src, "is_available", False)):
|
||||
_index_available = True
|
||||
break
|
||||
|
||||
for src in sources:
|
||||
sid = src.source_id()
|
||||
if source_filter != "all" and sid != source_filter and sid != "official":
|
||||
continue
|
||||
# Skip external API sources when the index covers them
|
||||
if _index_available and sid in _api_source_ids:
|
||||
continue
|
||||
active.append(src)
|
||||
|
||||
all_results: List[SkillMeta] = []
|
||||
|
||||
@@ -531,7 +531,6 @@ Working directory: Use 'workdir' for per-command cwd.
|
||||
PTY mode: Set pty=true for interactive CLI tools (Codex, Claude Code, Python REPL).
|
||||
|
||||
Do NOT use vim/nano/interactive tools without pty=true — they hang without a pseudo-terminal. Pipe git output to cat if it might page.
|
||||
Important: cloud sandboxes may be cleaned up, idled out, or recreated between turns. Persistent filesystem means files can resume later; it does NOT guarantee a continuously running machine or surviving background processes. Use terminal sandboxes for task work, not durable hosting.
|
||||
"""
|
||||
|
||||
# Global state for environment lifecycle management
|
||||
|
||||
@@ -188,8 +188,14 @@ async def _generate_edge_tts(text: str, output_path: str, tts_config: Dict[str,
|
||||
_edge_tts = _import_edge_tts()
|
||||
edge_config = tts_config.get("edge", {})
|
||||
voice = edge_config.get("voice", DEFAULT_EDGE_VOICE)
|
||||
speed = float(edge_config.get("speed", tts_config.get("speed", 1.0)))
|
||||
|
||||
communicate = _edge_tts.Communicate(text, voice)
|
||||
kwargs = {"voice": voice}
|
||||
if speed != 1.0:
|
||||
pct = round((speed - 1.0) * 100)
|
||||
kwargs["rate"] = f"{pct:+d}%"
|
||||
|
||||
communicate = _edge_tts.Communicate(text, **kwargs)
|
||||
await communicate.save(output_path)
|
||||
return output_path
|
||||
|
||||
@@ -261,6 +267,7 @@ def _generate_openai_tts(text: str, output_path: str, tts_config: Dict[str, Any]
|
||||
model = oai_config.get("model", DEFAULT_OPENAI_MODEL)
|
||||
voice = oai_config.get("voice", DEFAULT_OPENAI_VOICE)
|
||||
base_url = oai_config.get("base_url", base_url)
|
||||
speed = float(oai_config.get("speed", tts_config.get("speed", 1.0)))
|
||||
|
||||
# Determine response format from extension
|
||||
if output_path.endswith(".ogg"):
|
||||
@@ -271,13 +278,16 @@ def _generate_openai_tts(text: str, output_path: str, tts_config: Dict[str, Any]
|
||||
OpenAIClient = _import_openai_client()
|
||||
client = OpenAIClient(api_key=api_key, base_url=base_url)
|
||||
try:
|
||||
response = client.audio.speech.create(
|
||||
create_kwargs = dict(
|
||||
model=model,
|
||||
voice=voice,
|
||||
input=text,
|
||||
response_format=response_format,
|
||||
extra_headers={"x-idempotency-key": str(uuid.uuid4())},
|
||||
)
|
||||
if speed != 1.0:
|
||||
create_kwargs["speed"] = max(0.25, min(4.0, speed))
|
||||
response = client.audio.speech.create(**create_kwargs)
|
||||
|
||||
response.stream_to_file(output_path)
|
||||
return output_path
|
||||
@@ -314,7 +324,7 @@ def _generate_minimax_tts(text: str, output_path: str, tts_config: Dict[str, Any
|
||||
mm_config = tts_config.get("minimax", {})
|
||||
model = mm_config.get("model", DEFAULT_MINIMAX_MODEL)
|
||||
voice_id = mm_config.get("voice_id", DEFAULT_MINIMAX_VOICE_ID)
|
||||
speed = mm_config.get("speed", 1)
|
||||
speed = mm_config.get("speed", tts_config.get("speed", 1))
|
||||
vol = mm_config.get("vol", 1)
|
||||
pitch = mm_config.get("pitch", 0)
|
||||
base_url = mm_config.get("base_url", DEFAULT_MINIMAX_BASE_URL)
|
||||
|
||||
@@ -106,8 +106,9 @@ def detect_audio_environment() -> dict:
|
||||
if any(os.environ.get(v) for v in ('SSH_CLIENT', 'SSH_TTY', 'SSH_CONNECTION')):
|
||||
warnings.append("Running over SSH -- no audio devices available")
|
||||
|
||||
# Docker detection
|
||||
if os.path.exists('/.dockerenv'):
|
||||
# Docker/Podman container detection
|
||||
from hermes_constants import is_container
|
||||
if is_container():
|
||||
warnings.append("Running inside Docker container -- no audio devices")
|
||||
|
||||
# WSL detection — PulseAudio bridge makes audio work in WSL.
|
||||
@@ -428,6 +429,11 @@ class AudioRecorder:
|
||||
"""Current audio input RMS level (0-32767). Updated each audio chunk."""
|
||||
return self._current_rms
|
||||
|
||||
@property
|
||||
def is_recording(self) -> bool:
|
||||
"""Whether audio recording is currently active."""
|
||||
return self._recording
|
||||
|
||||
# -- public methods ------------------------------------------------------
|
||||
|
||||
def _ensure_stream(self) -> None:
|
||||
|
||||
Reference in New Issue
Block a user