fix(skills): prune dependency/venv dirs from all skill scanners (#30042)

* fix(skills): skip dependency dirs in skill scan

* fix(skills): widen sibling rglob scanners to use shared exclusion set

Follow-up to PR #29968. The contributor's PR widened EXCLUDED_SKILL_DIRS
in the canonical walker (iter_skill_index_files), which fixes the
user-visible discovery path. This commit sweeps the ~12 other
rglob('SKILL.md') sites that did their own ad-hoc filtering — most only
checked .git/.hub, some had no filter at all — so dependency dirs
(.venv, node_modules, site-packages, etc.) cannot leak ghost skills
through the secondary paths.

Adds agent.skill_utils.is_excluded_skill_path(path) helper. Migrates
all 13 sites to use it. Removes 3 hardcoded duplicate filter sets.

Sites touched:
  agent/curator_backup.py        - skill backup file count
  gateway/run.py                 - disabled-skill response (2 sites)
  hermes_cli/dump.py             - skill count in env dump
  hermes_cli/profile_describer.py - profile description (2 sites)
  hermes_cli/profile_distribution.py - profile install count
  hermes_cli/profiles.py         - profile skill count
  hermes_cli/skills_hub.py       - category detection
  tools/skill_manager_tool.py    - skill name lookup (already used the set, now uses the helper)
  tools/skill_usage.py           - usage tracking + skill dir lookup (2 sites)
  tools/skills_hub.py            - optional skills find + scan (2 sites)
  tools/skills_sync.py           - bundled skills sync

E2E verified with the exact reported shape
(bring/scripts/.venv/.../typer/.agents/skills/typer/SKILL.md): no
sibling site picks up the ghost skill, all five legit-skill counts
still return 1.

* chore(infographic): retro-pop-grid bento for PR #30042 skill-scanner sweep

---------

Co-authored-by: helix4u <4317663+helix4u@users.noreply.github.com>
This commit is contained in:
Teknium
2026-05-21 14:18:02 -07:00
committed by GitHub
parent 3462b097e2
commit 3fde8c153d
16 changed files with 150 additions and 33 deletions

View File

@@ -283,12 +283,12 @@ def _find_skill(name: str) -> Optional[Dict[str, Any]]:
external dirs configured via skills.external_dirs. Returns
{"path": Path} or None.
"""
from agent.skill_utils import EXCLUDED_SKILL_DIRS, get_all_skills_dirs
from agent.skill_utils import get_all_skills_dirs, is_excluded_skill_path
for skills_dir in get_all_skills_dirs():
if not skills_dir.exists():
continue
for skill_md in skills_dir.rglob("SKILL.md"):
if any(part in EXCLUDED_SKILL_DIRS for part in skill_md.parts):
if is_excluded_skill_path(skill_md):
continue
if skill_md.parent.name == name:
return {"path": skill_md.parent}

View File

@@ -34,6 +34,7 @@ from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Set, Tuple
from hermes_constants import get_hermes_home
from agent.skill_utils import is_excluded_skill_path
logger = logging.getLogger(__name__)
@@ -236,14 +237,13 @@ def list_agent_created_skill_names() -> List[str]:
names: List[str] = []
# Top-level SKILL.md files (flat layout) AND nested category/skill/SKILL.md
for skill_md in base.rglob("SKILL.md"):
# Skip anything under .archive or .hub
# Skip Hermes metadata, VCS, virtualenv/dependency, and cache dirs
if is_excluded_skill_path(skill_md):
continue
try:
rel = skill_md.relative_to(base)
except ValueError:
continue
parts = rel.parts
if parts and (parts[0].startswith(".") or parts[0] == "node_modules"):
continue
name = _read_skill_name(skill_md, fallback=skill_md.parent.name)
if name in off_limits:
continue
@@ -577,11 +577,7 @@ def _find_skill_dir(skill_name: str) -> Optional[Path]:
if not base.exists():
return None
for skill_md in base.rglob("SKILL.md"):
try:
rel = skill_md.relative_to(base)
except ValueError:
continue
if rel.parts and rel.parts[0].startswith("."):
if is_excluded_skill_path(skill_md):
continue
if _read_skill_name(skill_md, fallback=skill_md.parent.name) == skill_name:
return skill_md.parent

View File

@@ -26,6 +26,7 @@ from dataclasses import dataclass, field
from datetime import datetime, timezone
from pathlib import Path, PurePosixPath
from hermes_constants import get_hermes_home
from agent.skill_utils import is_excluded_skill_path
from typing import Any, Dict, List, Optional, Tuple, Union
from urllib.parse import urljoin, urlparse, urlunparse
@@ -2639,6 +2640,8 @@ class OptionalSkillSource(SkillSource):
if not self._optional_dir.is_dir():
return None
for skill_md in self._optional_dir.rglob("SKILL.md"):
if is_excluded_skill_path(skill_md):
continue
if skill_md.parent.name == name:
return skill_md.parent
return None
@@ -2650,10 +2653,9 @@ class OptionalSkillSource(SkillSource):
results: List[SkillMeta] = []
for skill_md in sorted(self._optional_dir.rglob("SKILL.md")):
parent = skill_md.parent
rel_parts = parent.relative_to(self._optional_dir).parts
if any(part.startswith(".") for part in rel_parts):
if is_excluded_skill_path(skill_md):
continue
parent = skill_md.parent
try:
content = skill_md.read_text(encoding="utf-8")

View File

@@ -27,6 +27,7 @@ import os
import shutil
from pathlib import Path
from hermes_constants import get_bundled_skills_dir, get_hermes_home
from agent.skill_utils import is_excluded_skill_path
from typing import Dict, List, Tuple
from utils import atomic_replace
@@ -139,8 +140,7 @@ def _discover_bundled_skills(bundled_dir: Path) -> List[Tuple[str, Path]]:
return skills
for skill_md in bundled_dir.rglob("SKILL.md"):
path_str = str(skill_md)
if "/.git/" in path_str or "/.github/" in path_str or "/.hub/" in path_str:
if is_excluded_skill_path(skill_md):
continue
skill_dir = skill_md.parent
skill_name = _read_skill_name(skill_md, skill_dir.name)

View File

@@ -79,6 +79,7 @@ from typing import Dict, Any, List, Optional, Set, Tuple
from tools.registry import registry, tool_error
from hermes_cli.config import cfg_get
from utils import env_var_enabled
from agent.skill_utils import EXCLUDED_SKILL_DIRS as _EXCLUDED_SKILL_DIRS
logger = logging.getLogger(__name__)
@@ -101,7 +102,6 @@ _PLATFORM_MAP = {
"windows": "win32",
}
_ENV_VAR_NAME_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")
_EXCLUDED_SKILL_DIRS = frozenset((".git", ".github", ".hub", ".archive"))
_REMOTE_ENV_BACKENDS = frozenset(
{"docker", "singularity", "modal", "ssh", "daytona", "vercel_sandbox"}
)
@@ -1565,4 +1565,3 @@ registry.register(
check_fn=check_skills_requirements,
emoji="📚",
)