fix(skills): prune dependency/venv dirs from all skill scanners (#30042)

* fix(skills): skip dependency dirs in skill scan

* fix(skills): widen sibling rglob scanners to use shared exclusion set

Follow-up to PR #29968. The contributor's PR widened EXCLUDED_SKILL_DIRS
in the canonical walker (iter_skill_index_files), which fixes the
user-visible discovery path. This commit sweeps the ~12 other
rglob('SKILL.md') sites that did their own ad-hoc filtering — most only
checked .git/.hub, some had no filter at all — so dependency dirs
(.venv, node_modules, site-packages, etc.) cannot leak ghost skills
through the secondary paths.

Adds agent.skill_utils.is_excluded_skill_path(path) helper. Migrates
all 13 sites to use it. Removes 3 hardcoded duplicate filter sets.

Sites touched:
  agent/curator_backup.py        - skill backup file count
  gateway/run.py                 - disabled-skill response (2 sites)
  hermes_cli/dump.py             - skill count in env dump
  hermes_cli/profile_describer.py- profile description (2 sites)
  hermes_cli/profile_distribution.py - profile install count
  hermes_cli/profiles.py         - profile skill count
  hermes_cli/skills_hub.py       - category detection
  tools/skill_manager_tool.py    - skill name lookup (already used set, now uses helper)
  tools/skill_usage.py           - usage tracking + skill dir lookup (2 sites)
  tools/skills_hub.py            - optional skills find + scan (2 sites)
  tools/skills_sync.py           - bundled skills sync

E2E verified with the exact reported shape
(bring/scripts/.venv/.../typer/.agents/skills/typer/SKILL.md): no
sibling site picks up the ghost skill, all five legit-skill counts
still return 1.

* chore(infographic): retro-pop-grid bento for PR #30042 skill-scanner sweep

---------

Co-authored-by: helix4u <4317663+helix4u@users.noreply.github.com>
This commit is contained in:
Teknium
2026-05-21 14:18:02 -07:00
committed by GitHub
parent 3462b097e2
commit 3fde8c153d
16 changed files with 150 additions and 33 deletions

View File

@@ -50,6 +50,7 @@ from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple from typing import Any, Dict, List, Optional, Tuple
from hermes_constants import get_hermes_home from hermes_constants import get_hermes_home
from agent.skill_utils import is_excluded_skill_path
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -176,7 +177,9 @@ def get_keep() -> int:
def _count_skill_files(base: Path) -> int: def _count_skill_files(base: Path) -> int:
try: try:
return sum(1 for _ in base.rglob("SKILL.md")) return sum(
1 for p in base.rglob("SKILL.md") if not is_excluded_skill_path(p)
)
except OSError: except OSError:
return 0 return 0

View File

@@ -24,7 +24,43 @@ PLATFORM_MAP = {
"windows": "win32", "windows": "win32",
} }
EXCLUDED_SKILL_DIRS = frozenset((".git", ".github", ".hub", ".archive")) EXCLUDED_SKILL_DIRS = frozenset(
(
".git",
".github",
".hub",
".archive",
".venv",
"venv",
"node_modules",
"site-packages",
"__pycache__",
".tox",
".nox",
".pytest_cache",
".mypy_cache",
".ruff_cache",
)
)
def is_excluded_skill_path(path) -> bool:
"""True if any component of *path* is in EXCLUDED_SKILL_DIRS.
Use this on every SKILL.md path produced by ``rglob`` to prune
dependency, virtualenv, VCS, and cache directories. Centralising the
check here keeps every skill-scanning site in sync with the shared
exclusion set.
Accepts a Path or string.
"""
try:
parts = path.parts # Path
except AttributeError:
from pathlib import PurePath
parts = PurePath(str(path)).parts
return any(part in EXCLUDED_SKILL_DIRS for part in parts)
# ── Lazy YAML loader ───────────────────────────────────────────────────── # ── Lazy YAML loader ─────────────────────────────────────────────────────
@@ -478,7 +514,8 @@ def extract_skill_description(frontmatter: Dict[str, Any]) -> str:
def iter_skill_index_files(skills_dir: Path, filename: str): def iter_skill_index_files(skills_dir: Path, filename: str):
"""Walk skills_dir yielding sorted paths matching *filename*. """Walk skills_dir yielding sorted paths matching *filename*.
Excludes ``.git``, ``.github``, ``.hub``, ``.archive`` directories. Excludes Hermes metadata, VCS, virtualenv/dependency, and cache
directories so dependencies cannot register nested skills.
""" """
matches = [] matches = []
for root, dirs, files in os.walk(skills_dir, followlinks=True): for root, dirs, files in os.walk(skills_dir, followlinks=True):

View File

@@ -1109,7 +1109,7 @@ def _check_unavailable_skill(command_name: str) -> str | None:
normalized = command_name.lower().replace("_", "-") normalized = command_name.lower().replace("_", "-")
try: try:
from tools.skills_tool import _get_disabled_skill_names from tools.skills_tool import _get_disabled_skill_names
from agent.skill_utils import get_all_skills_dirs from agent.skill_utils import get_all_skills_dirs, is_excluded_skill_path
disabled = _get_disabled_skill_names() disabled = _get_disabled_skill_names()
# Check disabled skills across all dirs (local + external) # Check disabled skills across all dirs (local + external)
@@ -1117,7 +1117,7 @@ def _check_unavailable_skill(command_name: str) -> str | None:
if not skills_dir.exists(): if not skills_dir.exists():
continue continue
for skill_md in skills_dir.rglob("SKILL.md"): for skill_md in skills_dir.rglob("SKILL.md"):
if any(part in {'.git', '.github', '.hub', '.archive'} for part in skill_md.parts): if is_excluded_skill_path(skill_md):
continue continue
slug, declared_name = _skill_slug_from_frontmatter(skill_md) slug, declared_name = _skill_slug_from_frontmatter(skill_md)
if not slug or not declared_name: if not slug or not declared_name:
@@ -1136,6 +1136,8 @@ def _check_unavailable_skill(command_name: str) -> str | None:
optional_dir = get_optional_skills_dir(repo_root / "optional-skills") optional_dir = get_optional_skills_dir(repo_root / "optional-skills")
if optional_dir.exists(): if optional_dir.exists():
for skill_md in optional_dir.rglob("SKILL.md"): for skill_md in optional_dir.rglob("SKILL.md"):
if is_excluded_skill_path(skill_md):
continue
slug, _declared = _skill_slug_from_frontmatter(skill_md) slug, _declared = _skill_slug_from_frontmatter(skill_md)
if not slug: if not slug:
continue continue

View File

@@ -16,6 +16,7 @@ from pathlib import Path
from hermes_cli.config import get_hermes_home, get_env_path, get_project_root, load_config from hermes_cli.config import get_hermes_home, get_env_path, get_project_root, load_config
from hermes_cli.env_loader import load_hermes_dotenv from hermes_cli.env_loader import load_hermes_dotenv
from hermes_constants import display_hermes_home from hermes_constants import display_hermes_home
from agent.skill_utils import is_excluded_skill_path
def _get_git_commit(project_root: Path) -> str: def _get_git_commit(project_root: Path) -> str:
@@ -69,6 +70,8 @@ def _count_skills(hermes_home: Path) -> int:
return 0 return 0
count = 0 count = 0
for item in skills_dir.rglob("SKILL.md"): for item in skills_dir.rglob("SKILL.md"):
if is_excluded_skill_path(item):
continue
count += 1 count += 1
return count return count

View File

@@ -35,6 +35,7 @@ from pathlib import Path
from typing import Optional from typing import Optional
from hermes_cli import profiles as profiles_mod from hermes_cli import profiles as profiles_mod
from agent.skill_utils import is_excluded_skill_path
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -109,8 +110,7 @@ def _collect_skills(profile_dir: Path) -> list[str]:
return [] return []
names: list[str] = [] names: list[str] = []
for md in skills_dir.rglob("SKILL.md"): for md in skills_dir.rglob("SKILL.md"):
path_str = str(md) if is_excluded_skill_path(md):
if "/.hub/" in path_str or "/.git/" in path_str:
continue continue
try: try:
rel = md.relative_to(skills_dir) rel = md.relative_to(skills_dir)
@@ -201,7 +201,7 @@ def describe_profile(
skill_list = "\n".join(f" - {n}" for n in skill_names) or " (no skills installed)" skill_list = "\n".join(f" - {n}" for n in skill_names) or " (no skills installed)"
skill_count = sum( skill_count = sum(
1 for _ in (profile_dir / "skills").rglob("SKILL.md") 1 for _ in (profile_dir / "skills").rglob("SKILL.md")
if "/.hub/" not in str(_) and "/.git/" not in str(_) if not is_excluded_skill_path(_)
) if (profile_dir / "skills").is_dir() else 0 ) if (profile_dir / "skills").is_dir() else 0
# Read model + provider from the profile's config. # Read model + provider from the profile's config.

View File

@@ -70,6 +70,8 @@ from datetime import datetime, timezone
from pathlib import Path from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple from typing import Any, Dict, List, Optional, Tuple
from agent.skill_utils import is_excluded_skill_path
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Constants # Constants
@@ -463,7 +465,9 @@ def _count_skills(staged: Path) -> int:
skills_dir = staged / "skills" skills_dir = staged / "skills"
if not skills_dir.is_dir(): if not skills_dir.is_dir():
return 0 return 0
return sum(1 for _ in skills_dir.rglob("SKILL.md")) return sum(
1 for p in skills_dir.rglob("SKILL.md") if not is_excluded_skill_path(p)
)
def plan_install( def plan_install(

View File

@@ -30,6 +30,8 @@ from dataclasses import dataclass
from pathlib import Path, PurePosixPath, PureWindowsPath from pathlib import Path, PurePosixPath, PureWindowsPath
from typing import List, Optional from typing import List, Optional
from agent.skill_utils import is_excluded_skill_path
_PROFILE_ID_RE = re.compile(r"^[a-z0-9][a-z0-9_-]{0,63}$") _PROFILE_ID_RE = re.compile(r"^[a-z0-9][a-z0-9_-]{0,63}$")
# Directories bootstrapped inside every new profile # Directories bootstrapped inside every new profile
@@ -485,8 +487,9 @@ def _count_skills(profile_dir: Path) -> int:
return 0 return 0
count = 0 count = 0
for md in skills_dir.rglob("SKILL.md"): for md in skills_dir.rglob("SKILL.md"):
if "/.hub/" not in str(md) and "/.git/" not in str(md): if is_excluded_skill_path(md):
count += 1 continue
count += 1
return count return count

View File

@@ -23,6 +23,7 @@ from rich.table import Table
# Lazy imports to avoid circular dependencies and slow startup. # Lazy imports to avoid circular dependencies and slow startup.
# tools.skills_hub and tools.skills_guard are imported inside functions. # tools.skills_hub and tools.skills_guard are imported inside functions.
from hermes_constants import display_hermes_home from hermes_constants import display_hermes_home
from agent.skill_utils import is_excluded_skill_path
_console = Console() _console = Console()
@@ -178,9 +179,12 @@ def _existing_categories() -> List[str]:
# top level (no category); otherwise treat as a category bucket. # top level (no category); otherwise treat as a category bucket.
if (entry / "SKILL.md").exists(): if (entry / "SKILL.md").exists():
continue continue
# Has at least one nested SKILL.md? # Has at least one nested SKILL.md (excluding dependency/cache dirs)?
try: try:
if any(entry.rglob("SKILL.md")): if any(
not is_excluded_skill_path(p)
for p in entry.rglob("SKILL.md")
):
out.append(entry.name) out.append(entry.name)
except OSError: except OSError:
continue continue

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.9 MiB

View File

@@ -1,6 +1,6 @@
"""Tests for agent/skill_utils.py — extract_skill_conditions metadata handling.""" """Tests for agent/skill_utils.py."""
from agent.skill_utils import extract_skill_conditions from agent.skill_utils import extract_skill_conditions, iter_skill_index_files
def test_metadata_as_dict_with_hermes(): def test_metadata_as_dict_with_hermes():
@@ -56,3 +56,41 @@ def test_metadata_missing_entirely():
"fallback_for_tools": [], "fallback_for_tools": [],
"requires_tools": [], "requires_tools": [],
} }
def test_iter_skill_index_files_prunes_dependency_dirs(tmp_path):
real = tmp_path / "real-skill"
real.mkdir()
(real / "SKILL.md").write_text("---\nname: real-skill\n---\n", encoding="utf-8")
nested = (
tmp_path
/ "bring"
/ "scripts"
/ ".venv"
/ "lib"
/ "python3.13"
/ "site-packages"
/ "typer"
/ ".agents"
/ "skills"
/ "typer"
)
nested.mkdir(parents=True)
(nested / "SKILL.md").write_text("---\nname: typer\n---\n", encoding="utf-8")
node_module = (
tmp_path
/ "web-skill"
/ "node_modules"
/ "dep"
/ ".agents"
/ "skills"
/ "dep"
)
node_module.mkdir(parents=True)
(node_module / "SKILL.md").write_text("---\nname: dep\n---\n", encoding="utf-8")
found = list(iter_skill_index_files(tmp_path, "SKILL.md"))
assert found == [real / "SKILL.md"]

View File

@@ -267,6 +267,32 @@ class TestFindAllSkills:
assert len(skills) == 1 assert len(skills) == 1
assert skills[0]["name"] == "real-skill" assert skills[0]["name"] == "real-skill"
def test_skips_nested_virtualenv_dependency_skills(self, tmp_path):
with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
_make_skill(tmp_path, "real-skill")
typer_skill = (
tmp_path
/ "bring"
/ "scripts"
/ ".venv"
/ "lib"
/ "python3.13"
/ "site-packages"
/ "typer"
/ ".agents"
/ "skills"
/ "typer"
)
typer_skill.mkdir(parents=True)
(typer_skill / "SKILL.md").write_text(
"---\nname: typer\ndescription: Should not be discovered.\n---\n",
encoding="utf-8",
)
skills = _find_all_skills()
assert [skill["name"] for skill in skills] == ["real-skill"]
def test_finds_skills_in_symlinked_category_dir(self, tmp_path): def test_finds_skills_in_symlinked_category_dir(self, tmp_path):
external_root = tmp_path / "repo" external_root = tmp_path / "repo"
skills_root = tmp_path / "skills" skills_root = tmp_path / "skills"

View File

@@ -283,12 +283,12 @@ def _find_skill(name: str) -> Optional[Dict[str, Any]]:
external dirs configured via skills.external_dirs. Returns external dirs configured via skills.external_dirs. Returns
{"path": Path} or None. {"path": Path} or None.
""" """
from agent.skill_utils import EXCLUDED_SKILL_DIRS, get_all_skills_dirs from agent.skill_utils import get_all_skills_dirs, is_excluded_skill_path
for skills_dir in get_all_skills_dirs(): for skills_dir in get_all_skills_dirs():
if not skills_dir.exists(): if not skills_dir.exists():
continue continue
for skill_md in skills_dir.rglob("SKILL.md"): for skill_md in skills_dir.rglob("SKILL.md"):
if any(part in EXCLUDED_SKILL_DIRS for part in skill_md.parts): if is_excluded_skill_path(skill_md):
continue continue
if skill_md.parent.name == name: if skill_md.parent.name == name:
return {"path": skill_md.parent} return {"path": skill_md.parent}

View File

@@ -34,6 +34,7 @@ from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Set, Tuple from typing import Any, Dict, Iterable, List, Optional, Set, Tuple
from hermes_constants import get_hermes_home from hermes_constants import get_hermes_home
from agent.skill_utils import is_excluded_skill_path
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -236,14 +237,13 @@ def list_agent_created_skill_names() -> List[str]:
names: List[str] = [] names: List[str] = []
# Top-level SKILL.md files (flat layout) AND nested category/skill/SKILL.md # Top-level SKILL.md files (flat layout) AND nested category/skill/SKILL.md
for skill_md in base.rglob("SKILL.md"): for skill_md in base.rglob("SKILL.md"):
# Skip anything under .archive or .hub # Skip Hermes metadata, VCS, virtualenv/dependency, and cache dirs
if is_excluded_skill_path(skill_md):
continue
try: try:
rel = skill_md.relative_to(base) rel = skill_md.relative_to(base)
except ValueError: except ValueError:
continue continue
parts = rel.parts
if parts and (parts[0].startswith(".") or parts[0] == "node_modules"):
continue
name = _read_skill_name(skill_md, fallback=skill_md.parent.name) name = _read_skill_name(skill_md, fallback=skill_md.parent.name)
if name in off_limits: if name in off_limits:
continue continue
@@ -577,11 +577,7 @@ def _find_skill_dir(skill_name: str) -> Optional[Path]:
if not base.exists(): if not base.exists():
return None return None
for skill_md in base.rglob("SKILL.md"): for skill_md in base.rglob("SKILL.md"):
try: if is_excluded_skill_path(skill_md):
rel = skill_md.relative_to(base)
except ValueError:
continue
if rel.parts and rel.parts[0].startswith("."):
continue continue
if _read_skill_name(skill_md, fallback=skill_md.parent.name) == skill_name: if _read_skill_name(skill_md, fallback=skill_md.parent.name) == skill_name:
return skill_md.parent return skill_md.parent

View File

@@ -26,6 +26,7 @@ from dataclasses import dataclass, field
from datetime import datetime, timezone from datetime import datetime, timezone
from pathlib import Path, PurePosixPath from pathlib import Path, PurePosixPath
from hermes_constants import get_hermes_home from hermes_constants import get_hermes_home
from agent.skill_utils import is_excluded_skill_path
from typing import Any, Dict, List, Optional, Tuple, Union from typing import Any, Dict, List, Optional, Tuple, Union
from urllib.parse import urljoin, urlparse, urlunparse from urllib.parse import urljoin, urlparse, urlunparse
@@ -2639,6 +2640,8 @@ class OptionalSkillSource(SkillSource):
if not self._optional_dir.is_dir(): if not self._optional_dir.is_dir():
return None return None
for skill_md in self._optional_dir.rglob("SKILL.md"): for skill_md in self._optional_dir.rglob("SKILL.md"):
if is_excluded_skill_path(skill_md):
continue
if skill_md.parent.name == name: if skill_md.parent.name == name:
return skill_md.parent return skill_md.parent
return None return None
@@ -2650,10 +2653,9 @@ class OptionalSkillSource(SkillSource):
results: List[SkillMeta] = [] results: List[SkillMeta] = []
for skill_md in sorted(self._optional_dir.rglob("SKILL.md")): for skill_md in sorted(self._optional_dir.rglob("SKILL.md")):
parent = skill_md.parent if is_excluded_skill_path(skill_md):
rel_parts = parent.relative_to(self._optional_dir).parts
if any(part.startswith(".") for part in rel_parts):
continue continue
parent = skill_md.parent
try: try:
content = skill_md.read_text(encoding="utf-8") content = skill_md.read_text(encoding="utf-8")

View File

@@ -27,6 +27,7 @@ import os
import shutil import shutil
from pathlib import Path from pathlib import Path
from hermes_constants import get_bundled_skills_dir, get_hermes_home from hermes_constants import get_bundled_skills_dir, get_hermes_home
from agent.skill_utils import is_excluded_skill_path
from typing import Dict, List, Tuple from typing import Dict, List, Tuple
from utils import atomic_replace from utils import atomic_replace
@@ -139,8 +140,7 @@ def _discover_bundled_skills(bundled_dir: Path) -> List[Tuple[str, Path]]:
return skills return skills
for skill_md in bundled_dir.rglob("SKILL.md"): for skill_md in bundled_dir.rglob("SKILL.md"):
path_str = str(skill_md) if is_excluded_skill_path(skill_md):
if "/.git/" in path_str or "/.github/" in path_str or "/.hub/" in path_str:
continue continue
skill_dir = skill_md.parent skill_dir = skill_md.parent
skill_name = _read_skill_name(skill_md, skill_dir.name) skill_name = _read_skill_name(skill_md, skill_dir.name)

View File

@@ -79,6 +79,7 @@ from typing import Dict, Any, List, Optional, Set, Tuple
from tools.registry import registry, tool_error from tools.registry import registry, tool_error
from hermes_cli.config import cfg_get from hermes_cli.config import cfg_get
from utils import env_var_enabled from utils import env_var_enabled
from agent.skill_utils import EXCLUDED_SKILL_DIRS as _EXCLUDED_SKILL_DIRS
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -101,7 +102,6 @@ _PLATFORM_MAP = {
"windows": "win32", "windows": "win32",
} }
_ENV_VAR_NAME_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$") _ENV_VAR_NAME_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")
_EXCLUDED_SKILL_DIRS = frozenset((".git", ".github", ".hub", ".archive"))
_REMOTE_ENV_BACKENDS = frozenset( _REMOTE_ENV_BACKENDS = frozenset(
{"docker", "singularity", "modal", "ssh", "daytona", "vercel_sandbox"} {"docker", "singularity", "modal", "ssh", "daytona", "vercel_sandbox"}
) )
@@ -1565,4 +1565,3 @@ registry.register(
check_fn=check_skills_requirements, check_fn=check_skills_requirements,
emoji="📚", emoji="📚",
) )