fix(skills): prune dependency/venv dirs from all skill scanners (#30042)
* fix(skills): skip dependency dirs in skill scan * fix(skills): widen sibling rglob scanners to use shared exclusion set Follow-up to PR #29968. The contributor's PR widened EXCLUDED_SKILL_DIRS in the canonical walker (iter_skill_index_files), which fixes the user-visible discovery path. This commit sweeps the ~12 other rglob('SKILL.md') sites that did their own ad-hoc filtering — most only checked .git/.hub, some had no filter at all — so dependency dirs (.venv, node_modules, site-packages, etc.) cannot leak ghost skills through the secondary paths. Adds agent.skill_utils.is_excluded_skill_path(path) helper. Migrates all 13 sites to use it. Removes 3 hardcoded duplicate filter sets. Sites touched: agent/curator_backup.py - skill backup file count gateway/run.py - disabled-skill response (2 sites) hermes_cli/dump.py - skill count in env dump hermes_cli/profile_describer.py- profile description (2 sites) hermes_cli/profile_distribution.py - profile install count hermes_cli/profiles.py - profile skill count hermes_cli/skills_hub.py - category detection tools/skill_manager_tool.py - skill name lookup (already used set, now uses helper) tools/skill_usage.py - usage tracking + skill dir lookup (2 sites) tools/skills_hub.py - optional skills find + scan (2 sites) tools/skills_sync.py - bundled skills sync E2E verified with the exact reported shape (bring/scripts/.venv/.../typer/.agents/skills/typer/SKILL.md): no sibling site picks up the ghost skill, all five legit-skill counts still return 1. * chore(infographic): retro-pop-grid bento for PR #30042 skill-scanner sweep --------- Co-authored-by: helix4u <4317663+helix4u@users.noreply.github.com>
This commit is contained in:
@@ -50,6 +50,7 @@ from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from hermes_constants import get_hermes_home
|
||||
from agent.skill_utils import is_excluded_skill_path
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -176,7 +177,9 @@ def get_keep() -> int:
|
||||
|
||||
def _count_skill_files(base: Path) -> int:
|
||||
try:
|
||||
return sum(1 for _ in base.rglob("SKILL.md"))
|
||||
return sum(
|
||||
1 for p in base.rglob("SKILL.md") if not is_excluded_skill_path(p)
|
||||
)
|
||||
except OSError:
|
||||
return 0
|
||||
|
||||
|
||||
@@ -24,7 +24,43 @@ PLATFORM_MAP = {
|
||||
"windows": "win32",
|
||||
}
|
||||
|
||||
EXCLUDED_SKILL_DIRS = frozenset((".git", ".github", ".hub", ".archive"))
|
||||
EXCLUDED_SKILL_DIRS = frozenset(
|
||||
(
|
||||
".git",
|
||||
".github",
|
||||
".hub",
|
||||
".archive",
|
||||
".venv",
|
||||
"venv",
|
||||
"node_modules",
|
||||
"site-packages",
|
||||
"__pycache__",
|
||||
".tox",
|
||||
".nox",
|
||||
".pytest_cache",
|
||||
".mypy_cache",
|
||||
".ruff_cache",
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
def is_excluded_skill_path(path) -> bool:
|
||||
"""True if any component of *path* is in EXCLUDED_SKILL_DIRS.
|
||||
|
||||
Use this on every SKILL.md path produced by ``rglob`` to prune
|
||||
dependency, virtualenv, VCS, and cache directories. Centralising the
|
||||
check here keeps every skill-scanning site in sync with the shared
|
||||
exclusion set.
|
||||
|
||||
Accepts a Path or string.
|
||||
"""
|
||||
try:
|
||||
parts = path.parts # Path
|
||||
except AttributeError:
|
||||
from pathlib import PurePath
|
||||
parts = PurePath(str(path)).parts
|
||||
return any(part in EXCLUDED_SKILL_DIRS for part in parts)
|
||||
|
||||
|
||||
# ── Lazy YAML loader ─────────────────────────────────────────────────────
|
||||
|
||||
@@ -478,7 +514,8 @@ def extract_skill_description(frontmatter: Dict[str, Any]) -> str:
|
||||
def iter_skill_index_files(skills_dir: Path, filename: str):
|
||||
"""Walk skills_dir yielding sorted paths matching *filename*.
|
||||
|
||||
Excludes ``.git``, ``.github``, ``.hub``, ``.archive`` directories.
|
||||
Excludes Hermes metadata, VCS, virtualenv/dependency, and cache
|
||||
directories so dependencies cannot register nested skills.
|
||||
"""
|
||||
matches = []
|
||||
for root, dirs, files in os.walk(skills_dir, followlinks=True):
|
||||
|
||||
Reference in New Issue
Block a user