feat(skills): add opt-in AST deep diagnostics

Add opt-in AST diagnostics for skill review without making Skills Guard stricter by default.

- Add hermes skills inspect --ast-deep to scan fetched skill bundles before installation
- Add hermes skills audit --deep to scan already-installed hub skills
- Keep AST analysis in tools/skills_ast_audit.py, separate from tools/skills_guard.py
- Label output as diagnostic hints, not security verdicts
- Cover dynamic import/access patterns: importlib, __import__(computed), getattr(computed), and __dict__[computed]

This follows the maintainer guidance from closed PR #7436: useful AST-level analysis belongs in an opt-in diagnostic path, not in Skills Guard's default heuristic scan.
2026-05-23 10:06:51 +00:00
parent 86871ee25a
commit 7255050c99
5 changed files with 704 additions and 12 deletions
--- a/tools/skills_ast_audit.py
+++ b/tools/skills_ast_audit.py
@@ -0,0 +1,353 @@
+"""
+AST-level deep audit for skill Python files — opt-in diagnostic, not a security gate.
+
+This is a standalone diagnostic tool in the spirit of SECURITY.md: it helps operators
+inspect skill code for patterns that *could* enable dynamic import/access
+obfuscation, but it is NOT a security boundary. Every pattern flagged here has
+legitimate uses. Use your judgment.
+
+Usage::
+
+    from tools.skills_ast_audit import ast_scan_skill, format_ast_report
+
+    findings = ast_scan_skill(Path("~/.hermes/skills/some-skill"))
+    if findings:
+        print(format_ast_report(findings))
+
+CLI integration: ``hermes skills audit --deep`` and ``hermes skills inspect --ast-deep``
+"""
+
+from __future__ import annotations
+
+import ast
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Mapping, List, Optional, Union
+
+
+# ---------------------------------------------------------------------------
+# Data model
+# ---------------------------------------------------------------------------
+
+
+@dataclass
+class AstFinding:
+    """One diagnostic hint produced by AST-level analysis of a Python file."""
+
+    # -- classification --
+    # Short identifier for deduplication and grouping
+    # (e.g. 'ast_importlib_import').
+    pattern_id: str
+    # One of 'high', 'medium', 'low' -- for display only, not a security claim.
+    severity: str
+    # Grouping label -- currently always 'obfuscation'.
+    category: str
+
+    # -- location --
+    # Relative path to the file containing the finding.
+    file: str
+    # 1-based line number.
+    line: int
+
+    # -- explanation --
+    # The matched source construct (human-readable snippet).
+    match: str
+    # Why this pattern is worth reviewing.
+    description: str
+
+
+# ---------------------------------------------------------------------------
+# Scanner
+# ---------------------------------------------------------------------------
+
+def _ast_scan_python(content: str, rel_path: str) -> List[AstFinding]:
+    """Collect dynamic-import and dynamic-attribute-access hints from source.
+
+    Constructs reported (each has legitimate uses; findings are hints only):
+
+    * ``import importlib`` / ``from importlib[.x] import ...``
+    * any ``<obj>.import_module(...)`` call
+    * ``__import__(<non-literal>)``, passed positionally or as ``name=``
+    * ``getattr(<obj>, <non-literal>)``
+    * ``<obj>.__dict__[<non-literal>]``
+
+    Hostile or pathological input (deeply-nested expressions, malformed source)
+    must not crash the scan. Both ``ast.parse`` and the visitor traversal are
+    guarded: a parse failure yields no findings, and a traversal failure
+    yields whatever was collected before it.
+
+    A leading UTF-8 byte-order mark in ``content`` is ignored.
+
+    Args:
+        content: Source text of one Python file.
+        rel_path: Path stored in the ``file`` field of every finding.
+
+    Returns:
+        Findings in traversal order; empty if the source does not parse.
+    """
+    # A UTF-8 BOM is legal at the start of a Python file, but ``ast.parse``
+    # raises SyntaxError on a leading U+FEFF when it is handed ``str``.
+    # Drop it; otherwise a BOM-prefixed file would silently produce no
+    # findings at all.
+    if content.startswith("\ufeff"):
+        content = content[1:]
+
+    try:
+        tree = ast.parse(content)
+    except (SyntaxError, ValueError, RecursionError):
+        return []
+
+    findings: List[AstFinding] = []
+
+    def _flag(node, pattern_id, severity, match, description):
+        # Category, file and line are filled in identically for every finding.
+        findings.append(
+            AstFinding(
+                pattern_id=pattern_id,
+                severity=severity,
+                category="obfuscation",
+                file=rel_path,
+                line=node.lineno,
+                match=match,
+                description=description,
+            )
+        )
+
+    # NodeVisitor dispatches each node to visit_<NodeType>; every handler ends
+    # with generic_visit() so constructs nested inside a match are seen too.
+    class _Visitor(ast.NodeVisitor):
+        def visit_Call(self, node):
+            func = node.func
+            if isinstance(func, ast.Attribute):
+                # <anything>.import_module(...); the receiver is not checked.
+                if func.attr == "import_module":
+                    _flag(node, "ast_dynamic_import", "high",
+                          "importlib.import_module()",
+                          "dynamic import via importlib — can load arbitrary modules at runtime")
+            elif isinstance(func, ast.Name) and func.id == "__import__":
+                # The module name may arrive positionally or as ``name=``.
+                if node.args:
+                    target = node.args[0]
+                else:
+                    target = next(
+                        (kw.value for kw in node.keywords if kw.arg == "name"),
+                        None,
+                    )
+                if target is not None and not isinstance(target, ast.Constant):
+                    _flag(node, "ast_dynamic_import_computed", "high",
+                          "__import__(<computed>)",
+                          "__import__ with dynamically constructed module name")
+            elif isinstance(func, ast.Name) and func.id == "getattr":
+                # Only a non-literal attribute name (2nd argument) is flagged.
+                if len(node.args) >= 2 and not isinstance(
+                    node.args[1], ast.Constant
+                ):
+                    _flag(node, "ast_dynamic_getattr", "medium",
+                          "getattr(<obj>, <computed>)",
+                          "getattr with dynamically constructed attribute name")
+            self.generic_visit(node)
+
+        def visit_Subscript(self, node):
+            # <obj>.__dict__[<non-literal>]
+            base = node.value
+            if (
+                isinstance(base, ast.Attribute)
+                and base.attr == "__dict__"
+                and not isinstance(node.slice, ast.Constant)
+            ):
+                _flag(node, "ast_dict_access", "high",
+                      "__dict__[<computed>]",
+                      "dynamic attribute access via __dict__ with computed key")
+            self.generic_visit(node)
+
+        def visit_Import(self, node):
+            # Flag importlib and any importlib.* submodule.
+            for alias in node.names:
+                if alias.name == "importlib" or alias.name.startswith(
+                    "importlib."
+                ):
+                    _flag(node, "ast_importlib_import", "medium",
+                          f"import {alias.name}",
+                          "importlib imported — enables dynamic module loading")
+            self.generic_visit(node)
+
+        def visit_ImportFrom(self, node):
+            # node.module is None for ``from . import x``; normalise to "".
+            module = node.module or ""
+            if module == "importlib" or module.startswith("importlib."):
+                _flag(node, "ast_importlib_import", "medium",
+                      f"from {module} import ...",
+                      "importlib imported — enables dynamic module loading")
+            self.generic_visit(node)
+
+    try:
+        _Visitor().visit(tree)
+    except (RecursionError, ValueError, RuntimeError):
+        # Visitor traversal can fail on hostile input even when ast.parse
+        # succeeded (e.g. deeply-nested call/attribute chains). Return
+        # whatever findings we collected before the failure.
+        return findings
+
+    return findings
+
+
+def ast_scan_file(file_path: Path, rel_path: Optional[str] = None) -> List[AstFinding]:
+    """Run the AST scan over one Python file on disk.
+
+    Args:
+        file_path: Path of the file to read; anything whose suffix is not
+            ``.py`` (compared case-insensitively) is ignored.
+        rel_path: Path recorded in the findings; defaults to ``file_path.name``.
+
+    Returns:
+        List of :class:`AstFinding` -- empty when the file is not Python,
+        cannot be read, or the scan yields nothing.
+    """
+    is_python = file_path.suffix.lower() == ".py"
+    if not is_python:
+        return []
+
+    try:
+        source = file_path.read_text(encoding="utf-8", errors="replace")
+    except (OSError, UnicodeDecodeError):
+        return []
+
+    return _ast_scan_python(source, file_path.name if rel_path is None else rel_path)
+
+
+def ast_scan_skill(skill_path: Path) -> List[AstFinding]:
+    """Recursively scan all Python files in a skill directory.
+
+    Files below ``__pycache__``, ``.venv``, ``venv`` or ``node_modules``
+    directories *inside* the skill are skipped. The check looks only at the
+    path relative to ``skill_path``, so a skill that itself lives under e.g.
+    ``.../venv/...`` is still scanned. ``.py`` is matched case-insensitively.
+
+    Args:
+        skill_path: Path to the installed skill directory.
+
+    Returns:
+        Combined list of :class:`AstFinding` across all .py files.
+    """
+    if not skill_path.is_dir():
+        return []
+
+    skipped_dirs = {"__pycache__", ".venv", "venv", "node_modules"}
+    all_findings: List[AstFinding] = []
+    for candidate in sorted(skill_path.rglob("*")):
+        rel = candidate.relative_to(skill_path)
+        if rel.suffix.lower() != ".py" or skipped_dirs.intersection(rel.parts[:-1]):
+            continue
+        all_findings.extend(ast_scan_file(candidate, rel.as_posix()))
+
+    return all_findings
+
+
+def ast_scan_bundle_files(
+    files: Mapping[str, Union[str, bytes]],
+) -> List[AstFinding]:
+    """Run the AST scan over the Python files of an in-memory skill bundle.
+
+    Backs ``hermes skills inspect --ast-deep``, letting operators review a
+    skill before it is installed. ``files`` maps bundle filename -> content
+    (``str`` or ``bytes``), as returned by the skills hub source adapters.
+    Entries are visited in sorted order; non-``.py`` names and anything under
+    ``__pycache__``, ``.venv``, ``venv`` or ``node_modules`` are skipped.
+    """
+    excluded = {"__pycache__", ".venv", "venv", "node_modules"}
+    collected: List[AstFinding] = []
+
+    for name, payload in sorted(files.items()):
+        bundle_path = Path(name)
+        if bundle_path.suffix.lower() != ".py" or excluded & set(bundle_path.parts):
+            continue
+        if isinstance(payload, bytes):
+            source = payload.decode("utf-8", errors="replace")
+        else:
+            source = str(payload)
+        collected += _ast_scan_python(source, name)
+    return collected
+
+
+# ---------------------------------------------------------------------------
+# Rich formatting
+# ---------------------------------------------------------------------------
+
+
+def _escape_rich_markup(text: str) -> str:
+    """Return *text* with Rich markup neutralised so it renders literally.
+
+    Follows Rich's escaping convention using only the standard library: a
+    tag-like ``[...]`` gets a backslash prefix (backslashes already in front
+    of it are doubled), and an odd run of trailing backslashes is evened out
+    so it cannot escape a tag that the caller appends after the text.
+    """
+    import re  # function-scope import: this module does not import ``re``
+
+    def _protect(found):
+        slashes, tag = found.groups()
+        return f"{slashes}{slashes}\\{tag}"
+
+    escaped = re.sub(r"(\\*)(\[[a-z#/@][^[]*?])", _protect, str(text))
+    trailing = len(escaped) - len(escaped.rstrip("\\"))
+    return escaped + "\\" if trailing % 2 else escaped
+
+
+def format_ast_report(
+    findings: List[AstFinding],
+    skill_name: str = "",
+) -> str:
+    """Format AST findings as a Rich-markup string.
+
+    File paths, skill names and snippets come from the skill under review,
+    so every interpolated value is markup-escaped; otherwise a path such as
+    ``x[/b].py`` would be parsed as a tag and could break or disguise the
+    report.
+
+    Args:
+        findings: List of findings from :func:`ast_scan_skill`.
+        skill_name: Optional skill name for the report header.
+
+    Returns:
+        Rich-markup string suitable for ``console.print()``.
+    """
+    esc = _escape_rich_markup
+    header = f"[bold]AST Deep Scan: {esc(skill_name)}[/]" if skill_name else "[bold]AST Deep Scan[/]"
+    if not findings:
+        return f"{header}\n[dim green]No AST-level patterns detected.[/]"
+
+    severity_order = {"high": 0, "medium": 1, "low": 2}
+    severity_style = {"high": "bold red", "medium": "yellow", "low": "dim"}
+    ordered = sorted(
+        findings,
+        key=lambda f: (severity_order.get(f.severity, 99), f.file, f.line),
+    )
+
+    # Summary line: total, then one coloured count per severity that occurs.
+    summary = f"[dim]{len(ordered)} finding(s)[/]"
+    counts = {lvl: sum(f.severity == lvl for f in ordered) for lvl in severity_order}
+    summary_parts = [
+        f"[{severity_style[lvl]}]{n} {lvl}[/]" for lvl, n in counts.items() if n
+    ]
+    if summary_parts:
+        summary += " — " + ", ".join(summary_parts)
+    lines: List[str] = [header, summary, ""]
+
+    current_file = None
+    for f in ordered:
+        if f.file != current_file:
+            current_file = f.file
+            lines.append(f"  [bold cyan]{esc(f.file)}[/]")
+        style = severity_style.get(f.severity, "dim")
+        severity_cell = esc(f"{f.severity:6}")
+        lines.append(
+            f"    L{f.line:>4} [{style}]{severity_cell}[/] {esc(f.description)}"
+        )
+        lines.append(f"          [dim]{esc(f.match)}[/]")
+
+    lines.append("")
+    lines.append(
+        "[dim]Note: AST findings are diagnostic hints, not security verdicts. "
+        "Review each pattern in context.[/]"
+    )
+    return "\n".join(lines)