feat(skills): add opt-in AST deep diagnostics

Add opt-in AST diagnostics for skill review without making Skills Guard stricter by default.

- Add hermes skills inspect --ast-deep to scan fetched skill bundles before installation
- Add hermes skills audit --deep to scan already-installed hub skills
- Keep AST analysis in tools/skills_ast_audit.py, separate from tools/skills_guard.py
- Label output as diagnostic hints, not security verdicts
- Cover dynamic import/access patterns: importlib, __import__(computed), getattr(computed), and __dict__[computed]

This follows the maintainer guidance from closed PR #7436: useful AST-level analysis belongs in an opt-in diagnostic path, not in Skills Guard's default heuristic scan.
This commit is contained in:
Tranquil-Flow
2026-05-23 10:06:51 +00:00
committed by Teknium
parent 86871ee25a
commit 7255050c99
5 changed files with 704 additions and 12 deletions

353
tools/skills_ast_audit.py Normal file
View File

@@ -0,0 +1,353 @@
"""
AST-level deep audit for skill Python files — opt-in diagnostic, not a security gate.
This is a standalone diagnostic tool per SECURITY.md spirit: it helps operators
inspect skill code for patterns that *could* enable dynamic import/access
obfuscation, but it is NOT a security boundary. Every pattern flagged here has
legitimate uses. Use your judgment.
Usage::
from tools.skills_ast_audit import ast_scan_skill, format_ast_report
findings = ast_scan_skill(Path("~/.hermes/skills/some-skill"))
if findings:
print(format_ast_report(findings))
CLI integration: ``hermes skills audit --deep``
"""
from __future__ import annotations
import ast
from dataclasses import dataclass
from pathlib import Path
from typing import Mapping, List, Optional, Union
# ---------------------------------------------------------------------------
# Data model
# ---------------------------------------------------------------------------
@dataclass
class AstFinding:
"""A single finding from AST-level analysis."""
pattern_id: str
"""Short identifier for deduplication and grouping (e.g. 'ast_importlib_import')."""
severity: str
"""One of 'high', 'medium', 'low' — for display only, not a security claim."""
category: str
"""Grouping label — currently always 'obfuscation'."""
file: str
"""Relative path to the file containing the finding."""
line: int
"""1-based line number."""
match: str
"""The matched source construct (human-readable snippet)."""
description: str
"""Why this pattern is worth reviewing."""
# ---------------------------------------------------------------------------
# Scanner
# ---------------------------------------------------------------------------
def _ast_scan_python(content: str, rel_path: str) -> List[AstFinding]:
"""Detect obfuscation via dynamic imports, attribute access, and string construction.
Hostile or pathological input (deeply-nested expressions, malformed source)
must not crash the scan. Both ``ast.parse`` and the visitor traversal are
guarded so parse/visit failures degrade gracefully to "no AST findings"
rather than raising.
"""
try:
tree = ast.parse(content)
except (SyntaxError, ValueError, RecursionError):
return []
findings: List[AstFinding] = []
class _Visitor(ast.NodeVisitor):
def visit_Call(self, node):
# Detect importlib.import_module(...)
if (
isinstance(node.func, ast.Attribute)
and node.func.attr == "import_module"
):
findings.append(
AstFinding(
pattern_id="ast_dynamic_import",
severity="high",
category="obfuscation",
file=rel_path,
line=node.lineno,
match="importlib.import_module()",
description="dynamic import via importlib — can load arbitrary modules at runtime",
)
)
# Detect __import__ with non-literal argument
if isinstance(node.func, ast.Name) and node.func.id == "__import__":
if node.args and not isinstance(node.args[0], ast.Constant):
findings.append(
AstFinding(
pattern_id="ast_dynamic_import_computed",
severity="high",
category="obfuscation",
file=rel_path,
line=node.lineno,
match="__import__(<computed>)",
description="__import__ with dynamically constructed module name",
)
)
# Detect getattr with computed attribute name
if isinstance(node.func, ast.Name) and node.func.id == "getattr":
if len(node.args) >= 2 and not isinstance(
node.args[1], ast.Constant
):
findings.append(
AstFinding(
pattern_id="ast_dynamic_getattr",
severity="medium",
category="obfuscation",
file=rel_path,
line=node.lineno,
match="getattr(<obj>, <computed>)",
description="getattr with dynamically constructed attribute name",
)
)
self.generic_visit(node)
def visit_Subscript(self, node):
# Detect obj.__dict__[<computed>]
if (
isinstance(node.value, ast.Attribute)
and node.value.attr == "__dict__"
):
if not isinstance(node.slice, ast.Constant):
findings.append(
AstFinding(
pattern_id="ast_dict_access",
severity="high",
category="obfuscation",
file=rel_path,
line=node.lineno,
match="__dict__[<computed>]",
description="dynamic attribute access via __dict__ with computed key",
)
)
self.generic_visit(node)
def visit_Import(self, node):
# Flag importlib and any importlib.* submodule.
for alias in node.names:
if alias.name == "importlib" or alias.name.startswith(
"importlib."
):
findings.append(
AstFinding(
pattern_id="ast_importlib_import",
severity="medium",
category="obfuscation",
file=rel_path,
line=node.lineno,
match=f"import {alias.name}",
description="importlib imported — enables dynamic module loading",
)
)
self.generic_visit(node)
def visit_ImportFrom(self, node):
module = node.module or ""
if module == "importlib" or module.startswith("importlib."):
findings.append(
AstFinding(
pattern_id="ast_importlib_import",
severity="medium",
category="obfuscation",
file=rel_path,
line=node.lineno,
match=f"from {module} import ...",
description="importlib imported — enables dynamic module loading",
)
)
self.generic_visit(node)
try:
_Visitor().visit(tree)
except (RecursionError, ValueError, RuntimeError):
# Visitor traversal can fail on hostile input even when ast.parse
# succeeded (e.g. deeply-nested call/attribute chains). Return
# whatever findings we collected before the failure.
return findings
return findings
def ast_scan_file(file_path: Path, rel_path: Optional[str] = None) -> List[AstFinding]:
"""Scan a single Python file and return AST-level findings.
Args:
file_path: Absolute path to the .py file.
rel_path: Relative path for display (defaults to file_path.name).
Returns:
List of :class:`AstFinding` — empty if the file isn't Python or scan yields nothing.
"""
if file_path.suffix.lower() != ".py":
return []
if rel_path is None:
rel_path = file_path.name
try:
content = file_path.read_text(encoding="utf-8", errors="replace")
except (OSError, UnicodeDecodeError):
return []
return _ast_scan_python(content, rel_path)
def ast_scan_skill(skill_path: Path) -> List[AstFinding]:
"""Recursively scan all Python files in a skill directory.
Args:
skill_path: Path to the installed skill directory.
Returns:
Combined list of :class:`AstFinding` across all .py files.
"""
if not skill_path.is_dir():
return []
all_findings: List[AstFinding] = []
for py_file in sorted(skill_path.rglob("*.py")):
# Skip __pycache__ and .venv/venv directories
parts = set(py_file.parent.parts)
if parts & {"__pycache__", ".venv", "venv", "node_modules"}:
continue
try:
rel = py_file.relative_to(skill_path).as_posix()
except ValueError:
rel = py_file.name
all_findings.extend(ast_scan_file(py_file, rel))
return all_findings
def ast_scan_bundle_files(
files: Mapping[str, Union[str, bytes]],
) -> List[AstFinding]:
"""Scan Python files from an in-memory skill bundle.
This powers ``hermes skills inspect --ast-deep`` so operators can review
a skill before installing it. The input is the bundle's filename -> content
mapping, as returned by the skills hub source adapters.
"""
all_findings: List[AstFinding] = []
for rel_path, content in sorted(files.items()):
path = Path(rel_path)
if path.suffix.lower() != ".py":
continue
if set(path.parts) & {"__pycache__", ".venv", "venv", "node_modules"}:
continue
if isinstance(content, bytes):
text = content.decode("utf-8", errors="replace")
else:
text = str(content)
all_findings.extend(_ast_scan_python(text, rel_path))
return all_findings
# ---------------------------------------------------------------------------
# Rich formatting
# ---------------------------------------------------------------------------
def format_ast_report(
findings: List[AstFinding],
skill_name: str = "",
) -> str:
"""Format AST findings as a Rich-markup string.
Args:
findings: List of findings from :func:`ast_scan_skill`.
skill_name: Optional skill name for the report header.
Returns:
Rich-markup string suitable for ``console.print()``.
"""
if not findings:
header = (
f"[bold]AST Deep Scan: {skill_name}[/]"
if skill_name
else "[bold]AST Deep Scan[/]"
)
return f"{header}\n[dim green]No AST-level patterns detected.[/]"
lines: List[str] = []
severity_order = {"high": 0, "medium": 1, "low": 2}
findings_sorted = sorted(
findings,
key=lambda f: (
severity_order.get(f.severity, 99),
f.file,
f.line,
),
)
if skill_name:
lines.append(f"[bold]AST Deep Scan: {skill_name}[/]")
else:
lines.append("[bold]AST Deep Scan[/]")
total = len(findings_sorted)
high_count = sum(1 for f in findings_sorted if f.severity == "high")
med_count = sum(1 for f in findings_sorted if f.severity == "medium")
low_count = sum(1 for f in findings_sorted if f.severity == "low")
summary_parts = []
if high_count:
summary_parts.append(f"[bold red]{high_count} high[/]")
if med_count:
summary_parts.append(f"[yellow]{med_count} medium[/]")
if low_count:
summary_parts.append(f"[dim]{low_count} low[/]")
lines.append(
f"[dim]{total} finding(s)[/] — "
+ ", ".join(summary_parts)
if summary_parts
else f"[dim]{total} finding(s)[/]"
)
lines.append("")
current_file = None
for f in findings_sorted:
if f.file != current_file:
current_file = f.file
lines.append(f" [bold cyan]{f.file}[/]")
sev_color = {"high": "bold red", "medium": "yellow", "low": "dim"}.get(
f.severity, "dim"
)
lines.append(
f" L{f.line:>4} [{sev_color}]{f.severity:6}[/] {f.description}"
)
lines.append(f" [dim]{f.match}[/]")
lines.append("")
lines.append(
"[dim]Note: AST findings are diagnostic hints, not security verdicts. "
"Review each pattern in context.[/]"
)
return "\n".join(lines)