fix(approval): pin 'silence is not consent' contract on timeout/deny (#24912) (#30879)

User incident (Slack, 2026-05-13): user walked away mid-conversation,
agent requested approval to run `rm -rf .git`, the prompt timed out
after the gateway_timeout (default 300s), and the agent removed the
.git folder on its own. Corroborated by an independent report from a
Telegram user.

The underlying code path was correct — `check_all_command_guards`
returns `approved=False` with a BLOCKED message on both timeout and
explicit deny, and `terminal_tool` surfaces that as `status=blocked`
to the agent. The bug is at the model-interface layer: the message
"BLOCKED: Command timed out. Do NOT retry this command." reads to
some models as "try a different command achieving the same outcome."

This commit changes only the model-facing message + the structured
return shape:

  - Timeout message now explicitly names the three evasion paths the
    agent must avoid: retry, rephrase, AND achieve the same outcome
    via a different command. Ends with "Silence is not consent."
  - Explicit deny gets the same shape minus the silence-is-not-consent
    line (it WAS an explicit deny, not silence).
  - New structured fields on the return dict: `outcome` ("timeout"
    or "denied") and `user_consent` (always False on this branch)
    so plugins, hooks, and audit pipelines don't have to string-parse
    the message to distinguish the two cases.

The mechanism that should already have prevented the original incident
— timeout treated as deny, BLOCKED result, post hook fires with
`choice="timeout"` — is unchanged. This commit hardens only the
agent's reading of the result.

Tests:
  - test_timeout_returns_approved_false_with_no_consent — pins the
    return shape on the Slack-shaped notify_cb-registered path
  - test_timeout_message_is_emphatic_against_retry_and_rephrase —
    pins the exact phrases the message must contain
  - test_explicit_deny_carries_same_no_consent_shape — same contract
    on explicit /deny
  - test_timeout_emits_post_hook_with_timeout_outcome — pins the
    post_approval_response hook payload so audit plugins can act

329 approval tests passing (4 new + 325 existing).

Fixes #24912
This commit is contained in:
Teknium
2026-05-23 02:59:13 -07:00
committed by GitHub
parent 6855d17753
commit 7f1b2b4569
2 changed files with 199 additions and 3 deletions

View File

@@ -1,6 +1,9 @@
"""Tests for the dangerous command approval module.""" """Tests for the dangerous command approval module."""
import ast import ast
import os
import threading
import time
from pathlib import Path from pathlib import Path
from types import SimpleNamespace from types import SimpleNamespace
from unittest.mock import patch as mock_patch from unittest.mock import patch as mock_patch
@@ -1305,3 +1308,165 @@ class TestEtcPatternsUnaffectedByRefactor:
def test_grep_etc_passwd_is_safe(self): def test_grep_etc_passwd_is_safe(self):
dangerous, _, _ = detect_dangerous_command("grep root /etc/passwd") dangerous, _, _ = detect_dangerous_command("grep root /etc/passwd")
assert dangerous is False assert dangerous is False
# =========================================================================
# Gateway approval timeout = deny, NOT consent (#24912)
#
# A Slack user walked away mid-conversation; the agent requested approval
# to run `rm -rf .git`; the prompt timed out; the agent ran the command
# anyway. Reported by @tofalck on 2026-05-13, corroborated by
# @angry-programmer on Telegram. Silence is not consent.
#
# These tests pin:
# 1. Gateway timeout → approved=False, with a message strong enough that
# a downstream agent reading "BLOCKED: ... Silence is not consent."
# treats it as a hard halt, not an invitation to rephrase.
# 2. The structured outcome / user_consent fields are present so
# plugins, hooks, and audit pipelines can act on the timeout without
# string-parsing the message.
# 3. Explicit /deny carries the same shape (treat-as-not-consented).
# =========================================================================
class TestApprovalTimeoutIsNotConsent:
"""The gateway approval contract: silence is not consent (#24912)."""
SESSION_KEY = "test-no-consent-session"
def setup_method(self):
"""Reset module state and force tight gateway_timeout for fast tests."""
from tools import approval as mod
mod._gateway_queues.clear()
mod._gateway_notify_cbs.clear()
mod._session_approved.clear()
mod._permanent_approved.clear()
mod._pending.clear()
self._saved_env = {
k: os.environ.get(k)
for k in ("HERMES_GATEWAY_SESSION", "HERMES_YOLO_MODE",
"HERMES_SESSION_KEY", "HERMES_INTERACTIVE")
}
os.environ.pop("HERMES_YOLO_MODE", None)
os.environ.pop("HERMES_INTERACTIVE", None)
os.environ["HERMES_GATEWAY_SESSION"] = "1"
os.environ["HERMES_SESSION_KEY"] = self.SESSION_KEY
def teardown_method(self):
from tools import approval as mod
mod._gateway_queues.clear()
mod._gateway_notify_cbs.clear()
for k, v in self._saved_env.items():
if v is None:
os.environ.pop(k, None)
else:
os.environ[k] = v
def _force_short_timeout(self, monkeypatch, seconds=1):
from tools import approval as mod
monkeypatch.setattr(
mod, "_get_approval_config",
lambda: {"mode": "manual", "gateway_timeout": seconds, "timeout": seconds},
)
def test_timeout_returns_approved_false_with_no_consent(self, monkeypatch):
"""The reported #24912 scenario — user never responds, agent must see BLOCKED."""
from tools import approval as mod
self._force_short_timeout(monkeypatch, seconds=1)
# Slack-shaped: notify_cb registered, but user doesn't respond.
notified = []
mod.register_gateway_notify(self.SESSION_KEY, lambda data: notified.append(data))
result = mod.check_all_command_guards("rm -rf .git", "local")
assert result["approved"] is False
assert result.get("user_consent") is False
assert result.get("outcome") == "timeout"
# The notify_cb DID fire — we did try to ask the user.
assert len(notified) == 1
def test_timeout_message_is_emphatic_against_retry_and_rephrase(self, monkeypatch):
"""The BLOCKED message must explicitly tell the agent not to rephrase.
Without this, the agent treats 'Do NOT retry this command' as
permission to try a different command achieving the same outcome.
"""
from tools import approval as mod
self._force_short_timeout(monkeypatch, seconds=1)
mod.register_gateway_notify(self.SESSION_KEY, lambda data: None)
result = mod.check_all_command_guards("rm -rf .git", "local")
msg = result["message"]
# Explicit halt signals — these are the model-facing contract.
assert "BLOCKED" in msg
assert "NOT consented" in msg
assert "Silence is not consent" in msg
# Both forms of evasion must be named:
assert "do NOT retry" in msg.lower() or "Do NOT retry" in msg
assert "rephrase" in msg.lower()
assert "different command" in msg.lower()
def test_explicit_deny_carries_same_no_consent_shape(self):
"""An explicit /deny must produce the same shape as timeout —
the agent should treat both identically."""
from tools import approval as mod
notified = []
mod.register_gateway_notify(self.SESSION_KEY, lambda data: notified.append(data))
# Spawn the approval wait in a thread, then resolve it with "deny".
result_holder = {}
def _check():
result_holder["r"] = mod.check_all_command_guards("rm -rf .git", "local")
t = threading.Thread(target=_check)
t.start()
# Wait for the queue entry to appear, then resolve.
for _ in range(50):
if mod._gateway_queues.get(self.SESSION_KEY):
break
time.sleep(0.02)
mod.resolve_gateway_approval(self.SESSION_KEY, "deny")
t.join(timeout=5)
assert "r" in result_holder, "approval wait did not return after deny"
r = result_holder["r"]
assert r["approved"] is False
assert r.get("user_consent") is False
assert r.get("outcome") == "denied"
assert "Silence is not consent" not in r["message"] # this one IS denied, not timed-out
assert "NOT consented" in r["message"]
assert "rephrase" in r["message"].lower()
def test_timeout_emits_post_hook_with_timeout_outcome(self, monkeypatch):
"""Plugins must be able to distinguish timeout from explicit deny.
This is what an audit / notification plugin needs to alert
operators on 'agent asked, user never replied' incidents like #24912.
"""
from tools import approval as mod
self._force_short_timeout(monkeypatch, seconds=1)
mod.register_gateway_notify(self.SESSION_KEY, lambda data: None)
hook_calls = []
original_fire = mod._fire_approval_hook
def _capture(event_name, **kwargs):
hook_calls.append((event_name, kwargs))
return original_fire(event_name, **kwargs)
monkeypatch.setattr(mod, "_fire_approval_hook", _capture)
mod.check_all_command_guards("rm -rf .git", "local")
# post_approval_response must be in the hook log with choice=timeout
posts = [c for c in hook_calls if c[0] == "post_approval_response"]
assert posts, "post_approval_response hook did not fire"
last_post = posts[-1][1]
assert last_post.get("choice") == "timeout", (
f"hook choice should be 'timeout' on no-response, got {last_post.get('choice')!r}"
)

View File

@@ -1299,12 +1299,34 @@ def check_all_command_guards(command: str, env_type: str,
) )
if not resolved or choice is None or choice == "deny": if not resolved or choice is None or choice == "deny":
reason = "timed out" if not resolved else "denied by user" # Consent contract: silence is NOT consent, and an explicit
# deny is also a hard halt — both produce a BLOCKED outcome
# that names the agent's most common evasion paths (retry,
# rephrase, achieve the same outcome via a different command).
# See issue #24912 for the original incident.
if not resolved:
reason = "timed out without user response"
timeout_addendum = " Silence is not consent."
outcome = "timeout"
else:
reason = "denied by user"
timeout_addendum = ""
outcome = "denied"
return { return {
"approved": False, "approved": False,
"message": f"BLOCKED: Command {reason}. Do NOT retry this command.", "message": (
f"BLOCKED: Command {reason}. The user has NOT consented "
f"to this action. Do NOT retry this command, do NOT "
f"rephrase it, and do NOT attempt the same outcome via "
f"a different command. Stop the current workflow and "
f"wait for the user to respond before taking any "
f"further destructive or irreversible action."
f"{timeout_addendum}"
),
"pattern_key": primary_key, "pattern_key": primary_key,
"description": combined_desc, "description": combined_desc,
"outcome": outcome,
"user_consent": False,
} }
# User approved — persist based on scope (same logic as CLI) # User approved — persist based on scope (same logic as CLI)
@@ -1369,9 +1391,18 @@ def check_all_command_guards(command: str, env_type: str,
if choice == "deny": if choice == "deny":
return { return {
"approved": False, "approved": False,
"message": "BLOCKED: User denied. Do NOT retry.", "message": (
"BLOCKED: User denied this command. The user has NOT consented "
"to this action. Do NOT retry this command, do NOT rephrase "
"it, and do NOT attempt the same outcome via a different "
"command. Stop the current workflow and wait for the user "
"to respond before taking any further destructive or "
"irreversible action."
),
"pattern_key": primary_key, "pattern_key": primary_key,
"description": combined_desc, "description": combined_desc,
"outcome": "denied",
"user_consent": False,
} }
# Persist approval for each warning individually # Persist approval for each warning individually