"""Tests for CodexAppServerSession — drive turns through a mock client. The session adapter has the most complex behavior of the three new modules: notification draining, server-request handling (approvals), interrupt, deadline timeouts. These tests pin all of that without spawning real codex. """ from __future__ import annotations import threading import time from typing import Any, Optional import pytest from agent.transports.codex_app_server_session import ( CodexAppServerSession, TurnResult, _ServerRequestRouting, _approval_choice_to_codex_decision, ) class FakeClient: """Stand-in for CodexAppServerClient that records calls and lets the test drive the notification / server-request streams synchronously.""" def __init__(self, *, codex_bin: str = "codex", codex_home=None) -> None: self.codex_bin = codex_bin self.codex_home = codex_home self.requests: list[tuple[str, dict]] = [] self.notifications_responses: list[dict] = [] self.responses: list[tuple[Any, dict]] = [] self.error_responses: list[tuple[Any, int, str]] = [] self._initialized = False self._closed = False self._notifications: list[dict] = [] self._server_requests: list[dict] = [] self._request_handler = None # Optional[Callable[[str, dict], dict]] # API matching CodexAppServerClient def initialize(self, **kwargs): self._initialized = True return {"userAgent": "fake/0.0.0", "codexHome": "/tmp", "platformOs": "linux", "platformFamily": "unix"} def request(self, method: str, params: Optional[dict] = None, timeout: float = 30.0): self.requests.append((method, params or {})) if self._request_handler is not None: return self._request_handler(method, params or {}) # Sensible defaults for protocol methods used by the session if method == "thread/start": return {"thread": {"id": "thread-fake-001"}, "activePermissionProfile": {"id": "workspace-write"}} if method == "turn/start": return {"turn": {"id": "turn-fake-001"}} if method == "turn/interrupt": return {} return {} def notify(self, method: str, params=None): pass def respond(self, request_id, result): self.responses.append((request_id, result)) def respond_error(self, request_id, code, message, data=None): self.error_responses.append((request_id, code, message)) def take_notification(self, timeout: float = 0.0): if self._notifications: return self._notifications.pop(0) # Honor a tiny sleep so the loop doesn't hot-spin; the real client # blocks on a queue. For tests we want determinism. if timeout > 0: time.sleep(min(timeout, 0.001)) return None def take_server_request(self, timeout: float = 0.0): if self._server_requests: return self._server_requests.pop(0) return None def close(self): self._closed = True # Test helpers def queue_notification(self, method: str, **params): self._notifications.append({"method": method, "params": params}) def queue_server_request(self, method: str, request_id: Any = "srv-1", **params): self._server_requests.append({"id": request_id, "method": method, "params": params}) def make_session(client: FakeClient, **kwargs) -> CodexAppServerSession: return CodexAppServerSession( cwd="/tmp", client_factory=lambda **kw: client, **kwargs, ) # ---- choice mapping ---- class TestApprovalChoiceMapping: @pytest.mark.parametrize("choice,expected", [ ("once", "accept"), ("session", "acceptForSession"), ("always", "acceptForSession"), ("deny", "decline"), ("anything-else", "decline"), ]) def test_mapping(self, choice, expected): assert _approval_choice_to_codex_decision(choice) == expected # ---- lifecycle ---- class TestLifecycle: def test_ensure_started_is_idempotent(self): client = FakeClient() s = make_session(client) tid_a = s.ensure_started() tid_b = s.ensure_started() assert tid_a == tid_b == "thread-fake-001" # thread/start should be called exactly once method_calls = [m for (m, _) in client.requests if m == "thread/start"] assert len(method_calls) == 1 def test_thread_start_passes_cwd_only(self): """thread/start carries cwd. We intentionally do NOT pass `permissions` on this codex version (experimentalApi-gated + requires matching config.toml [permissions] table). Letting codex use its default (read-only unless user configures otherwise) is the documented path.""" client = FakeClient() s = make_session(client, permission_profile="workspace-write") s.ensure_started() method, params = next(r for r in client.requests if r[0] == "thread/start") assert params["cwd"] == "/tmp" assert "permissions" not in params # see session.ensure_started() comment def test_close_idempotent(self): client = FakeClient() s = make_session(client) s.ensure_started() s.close() s.close() assert client._closed is True # ---- turn loop ---- class TestRunTurn: def test_simple_text_turn_returns_final_message(self): client = FakeClient() client.queue_notification("turn/started", threadId="t", turn={"id": "tu1"}) client.queue_notification( "item/completed", item={"type": "agentMessage", "id": "m1", "text": "hello world"}, threadId="t", turnId="tu1", ) client.queue_notification( "turn/completed", threadId="t", turn={"id": "tu1", "status": "completed", "error": None}, ) s = make_session(client) r = s.run_turn("hi", turn_timeout=2.0) assert r.final_text == "hello world" assert r.interrupted is False assert r.error is None assert any(m["role"] == "assistant" and m.get("content") == "hello world" for m in r.projected_messages) # turn_id propagated for downstream session-DB linkage assert r.turn_id == "turn-fake-001" def test_tool_iteration_counter_ticks(self): client = FakeClient() # Two completed exec items + one final agent message for i, item_id in enumerate(("ex1", "ex2"), start=1): client.queue_notification( "item/completed", item={ "type": "commandExecution", "id": item_id, "command": f"cmd{i}", "cwd": "/tmp", "status": "completed", "aggregatedOutput": "ok", "exitCode": 0, "commandActions": [], }, threadId="t", turnId="tu1", ) client.queue_notification( "item/completed", item={"type": "agentMessage", "id": "m1", "text": "done"}, threadId="t", turnId="tu1", ) client.queue_notification( "turn/completed", threadId="t", turn={"id": "tu1", "status": "completed", "error": None}, ) s = make_session(client) r = s.run_turn("do stuff", turn_timeout=2.0) assert r.tool_iterations == 2 # Each tool item produces (assistant, tool) — 2*2 + final assistant = 5 msgs assert len(r.projected_messages) == 5 def test_turn_start_failure_returns_error(self): client = FakeClient() from agent.transports.codex_app_server import CodexAppServerError def boom(method, params): if method == "turn/start": raise CodexAppServerError(code=-32600, message="bad input") return {"thread": {"id": "t"}, "activePermissionProfile": {"id": "x"}} client._request_handler = boom s = make_session(client) r = s.run_turn("hi", turn_timeout=2.0) assert r.error is not None assert "bad input" in r.error assert r.final_text == "" def test_interrupt_during_turn_issues_turn_interrupt(self): client = FakeClient() # Don't queue turn/completed — the loop has to interrupt out client.queue_notification( "item/completed", item={"type": "commandExecution", "id": "x", "command": "sleep 60", "cwd": "/", "status": "inProgress", "aggregatedOutput": None, "exitCode": None, "commandActions": []}, threadId="t", turnId="tu1", ) s = make_session(client) s.ensure_started() # Trip the interrupt before run_turn even consumes the notification. # The loop will see interrupt set on its first iteration and bail. s.request_interrupt() r = s.run_turn("loop forever", turn_timeout=2.0) assert r.interrupted is True # turn/interrupt was requested with the right turnId assert any( method == "turn/interrupt" and params.get("turnId") == "turn-fake-001" for (method, params) in client.requests ) def test_deadline_exceeded_records_error(self): client = FakeClient() # No notifications and no completion → must hit deadline s = make_session(client) r = s.run_turn("never finishes", turn_timeout=0.05, notification_poll_timeout=0.01) assert r.interrupted is True assert r.error and "timed out" in r.error def test_failed_turn_records_error_from_turn_completed(self): client = FakeClient() client.queue_notification( "turn/completed", threadId="t", turn={"id": "tu1", "status": "failed", "error": {"message": "model error"}}, ) s = make_session(client) r = s.run_turn("x", turn_timeout=1.0) assert r.error and "model error" in r.error # ---- approval bridge ---- class TestServerRequestRouting: def test_exec_approval_with_callback_approves_once(self): client = FakeClient() client.queue_server_request( "item/commandExecution/requestApproval", request_id="req-1", command="ls /tmp", cwd="/tmp", ) client.queue_notification( "turn/completed", threadId="t", turn={"id": "tu1", "status": "completed", "error": None}, ) captured: dict = {} def cb(command, description, *, allow_permanent=True): captured["command"] = command captured["description"] = description return "once" s = make_session(client, approval_callback=cb) s.run_turn("hi", turn_timeout=1.0) assert captured["command"] == "ls /tmp" # The session must have responded to the server request with "accept" assert ("req-1", {"decision": "accept"}) in client.responses def test_exec_approval_no_callback_denies(self): client = FakeClient() client.queue_server_request("item/commandExecution/requestApproval", request_id="req-1", command="rm -rf /", cwd="/") client.queue_notification( "turn/completed", threadId="t", turn={"id": "tu1", "status": "completed", "error": None}, ) s = make_session(client) # no approval_callback wired s.run_turn("hi", turn_timeout=1.0) assert ("req-1", {"decision": "decline"}) in client.responses def test_apply_patch_approval_session_maps_to_session_decision(self): client = FakeClient() client.queue_server_request( "item/fileChange/requestApproval", request_id="req-2", itemId="fc-1", turnId="t1", threadId="th", startedAtMs=1234567890, reason="create new file with hello() function", ) client.queue_notification( "turn/completed", threadId="t", turn={"id": "tu1", "status": "completed", "error": None}, ) def cb(command, description, *, allow_permanent=True): return "session" s = make_session(client, approval_callback=cb) s.run_turn("hi", turn_timeout=1.0) assert ("req-2", {"decision": "acceptForSession"}) in client.responses def test_unknown_server_request_replied_with_error(self): client = FakeClient() client.queue_server_request("totally/unknown", request_id="req-3") client.queue_notification( "turn/completed", threadId="t", turn={"id": "tu1", "status": "completed", "error": None}, ) s = make_session(client) s.run_turn("hi", turn_timeout=1.0) assert any( rid == "req-3" and code == -32601 for (rid, code, _msg) in client.error_responses ) def test_mcp_elicitation_for_hermes_tools_auto_accepts(self): """When codex elicits on behalf of hermes-tools (our own callback), accept automatically — the user already opted in by enabling the runtime.""" client = FakeClient() client.queue_server_request( "mcpServer/elicitation/request", request_id="elic-1", threadId="t", turnId="tu1", serverName="hermes-tools", mode="form", message="confirm", requestedSchema={"type": "object", "properties": {}}, ) client.queue_notification( "turn/completed", threadId="t", turn={"id": "tu1", "status": "completed", "error": None}, ) s = make_session(client) s.run_turn("hi", turn_timeout=1.0) assert ("elic-1", {"action": "accept", "content": None, "_meta": None}) in client.responses def test_mcp_elicitation_for_other_servers_declines(self): """For third-party MCP servers we decline by default so users explicitly opt in through codex's own UI.""" client = FakeClient() client.queue_server_request( "mcpServer/elicitation/request", request_id="elic-2", threadId="t", turnId="tu1", serverName="some-third-party", mode="url", message="please log in", url="https://example.com/oauth", ) client.queue_notification( "turn/completed", threadId="t", turn={"id": "tu1", "status": "completed", "error": None}, ) s = make_session(client) s.run_turn("hi", turn_timeout=1.0) assert ("elic-2", {"action": "decline", "content": None, "_meta": None}) in client.responses def test_routing_auto_approve_bypass(self): client = FakeClient() client.queue_server_request("item/commandExecution/requestApproval", request_id="r1", command="ls", cwd="/") client.queue_notification( "turn/completed", threadId="t", turn={"id": "tu1", "status": "completed", "error": None}, ) # No callback, but routing says auto-approve. Should approve. s = make_session(client, request_routing=_ServerRequestRouting( auto_approve_exec=True)) s.run_turn("hi", turn_timeout=1.0) assert ("r1", {"decision": "accept"}) in client.responses def test_callback_raises_falls_back_to_decline(self): client = FakeClient() client.queue_server_request("item/commandExecution/requestApproval", request_id="r1", command="ls", cwd="/") client.queue_notification( "turn/completed", threadId="t", turn={"id": "tu1", "status": "completed", "error": None}, ) def boom(*a, **kw): raise RuntimeError("ui crashed") s = make_session(client, approval_callback=boom) s.run_turn("hi", turn_timeout=1.0) # Fail-closed: deny on callback exception assert ("r1", {"decision": "decline"}) in client.responses # ---- enriched approval prompts ---- class TestApprovalPromptEnrichment: """Quirk #4: apply_patch prompt should show what's changing. Quirk #10: exec prompt should never show empty cwd.""" def test_exec_falls_back_to_session_cwd(self): """When codex omits cwd from the approval params, the prompt shows the session cwd, not an empty string.""" client = FakeClient() client.queue_server_request( "item/commandExecution/requestApproval", request_id="r1", command="ls", # no cwd ) client.queue_notification( "turn/completed", threadId="t", turn={"id": "tu1", "status": "completed", "error": None}, ) captured = {} def cb(command, description, *, allow_permanent=True): captured["description"] = description return "once" s = make_session(client, approval_callback=cb) s.run_turn("hi", turn_timeout=1.0) # Session cwd is /tmp by default in make_session() assert "/tmp" in captured["description"] assert "Codex requests exec in " not in captured["description"] def test_apply_patch_prompt_summarizes_pending_changes(self): """When the projector has cached the fileChange item from item/started, the approval prompt surfaces the change summary.""" client = FakeClient() # item/started fires first (carries the changes), then approval request client.queue_notification( "item/started", item={"type": "fileChange", "id": "fc-1", "changes": [ {"kind": {"type": "add"}, "path": "/tmp/new.py"}, {"kind": {"type": "update"}, "path": "/tmp/old.py"}, ]}, threadId="t", turnId="tu1", ) client.queue_server_request( "item/fileChange/requestApproval", request_id="req-2", itemId="fc-1", turnId="tu1", threadId="t", startedAtMs=1234567890, reason="add and update files", ) client.queue_notification( "turn/completed", threadId="t", turn={"id": "tu1", "status": "completed", "error": None}, ) captured = {} def cb(command, description, *, allow_permanent=True): captured["command"] = command captured["description"] = description return "once" s = make_session(client, approval_callback=cb) s.run_turn("hi", turn_timeout=1.0) # Both add and update kinds should be in the summary assert "1 add" in captured["command"] or "1 add" in captured["description"] assert "1 update" in captured["command"] or "1 update" in captured["description"] # And at least one of the paths joined = captured["command"] + " " + captured["description"] assert "/tmp/new.py" in joined or "/tmp/old.py" in joined def test_apply_patch_prompt_works_without_cached_summary(self): """When approval arrives before item/started (or without changes info), prompt falls back to whatever codex provided.""" client = FakeClient() client.queue_server_request( "item/fileChange/requestApproval", request_id="req-2", itemId="fc-orphan", turnId="tu1", threadId="t", startedAtMs=1234567890, reason="apply some changes", ) client.queue_notification( "turn/completed", threadId="t", turn={"id": "tu1", "status": "completed", "error": None}, ) captured = {} def cb(command, description, *, allow_permanent=True): captured["command"] = command return "once" s = make_session(client, approval_callback=cb) s.run_turn("hi", turn_timeout=1.0) # Falls back to the reason assert "apply some changes" in captured["command"]