From 70d7f79bef44721ac1f53ef85fdcfc060c7a3c49 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 20 Apr 2026 22:18:49 -0700 Subject: [PATCH] refactor(steer): simplify injection marker to 'User guidance:' prefix (#13340) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The mid-run steer marker was '[USER STEER (injected mid-run, not tool output): {steer_text}]'. Replaced with a plain two-newline-prefixed 'User guidance: {steer_text}' suffix. Rationale: the marker lives inside the tool result's content string regardless of whether the tool returned JSON, plain text, an MCP result, or a plugin result. The bracketed tag read like structured metadata that some tools (terminal, execute_code) could confuse with their own output formatting. A plain labelled suffix works uniformly across every content shape we produce. Behavior unchanged: - Still injected into the last tool-role message's content. - Still preserves multimodal (Anthropic) content-block lists by appending a text block. - Still drained at both sites added in #12959 and #13205 — per-tool drain between individual calls, and pre-API-call drain at the top of each main-loop iteration. Checked Codex's equivalent (pending_input / inject_user_message_without_turn in codex-rs/core): they record mid-turn user input as a real role:user message via record_user_prompt_and_emit_turn_item(). That's cleaner for their Responses-API model but not portable to Chat Completions where role alternation after tool_calls is strict. Embedding the guidance in the last tool result remains the correct placement for us. Validation: all 21 tests in tests/run_agent/test_steer.py pass. 
--- run_agent.py | 4 ++-- tests/run_agent/test_steer.py | 21 +++++++++++---------- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/run_agent.py b/run_agent.py index 5ec62a06a..49240d70f 100644 --- a/run_agent.py +++ b/run_agent.py @@ -3683,7 +3683,7 @@ class AIAgent: existing = getattr(self, "_pending_steer", None) self._pending_steer = (existing + "\n" + steer_text) if existing else steer_text return - marker = f"\n\n[USER STEER (injected mid-run, not tool output): {steer_text}]" + marker = f"\n\nUser guidance: {steer_text}" existing_content = messages[target_idx].get("content", "") if not isinstance(existing_content, str): # Anthropic multimodal content blocks — preserve them and append @@ -8979,7 +8979,7 @@ class AIAgent: for _si in range(len(messages) - 1, -1, -1): _sm = messages[_si] if isinstance(_sm, dict) and _sm.get("role") == "tool": - marker = f"\n\n[USER STEER (injected mid-run, not tool output): {_pre_api_steer}]" + marker = f"\n\nUser guidance: {_pre_api_steer}" existing = _sm.get("content", "") if isinstance(existing, str): _sm["content"] = existing + marker diff --git a/tests/run_agent/test_steer.py b/tests/run_agent/test_steer.py index 9a9e4b51c..d99a0af80 100644 --- a/tests/run_agent/test_steer.py +++ b/tests/run_agent/test_steer.py @@ -85,7 +85,7 @@ class TestSteerInjection: # The LAST tool result is modified; earlier ones are untouched. assert messages[2]["content"] == "ls output A" assert "ls output B" in messages[3]["content"] - assert "[USER STEER" in messages[3]["content"] + assert "User guidance:" in messages[3]["content"] assert "please also check auth.log" in messages[3]["content"] # And pending_steer is consumed. 
assert agent._pending_steer is None @@ -107,18 +107,19 @@ class TestSteerInjection: # Steer should remain pending (nothing to drain into) assert agent._pending_steer == "steer" - def test_marker_is_unambiguous_about_origin(self): - """The injection marker must make clear the text is from the user - and not tool output — this is the cache-safe way to signal - provenance without violating message-role alternation. + def test_marker_labels_text_as_user_guidance(self): + """The injection marker must label the appended text as user + guidance so the model attributes it to the user rather than + confusing it with tool output. This is the cache-safe way to + signal provenance without violating message-role alternation. """ agent = _bare_agent() agent.steer("stop after next step") messages = [{"role": "tool", "content": "x", "tool_call_id": "1"}] agent._apply_pending_steer_to_tool_results(messages, num_tool_msgs=1) content = messages[-1]["content"] - assert "USER STEER" in content - assert "not tool output" in content.lower() or "injected mid-run" in content.lower() + assert "User guidance:" in content + assert "stop after next step" in content def test_multimodal_content_list_preserved(self): """Anthropic-style list content should be preserved, with the steer @@ -226,9 +227,9 @@ class TestPreApiCallSteerDrain: # Inject into last tool msg (mirrors the new code in run_conversation) for _si in range(len(messages) - 1, -1, -1): if messages[_si].get("role") == "tool": - messages[_si]["content"] += f"\n\n[USER STEER (injected mid-run, not tool output): {_pre_api_steer}]" + messages[_si]["content"] += f"\n\nUser guidance: {_pre_api_steer}" break - assert "[USER STEER" in messages[-1]["content"] + assert "User guidance:" in messages[-1]["content"] assert "focus on error handling" in messages[-1]["content"] assert agent._pending_steer is None @@ -270,7 +271,7 @@ class TestPreApiCallSteerDrain: assert _pre_api_steer is not None for _si in range(len(messages) - 1, -1, -1): if 
messages[_si].get("role") == "tool": - messages[_si]["content"] += f"\n\n[USER STEER (injected mid-run, not tool output): {_pre_api_steer}]" + messages[_si]["content"] += f"\n\nUser guidance: {_pre_api_steer}" break assert "change approach" in messages[2]["content"]