From 70d7f79bef44721ac1f53ef85fdcfc060c7a3c49 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Mon, 20 Apr 2026 22:18:49 -0700
Subject: [PATCH] refactor(steer): simplify injection marker to 'User
 guidance:' prefix (#13340)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The mid-run steer marker was '[USER STEER (injected mid-run, not tool
output): <text>]'. It is now a plain 'User guidance: <text>' suffix,
still preceded by two newlines.

Rationale: the marker lives inside the tool result's content string
regardless of whether the tool returned JSON, plain text, an MCP
result, or a plugin result. The bracketed tag read like structured
metadata that some tools (terminal, execute_code) could confuse with
their own output formatting. A plain labelled suffix works uniformly
across every content shape we produce.

Behavior unchanged:
- Still injected into the last tool-role message's content.
- Still preserves multimodal (Anthropic) content-block lists by
  appending a text block.
- Still drained at both sites added in #12959 and #13205 — per-tool
  drain between individual calls, and pre-API-call drain at the top
  of each main-loop iteration.

We checked Codex's equivalent (pending_input /
inject_user_message_without_turn in codex-rs/core): it records mid-turn
user input as a real role:user message via
record_user_prompt_and_emit_turn_item(). That's cleaner for their
Responses-API model but not portable to Chat Completions, where role
alternation after tool_calls is strict. Embedding the guidance in the
last tool result remains the correct placement for us.

Validation: all 21 tests in tests/run_agent/test_steer.py pass.
---
 run_agent.py                  |  4 ++--
 tests/run_agent/test_steer.py | 21 +++++++++++----------
 2 files changed, 13 insertions(+), 12 deletions(-)
diff --git a/run_agent.py b/run_agent.py
index 5ec62a06a..49240d70f 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -3683,7 +3683,7 @@ class AIAgent:
                 existing = getattr(self, "_pending_steer", None)
                 self._pending_steer = (existing + "\n" + steer_text) if existing else steer_text
             return
-        marker = f"\n\n[USER STEER (injected mid-run, not tool output): {steer_text}]"
+        marker = f"\n\nUser guidance: {steer_text}"
         existing_content = messages[target_idx].get("content", "")
         if not isinstance(existing_content, str):
             # Anthropic multimodal content blocks — preserve them and append
@@ -8979,7 +8979,7 @@ class AIAgent:
                 for _si in range(len(messages) - 1, -1, -1):
                     _sm = messages[_si]
                     if isinstance(_sm, dict) and _sm.get("role") == "tool":
-                        marker = f"\n\n[USER STEER (injected mid-run, not tool output): {_pre_api_steer}]"
+                        marker = f"\n\nUser guidance: {_pre_api_steer}"
                         existing = _sm.get("content", "")
                         if isinstance(existing, str):
                             _sm["content"] = existing + marker
diff --git a/tests/run_agent/test_steer.py b/tests/run_agent/test_steer.py
index 9a9e4b51c..d99a0af80 100644
--- a/tests/run_agent/test_steer.py
+++ b/tests/run_agent/test_steer.py
@@ -85,7 +85,7 @@ class TestSteerInjection:
         # The LAST tool result is modified; earlier ones are untouched.
         assert messages[2]["content"] == "ls output A"
         assert "ls output B" in messages[3]["content"]
-        assert "[USER STEER" in messages[3]["content"]
+        assert "User guidance:" in messages[3]["content"]
         assert "please also check auth.log" in messages[3]["content"]
         # And pending_steer is consumed.
         assert agent._pending_steer is None
@@ -107,18 +107,19 @@ class TestSteerInjection:
         # Steer should remain pending (nothing to drain into)
         assert agent._pending_steer == "steer"
 
-    def test_marker_is_unambiguous_about_origin(self):
-        """The injection marker must make clear the text is from the user
-        and not tool output — this is the cache-safe way to signal
-        provenance without violating message-role alternation.
+    def test_marker_labels_text_as_user_guidance(self):
+        """The injection marker must label the appended text as user
+        guidance so the model attributes it to the user rather than
+        confusing it with tool output.  This is the cache-safe way to
+        signal provenance without violating message-role alternation.
         """
         agent = _bare_agent()
         agent.steer("stop after next step")
         messages = [{"role": "tool", "content": "x", "tool_call_id": "1"}]
         agent._apply_pending_steer_to_tool_results(messages, num_tool_msgs=1)
         content = messages[-1]["content"]
-        assert "USER STEER" in content
-        assert "not tool output" in content.lower() or "injected mid-run" in content.lower()
+        assert "User guidance:" in content
+        assert "stop after next step" in content
 
     def test_multimodal_content_list_preserved(self):
         """Anthropic-style list content should be preserved, with the steer
@@ -226,9 +227,9 @@ class TestPreApiCallSteerDrain:
         # Inject into last tool msg (mirrors the new code in run_conversation)
         for _si in range(len(messages) - 1, -1, -1):
             if messages[_si].get("role") == "tool":
-                messages[_si]["content"] += f"\n\n[USER STEER (injected mid-run, not tool output): {_pre_api_steer}]"
+                messages[_si]["content"] += f"\n\nUser guidance: {_pre_api_steer}"
                 break
-        assert "[USER STEER" in messages[-1]["content"]
+        assert "User guidance:" in messages[-1]["content"]
         assert "focus on error handling" in messages[-1]["content"]
         assert agent._pending_steer is None
 
@@ -270,7 +271,7 @@ class TestPreApiCallSteerDrain:
         assert _pre_api_steer is not None
         for _si in range(len(messages) - 1, -1, -1):
             if messages[_si].get("role") == "tool":
-                messages[_si]["content"] += f"\n\n[USER STEER (injected mid-run, not tool output): {_pre_api_steer}]"
+                messages[_si]["content"] += f"\n\nUser guidance: {_pre_api_steer}"
                 break
         assert "change approach" in messages[2]["content"]