fix(tui): handle images with codex app-server
This commit is contained in:
@@ -87,6 +87,39 @@ class TurnResult:
|
|||||||
_TURN_ABORTED_MARKERS = ("<turn_aborted>", "<turn_aborted/>")
|
_TURN_ABORTED_MARKERS = ("<turn_aborted>", "<turn_aborted/>")
|
||||||
|
|
||||||
|
|
||||||
|
def _coerce_turn_input_text(user_input: Any) -> str:
|
||||||
|
"""Collapse Hermes/OpenAI rich content into app-server text input.
|
||||||
|
|
||||||
|
The current `turn/start` path sends text items only. TUI image attachment
|
||||||
|
can hand us OpenAI-style content parts, so keep the text/path hints and
|
||||||
|
replace opaque image payloads with a small marker instead of putting a
|
||||||
|
Python list into the `text` field.
|
||||||
|
"""
|
||||||
|
if isinstance(user_input, str):
|
||||||
|
return user_input
|
||||||
|
if isinstance(user_input, list):
|
||||||
|
parts: list[str] = []
|
||||||
|
for item in user_input:
|
||||||
|
if isinstance(item, str):
|
||||||
|
if item.strip():
|
||||||
|
parts.append(item)
|
||||||
|
continue
|
||||||
|
if not isinstance(item, dict):
|
||||||
|
if item is not None:
|
||||||
|
parts.append(str(item))
|
||||||
|
continue
|
||||||
|
item_type = item.get("type")
|
||||||
|
if item_type in {"text", "input_text"}:
|
||||||
|
text = item.get("text") or item.get("content") or ""
|
||||||
|
if text:
|
||||||
|
parts.append(str(text))
|
||||||
|
elif item_type in {"image", "image_url", "input_image"}:
|
||||||
|
parts.append("[image attached]")
|
||||||
|
text = "\n\n".join(p for p in parts if p).strip()
|
||||||
|
return text or "What do you see in this image?"
|
||||||
|
return "" if user_input is None else str(user_input)
|
||||||
|
|
||||||
|
|
||||||
# Substrings in codex stderr / JSON-RPC error messages that signal the
|
# Substrings in codex stderr / JSON-RPC error messages that signal the
|
||||||
# subprocess died because its OAuth credentials are no longer valid.
|
# subprocess died because its OAuth credentials are no longer valid.
|
||||||
# Kept conservative: we only redirect users to `codex login` when we're
|
# Kept conservative: we only redirect users to `codex login` when we're
|
||||||
@@ -327,7 +360,7 @@ class CodexAppServerSession:
|
|||||||
|
|
||||||
def run_turn(
|
def run_turn(
|
||||||
self,
|
self,
|
||||||
user_input: str,
|
user_input: Any,
|
||||||
*,
|
*,
|
||||||
turn_timeout: float = 600.0,
|
turn_timeout: float = 600.0,
|
||||||
notification_poll_timeout: float = 0.25,
|
notification_poll_timeout: float = 0.25,
|
||||||
@@ -365,6 +398,8 @@ class CodexAppServerSession:
|
|||||||
self._interrupt_event.clear()
|
self._interrupt_event.clear()
|
||||||
projector = CodexEventProjector()
|
projector = CodexEventProjector()
|
||||||
|
|
||||||
|
user_input_text = _coerce_turn_input_text(user_input)
|
||||||
|
|
||||||
# Send turn/start with the user input. Text-only for now (codex
|
# Send turn/start with the user input. Text-only for now (codex
|
||||||
# supports rich content but Hermes' text path is the common case).
|
# supports rich content but Hermes' text path is the common case).
|
||||||
try:
|
try:
|
||||||
@@ -372,7 +407,7 @@ class CodexAppServerSession:
|
|||||||
"turn/start",
|
"turn/start",
|
||||||
{
|
{
|
||||||
"threadId": self._thread_id,
|
"threadId": self._thread_id,
|
||||||
"input": [{"type": "text", "text": user_input}],
|
"input": [{"type": "text", "text": user_input_text}],
|
||||||
},
|
},
|
||||||
timeout=10,
|
timeout=10,
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -20,6 +20,7 @@ from agent.transports.codex_app_server_session import (
|
|||||||
TurnResult,
|
TurnResult,
|
||||||
_ServerRequestRouting,
|
_ServerRequestRouting,
|
||||||
_approval_choice_to_codex_decision,
|
_approval_choice_to_codex_decision,
|
||||||
|
_coerce_turn_input_text,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -128,6 +129,15 @@ class TestApprovalChoiceMapping:
|
|||||||
assert _approval_choice_to_codex_decision(choice) == expected
|
assert _approval_choice_to_codex_decision(choice) == expected
|
||||||
|
|
||||||
|
|
||||||
|
class TestTurnInputCoercion:
    """Checks how rich content parts are flattened into a single text string."""

    def test_list_content_keeps_text_and_marks_images(self):
        """A text part is kept and an image part becomes a short marker."""
        rich_content = [
            {"type": "text", "text": "caption"},
            {"type": "image_url", "image_url": {"url": "data:image/png;base64,abc"}},
        ]
        collapsed = _coerce_turn_input_text(rich_content)
        assert collapsed == "caption\n\n[image attached]"
|
||||||
|
|
||||||
|
|
||||||
# ---- lifecycle ----
|
# ---- lifecycle ----
|
||||||
|
|
||||||
class TestLifecycle:
|
class TestLifecycle:
|
||||||
@@ -188,6 +198,35 @@ class TestRunTurn:
|
|||||||
# turn_id propagated for downstream session-DB linkage
|
# turn_id propagated for downstream session-DB linkage
|
||||||
assert r.turn_id == "turn-fake-001"
|
assert r.turn_id == "turn-fake-001"
|
||||||
|
|
||||||
|
def test_rich_content_turn_is_collapsed_to_text_payload(self):
    """A rich-content turn must reach `turn/start` as one plain text item."""
    fake_client = FakeClient()
    fake_client.queue_notification(
        "turn/completed",
        threadId="t",
        turn={"id": "tu1", "status": "completed", "error": None},
    )
    session = make_session(fake_client)

    rich_input = [
        {
            "type": "text",
            "text": "look at this\n\n[Image attached at: /tmp/a.png]",
        },
        {
            "type": "image_url",
            "image_url": {"url": "data:image/png;base64,abc"},
        },
    ]
    result = session.run_turn(rich_input, turn_timeout=2.0)
    assert result.error is None

    # Inspect the first recorded `turn/start` request.
    method, params = next(
        request for request in fake_client.requests if request[0] == "turn/start"
    )
    assert method == "turn/start"
    sent_text = params["input"][0]["text"]
    assert isinstance(sent_text, str)
    assert "[Image attached at: /tmp/a.png]" in sent_text
    assert "[image attached]" in sent_text
|
||||||
|
|
||||||
def test_tool_iteration_counter_ticks(self):
|
def test_tool_iteration_counter_ticks(self):
|
||||||
client = FakeClient()
|
client = FakeClient()
|
||||||
# Two completed exec items + one final agent message
|
# Two completed exec items + one final agent message
|
||||||
|
|||||||
@@ -3350,6 +3350,8 @@ def _run_prompt_submit(rid, sid: str, session: dict, text: Any) -> None:
|
|||||||
_read_main_model(),
|
_read_main_model(),
|
||||||
_cfg,
|
_cfg,
|
||||||
)
|
)
|
||||||
|
if getattr(agent, "api_mode", "") == "codex_app_server":
|
||||||
|
_mode = "text"
|
||||||
except Exception as _img_exc:
|
except Exception as _img_exc:
|
||||||
print(
|
print(
|
||||||
f"[tui_gateway] image_routing decision failed, defaulting to text: {_img_exc}",
|
f"[tui_gateway] image_routing decision failed, defaulting to text: {_img_exc}",
|
||||||
|
|||||||
Reference in New Issue
Block a user