From 4f5e8b22a723bd7e85d9162fe93b42f18f780dec Mon Sep 17 00:00:00 2001
From: ran <edding.suree@gmail.com>
Date: Tue, 7 Apr 2026 18:07:08 +0800
Subject: [PATCH] fix: drop incompatible model slugs on auxiliary client cache
 hit

`resolve_provider_client()` already drops OpenRouter-format model slugs
(containing "/") when the resolved provider is not OpenRouter (line 1097).
However, `_get_cached_client()` returns `model or cached_default` directly
on cache hits, bypassing this check entirely.

When the main provider is openai-codex, the auto-detection chain (Step 1
of `_resolve_auto`) caches a CodexAuxiliaryClient. Subsequent auxiliary
calls for different tasks (e.g. compression with `summary_model:
google/gemini-3-flash-preview`) hit the cache and pass the
OpenRouter-format model slug straight to the Codex Responses API, which
does not understand it and returns an empty `response.output`.

This causes two user-visible failures:
- "Invalid API response shape" (empty output after 3 retries)
- "Context length exceeded, cannot compress further" (compression itself
  fails through the same path)

Add a `_compat_model()` helper (backed by `_is_openrouter_client()`) that
mirrors the "/" check from `resolve_provider_client()`, and call it on
both cache-hit return paths in `_get_cached_client()`.
---
 agent/auxiliary_client.py | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py
index 32188b2e8..6e4f752cf 100644
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -1840,6 +1840,23 @@ def cleanup_stale_async_clients() -> None:
             del _client_cache[key]
 
 
+def _is_openrouter_client(client: Any) -> bool:
+    """Return True when client, or its `_client`/`client` attribute, has an OpenRouter base_url."""
+    candidates = (client, getattr(client, "_client", None), getattr(client, "client", None))
+    urls = (str(getattr(c, "base_url", "") or "") for c in candidates if c)
+    return any("openrouter" in url.lower() for url in urls)
+
+
+def _compat_model(client: Any, model: Optional[str], cached_default: Optional[str]) -> Optional[str]:
+    """Pick the model for a cache hit; '/'-slugs are kept only for OpenRouter clients.
+
+    Same guard as resolve_provider_client(), which a cache hit does not go through.
+    """
+    if not model or ("/" in model and not _is_openrouter_client(client)):
+        return cached_default
+    return model
+
+
 def _get_cached_client(
     provider: str,
     model: str = None,
@@ -1882,9 +1899,11 @@ def _get_cached_client(
                     _force_close_async_httpx(cached_client)
                     del _client_cache[cache_key]
                 else:
-                    return cached_client, model or cached_default
+                    effective = _compat_model(cached_client, model, cached_default)
+                    return cached_client, effective
             else:
-                return cached_client, model or cached_default
+                effective = _compat_model(cached_client, model, cached_default)
+                return cached_client, effective
     # Build outside the lock
     client, default_model = resolve_provider_client(
         provider,