From 4f5e8b22a723bd7e85d9162fe93b42f18f780dec Mon Sep 17 00:00:00 2001
From: ran
Date: Tue, 7 Apr 2026 18:07:08 +0800
Subject: [PATCH] fix: drop incompatible model slugs on auxiliary client cache hit

`resolve_provider_client()` already drops OpenRouter-format model slugs
(containing "/") when the resolved provider is not OpenRouter (line 1097).
However, `_get_cached_client()` returns `model or cached_default` directly
on cache hits, bypassing this check entirely.

When the main provider is openai-codex, the auto-detection chain (Step 1
of `_resolve_auto`) caches a CodexAuxiliaryClient. Subsequent auxiliary
calls for different tasks (e.g. compression with
`summary_model: google/gemini-3-flash-preview`) hit the cache and pass the
OpenRouter-format model slug straight to the Codex Responses API, which
does not understand it and returns an empty `response.output`.

This causes two user-visible failures:
- "Invalid API response shape" (empty output after 3 retries)
- "Context length exceeded, cannot compress further" (compression itself
  fails through the same path)

Add a `_compat_model()` helper that mirrors the "/" check from
`resolve_provider_client()` and call it on the cache-hit return paths.
---
Notes (ignored by git-am): line structure and context-line indentation were
reconstructed from a whitespace-collapsed copy of this patch; verify the
context against agent/auxiliary_client.py (or apply with
`git apply --ignore-whitespace`). The post-image blob hash on the index line
is carried over from the earlier revision and has not been regenerated.

 agent/auxiliary_client.py | 39 +++++++++++++++++++++++++++++++++++++--
 1 file changed, 37 insertions(+), 2 deletions(-)

diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py
index 32188b2e8..6e4f752cf 100644
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -1840,6 +1840,41 @@ def cleanup_stale_async_clients() -> None:
             del _client_cache[key]
 
 
+def _is_openrouter_client(client: Any) -> bool:
+    """Return True if *client* or an object it wraps has an OpenRouter base_url.
+
+    Checks ``client`` itself plus its ``_client`` and ``client`` attributes,
+    matching "openrouter" case-insensitively in each ``base_url``.
+    """
+    candidates = (client, getattr(client, "_client", None), getattr(client, "client", None))
+    # Compare against None rather than relying on truthiness so a wrapper that
+    # defines __bool__/__len__ is still inspected.
+    return any(
+        obj is not None
+        and "openrouter" in str(getattr(obj, "base_url", "") or "").lower()
+        for obj in candidates
+    )
+
+
+def _compat_model(client: Any, model: Optional[str], cached_default: Optional[str]) -> Optional[str]:
+    """Drop OpenRouter-format model slugs (with '/') for non-OpenRouter clients.
+
+    Mirrors the guard in resolve_provider_client() which is skipped on cache hits.
+
+    Args:
+        client: The cached client the model name will be sent to.
+        model: Caller-requested model name, if any.
+        cached_default: Default model stored alongside the cached client.
+
+    Returns:
+        ``cached_default`` when ``model`` contains "/" and ``client`` is not an
+        OpenRouter client; otherwise ``model`` if truthy, else ``cached_default``.
+    """
+    if model and "/" in model and not _is_openrouter_client(client):
+        return cached_default
+    return model or cached_default
+
+
 def _get_cached_client(
     provider: str,
     model: str = None,
@@ -1882,9 +1917,9 @@ def _get_cached_client(
                     _force_close_async_httpx(cached_client)
                     del _client_cache[cache_key]
                 else:
-                    return cached_client, model or cached_default
+                    return cached_client, _compat_model(cached_client, model, cached_default)
             else:
-                return cached_client, model or cached_default
+                return cached_client, _compat_model(cached_client, model, cached_default)
     # Build outside the lock
     client, default_model = resolve_provider_client(
         provider,