diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index 4e5860420..5d957ca86 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -5,11 +5,11 @@ session search, web extraction, vision analysis, browser vision) picks up the best available backend without duplicating fallback logic. Resolution order for text tasks (auto mode): - 1. OpenRouter (OPENROUTER_API_KEY) - 2. Nous Portal (~/.hermes/auth.json active provider) - 3. Custom endpoint (config.yaml model.base_url + OPENAI_API_KEY) - 4. Codex OAuth (Responses API via chatgpt.com with gpt-5.3-codex, - wrapped to look like a chat.completions client) + 1. User's main provider + main model (used regardless of provider type — + aggregators, direct API-key providers, native Anthropic, Codex, etc.) + 2. OpenRouter (OPENROUTER_API_KEY) + 3. Nous Portal (~/.hermes/auth.json active provider) + 4. Custom endpoint (config.yaml model.base_url + OPENAI_API_KEY) 5. Native Anthropic 6. Direct API-key providers (z.ai/GLM, Kimi/Moonshot, MiniMax, MiniMax-CN) 7. None @@ -18,10 +18,16 @@ Resolution order for vision/multimodal tasks (auto mode): 1. Selected main provider, if it is one of the supported vision backends below 2. OpenRouter 3. Nous Portal - 4. Codex OAuth (gpt-5.3-codex supports vision via Responses API) - 5. Native Anthropic - 6. Custom endpoint (for local vision models: Qwen-VL, LLaVA, Pixtral, etc.) - 7. None + 4. Native Anthropic + 5. Custom endpoint (for local vision models: Qwen-VL, LLaVA, Pixtral, etc.) + 6. None + +Codex OAuth (ChatGPT-account auth) is intentionally NOT in either +fallback chain: OpenAI gates this endpoint behind an undocumented, +shifting model allow-list, so "just try Codex with a hardcoded model" +rots on its own. Codex is used only when the user's main provider *is* +openai-codex (Step 1 above) or when a caller explicitly requests it with +a model (auxiliary.<task>.provider + auxiliary.<task>.model). 
Per-task overrides are configured in config.yaml under the ``auxiliary:`` section (e.g. ``auxiliary.vision.provider``, ``auxiliary.compression.model``). @@ -285,12 +291,14 @@ _NOUS_DEFAULT_BASE_URL = "https://inference-api.nousresearch.com/v1" _ANTHROPIC_DEFAULT_BASE_URL = "https://api.anthropic.com" _AUTH_JSON_PATH = get_hermes_home() / "auth.json" -# Codex fallback: uses the Responses API (the only endpoint the Codex -# OAuth token can access) with a fast model for auxiliary tasks. -# ChatGPT-backed Codex accounts currently reject gpt-5.3-codex for these -# auxiliary flows, while gpt-5.2-codex remains broadly available and supports -# vision via Responses. -_CODEX_AUX_MODEL = "gpt-5.2-codex" +# Codex OAuth endpoint used when a caller explicitly requests +# provider="openai-codex". There is deliberately no hardcoded default +# model: the set of models OpenAI accepts on this endpoint for +# ChatGPT-account auth is an undocumented, shifting allow-list, and +# pinning one here has drifted silently twice (gpt-5.3-codex → gpt-5.2-codex +# → gpt-5.4 over 6 weeks in early 2026). Callers must pass the model +# they want explicitly (from config.yaml model.model, auxiliary.<task>.model, +# or the user's active Codex model selection). _CODEX_AUX_BASE_URL = "https://chatgpt.com/backend-api/codex" @@ -1420,7 +1428,23 @@ def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]: return _fallback_client, model -def _try_codex() -> Tuple[Optional[Any], Optional[str]]: +def _build_codex_client(model: str) -> Tuple[Optional[Any], Optional[str]]: + """Build a CodexAuxiliaryClient for an explicitly-requested model. + + There is no auto-selection of the Codex model: the ChatGPT-account + Codex endpoint's accepted model list is an undocumented, drifting + allow-list, so any hardcoded default we pick goes stale. The caller + is responsible for passing the model (e.g. from the user's own + ``model.model`` or ``auxiliary.<task>.model`` config). 
+ + Returns (None, None) when no Codex OAuth token is available. + """ + if not model: + logger.warning( + "Auxiliary client: openai-codex requested without a model; " + "pass model explicitly (auxiliary..model in config.yaml)." + ) + return None, None pool_present, entry = _select_pool_entry("openai-codex") if pool_present: codex_token = _pool_runtime_api_key(entry) @@ -1436,13 +1460,13 @@ def _try_codex() -> Tuple[Optional[Any], Optional[str]]: if not codex_token: return None, None base_url = _CODEX_AUX_BASE_URL - logger.debug("Auxiliary client: Codex OAuth (%s via Responses API)", _CODEX_AUX_MODEL) + logger.debug("Auxiliary client: Codex OAuth (%s via Responses API)", model) real_client = OpenAI( api_key=codex_token, base_url=base_url, default_headers=_codex_cloudflare_headers(codex_token), ) - return CodexAuxiliaryClient(real_client, _CODEX_AUX_MODEL), _CODEX_AUX_MODEL + return CodexAuxiliaryClient(real_client, model), model def _try_anthropic() -> Tuple[Optional[Any], Optional[str]]: @@ -1497,7 +1521,6 @@ _AUTO_PROVIDER_LABELS = { "_try_openrouter": "openrouter", "_try_nous": "nous", "_try_custom_endpoint": "local/custom", - "_try_codex": "openai-codex", "_resolve_api_key_provider": "api-key", } @@ -1524,12 +1547,18 @@ def _get_provider_chain() -> List[tuple]: Built at call time (not module level) so that test patches on the ``_try_*`` functions are picked up correctly. + + NOTE: ``openai-codex`` is deliberately NOT in this chain. The + ChatGPT-account Codex endpoint only accepts a shifting, undocumented + allow-list of model IDs, so falling back to it with a guessed model + fails more often than not. Codex is used only when the user's main + provider *is* openai-codex (see Step 1 of ``_resolve_auto``) or when + a caller explicitly requests it with a model. 
""" return [ ("openrouter", _try_openrouter), ("nous", _try_nous), ("local/custom", _try_custom_endpoint), - ("openai-codex", _try_codex), ("api-key", _resolve_api_key_provider), ] @@ -2045,6 +2074,13 @@ def resolve_provider_client( # ── OpenAI Codex (OAuth → Responses API) ───────────────────────── if provider == "openai-codex": + if not model: + logger.warning( + "resolve_provider_client: openai-codex requested without a " + "model; pass model explicitly (e.g. model.model in config.yaml " + "or auxiliary..model for per-task aux routing)." + ) + return None, None if raw_codex: # Return the raw OpenAI client for callers that need direct # access to responses.stream() (e.g., the main agent loop). @@ -2053,7 +2089,7 @@ def resolve_provider_client( logger.warning("resolve_provider_client: openai-codex requested " "but no Codex OAuth token found (run: hermes model)") return None, None - final_model = _normalize_resolved_model(model or _CODEX_AUX_MODEL, provider) + final_model = _normalize_resolved_model(model, provider) raw_client = OpenAI( api_key=codex_token, base_url=_CODEX_AUX_BASE_URL, @@ -2061,7 +2097,7 @@ def resolve_provider_client( ) return (raw_client, final_model) # Standard path: wrap in CodexAuxiliaryClient adapter - client, default = _try_codex() + client, default = _build_codex_client(model) if client is None: logger.warning("resolve_provider_client: openai-codex requested " "but no Codex OAuth token found (run: hermes model)") @@ -2104,9 +2140,9 @@ def resolve_provider_client( client = _wrap_if_needed(client, final_model, custom_base, custom_key) return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode else (client, final_model)) - # Try custom first, then codex, then API-key providers - for try_fn in (_try_custom_endpoint, _try_codex, - _resolve_api_key_provider): + # Try custom first, then API-key providers (Codex excluded here: + # falling through to Codex with no model is a stale-constant trap). 
+ for try_fn in (_try_custom_endpoint, _resolve_api_key_provider): client, default = try_fn() if client is not None: final_model = _normalize_resolved_model(model or default, provider) @@ -2453,7 +2489,10 @@ def _resolve_strict_vision_backend( if provider == "nous": return _try_nous(vision=True) if provider == "openai-codex": - return _try_codex() + # Route through resolve_provider_client so the caller's explicit + # model is used. There is no safe default Codex model (shifting + # allow-list); callers must specify via auxiliary..model. + return resolve_provider_client("openai-codex", model, is_vision=True) if provider == "anthropic": return _try_anthropic() if provider == "custom": diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py index c15e655db..32290b061 100644 --- a/tests/agent/test_auxiliary_client.py +++ b/tests/agent/test_auxiliary_client.py @@ -259,7 +259,7 @@ class TestAnthropicOAuthFlag: assert mock_build.call_args.args[0] == "sk-ant-oat01-pooled" -class TestTryCodex: +class TestBuildCodexClient: def test_pool_without_selected_entry_falls_back_to_auth_store(self): with ( patch("agent.auxiliary_client._select_pool_entry", return_value=(True, None)), @@ -267,15 +267,23 @@ class TestTryCodex: patch("agent.auxiliary_client.OpenAI") as mock_openai, ): mock_openai.return_value = MagicMock() - from agent.auxiliary_client import _try_codex + from agent.auxiliary_client import _build_codex_client - client, model = _try_codex() + client, model = _build_codex_client("gpt-5.4") assert client is not None - assert model == "gpt-5.2-codex" + assert model == "gpt-5.4" assert mock_openai.call_args.kwargs["api_key"] == "codex-auth-token" assert mock_openai.call_args.kwargs["base_url"] == "https://chatgpt.com/backend-api/codex" + def test_rejects_missing_model(self): + """Callers must pass an explicit model; no hardcoded default.""" + from agent.auxiliary_client import _build_codex_client + + client, model = _build_codex_client("") + 
assert client is None + assert model is None + class TestExpiredCodexFallback: """Test that expired Codex tokens don't block the auto chain.""" @@ -507,14 +515,14 @@ class TestGetTextAuxiliaryClient: patch("agent.auxiliary_client.OpenAI"), patch("hermes_cli.auth._read_codex_tokens", side_effect=AssertionError("legacy codex store should not run")), ): - from agent.auxiliary_client import _try_codex + from agent.auxiliary_client import _build_codex_client - client, model = _try_codex() + client, model = _build_codex_client("gpt-5.4") from agent.auxiliary_client import CodexAuxiliaryClient assert isinstance(client, CodexAuxiliaryClient) - assert model == "gpt-5.2-codex" + assert model == "gpt-5.4" def test_returns_none_when_nothing_available(self, monkeypatch): monkeypatch.delenv("OPENAI_BASE_URL", raising=False) @@ -783,11 +791,15 @@ class TestIsPaymentError: class TestGetProviderChain: """_get_provider_chain() resolves functions at call time (testable).""" - def test_returns_five_entries(self): + def test_returns_four_entries(self): chain = _get_provider_chain() - assert len(chain) == 5 + assert len(chain) == 4 labels = [label for label, _ in chain] - assert labels == ["openrouter", "nous", "local/custom", "openai-codex", "api-key"] + assert labels == ["openrouter", "nous", "local/custom", "api-key"] + # Codex is deliberately NOT in this chain — see _get_provider_chain + # docstring. ChatGPT-account Codex has a shifting model allow-list; + # guessing a model to fall back on breaks more often than it helps. 
+ assert "openai-codex" not in labels def test_picks_up_patched_functions(self): """Patches on _try_* functions must be visible in the chain.""" @@ -814,7 +826,6 @@ class TestTryPaymentFallback: with patch("agent.auxiliary_client._try_openrouter", return_value=(None, None)), \ patch("agent.auxiliary_client._try_nous", return_value=(None, None)), \ patch("agent.auxiliary_client._try_custom_endpoint", return_value=(None, None)), \ - patch("agent.auxiliary_client._try_codex", return_value=(None, None)), \ patch("agent.auxiliary_client._resolve_api_key_provider", return_value=(None, None)), \ patch("agent.auxiliary_client._read_main_provider", return_value="openrouter"): client, model, label = _try_payment_fallback("openrouter") @@ -825,23 +836,26 @@ class TestTryPaymentFallback: """'codex' should map to 'openai-codex' in the skip set.""" mock_client = MagicMock() with patch("agent.auxiliary_client._try_openrouter", return_value=(mock_client, "or-model")), \ - patch("agent.auxiliary_client._try_codex", return_value=(None, None)), \ patch("agent.auxiliary_client._read_main_provider", return_value="openai-codex"): client, model, label = _try_payment_fallback("openai-codex", task="vision") assert client is mock_client assert label == "openrouter" - def test_skips_to_codex_when_or_and_nous_fail(self): - mock_codex = MagicMock() + def test_codex_not_in_fallback_chain(self): + """Codex is deliberately NOT a fallback rung (shifting model allow-list). + + When OR/Nous/custom/api-key all fail, payment-fallback returns None — + Codex is never tried with a guessed model. 
+ """ with patch("agent.auxiliary_client._try_openrouter", return_value=(None, None)), \ patch("agent.auxiliary_client._try_nous", return_value=(None, None)), \ patch("agent.auxiliary_client._try_custom_endpoint", return_value=(None, None)), \ - patch("agent.auxiliary_client._try_codex", return_value=(mock_codex, "gpt-5.2-codex")), \ + patch("agent.auxiliary_client._resolve_api_key_provider", return_value=(None, None)), \ patch("agent.auxiliary_client._read_main_provider", return_value="openrouter"): client, model, label = _try_payment_fallback("openrouter") - assert client is mock_codex - assert model == "gpt-5.2-codex" - assert label == "openai-codex" + assert client is None + assert model is None + assert label == "" class TestCallLlmPaymentFallback: @@ -1360,14 +1374,14 @@ class TestAuxiliaryAuthRefreshRetry: with ( patch( "agent.auxiliary_client.resolve_vision_provider_client", - side_effect=[("openai-codex", failing_client, "gpt-5.2-codex"), ("openai-codex", fresh_client, "gpt-5.2-codex")], + side_effect=[("openai-codex", failing_client, "gpt-5.4"), ("openai-codex", fresh_client, "gpt-5.4")], ), patch("agent.auxiliary_client._refresh_provider_credentials", return_value=True) as mock_refresh, ): resp = call_llm( task="vision", provider="openai-codex", - model="gpt-5.2-codex", + model="gpt-5.4", messages=[{"role": "user", "content": "hi"}], ) @@ -1384,14 +1398,14 @@ class TestAuxiliaryAuthRefreshRetry: fresh_client.chat.completions.create.return_value = _DummyResponse("fresh-non-vision") with ( - patch("agent.auxiliary_client._resolve_task_provider_model", return_value=("openai-codex", "gpt-5.2-codex", None, None, None)), - patch("agent.auxiliary_client._get_cached_client", side_effect=[(stale_client, "gpt-5.2-codex"), (fresh_client, "gpt-5.2-codex")]), + patch("agent.auxiliary_client._resolve_task_provider_model", return_value=("openai-codex", "gpt-5.4", None, None, None)), + patch("agent.auxiliary_client._get_cached_client", side_effect=[(stale_client, 
"gpt-5.4"), (fresh_client, "gpt-5.4")]), patch("agent.auxiliary_client._refresh_provider_credentials", return_value=True) as mock_refresh, ): resp = call_llm( task="compression", provider="openai-codex", - model="gpt-5.2-codex", + model="gpt-5.4", messages=[{"role": "user", "content": "hi"}], ) @@ -1439,14 +1453,14 @@ class TestAuxiliaryAuthRefreshRetry: with ( patch( "agent.auxiliary_client.resolve_vision_provider_client", - side_effect=[("openai-codex", failing_client, "gpt-5.2-codex"), ("openai-codex", fresh_client, "gpt-5.2-codex")], + side_effect=[("openai-codex", failing_client, "gpt-5.4"), ("openai-codex", fresh_client, "gpt-5.4")], ), patch("agent.auxiliary_client._refresh_provider_credentials", return_value=True) as mock_refresh, ): resp = await async_call_llm( task="vision", provider="openai-codex", - model="gpt-5.2-codex", + model="gpt-5.4", messages=[{"role": "user", "content": "hi"}], ) diff --git a/tests/agent/test_codex_cloudflare_headers.py b/tests/agent/test_codex_cloudflare_headers.py index 6a343c8f8..2d9633a80 100644 --- a/tests/agent/test_codex_cloudflare_headers.py +++ b/tests/agent/test_codex_cloudflare_headers.py @@ -10,7 +10,7 @@ of auth correctness. ``_codex_cloudflare_headers`` in ``agent.auxiliary_client`` centralizes the header set so the primary chat client (``run_agent.AIAgent.__init__`` + ``_apply_client_headers_for_base_url``) and the auxiliary client paths -(``_try_codex`` and the ``raw_codex`` branch of ``resolve_provider_client``) +(``_build_codex_client`` and the ``raw_codex`` branch of ``resolve_provider_client``) all emit the same headers. These tests pin: @@ -207,9 +207,10 @@ class TestPrimaryClientWiring: # --------------------------------------------------------------------------- class TestAuxiliaryClientWiring: - def test_try_codex_passes_codex_headers(self, monkeypatch): - """_try_codex builds the OpenAI client used for compression / vision / - title generation when routed through Codex. 
Must emit codex headers.""" + def test_build_codex_client_passes_codex_headers(self, monkeypatch): + """_build_codex_client builds the OpenAI client used for compression / + vision / title generation when routed through Codex. Must emit codex + headers.""" from agent import auxiliary_client token = _make_codex_jwt("acct-aux-try-codex") @@ -225,7 +226,7 @@ class TestAuxiliaryClientWiring: ) with patch("agent.auxiliary_client.OpenAI") as mock_openai: mock_openai.return_value = MagicMock() - client, model = auxiliary_client._try_codex() + client, model = auxiliary_client._build_codex_client("gpt-5.4") assert client is not None headers = mock_openai.call_args.kwargs.get("default_headers") or {} assert headers.get("originator") == "codex_cli_rs" @@ -244,7 +245,7 @@ class TestAuxiliaryClientWiring: with patch("agent.auxiliary_client.OpenAI") as mock_openai: mock_openai.return_value = MagicMock() client, model = auxiliary_client.resolve_provider_client( - "openai-codex", raw_codex=True, + "openai-codex", model="gpt-5.4", raw_codex=True, ) assert client is not None headers = mock_openai.call_args.kwargs.get("default_headers") or {} diff --git a/tests/run_agent/test_provider_parity.py b/tests/run_agent/test_provider_parity.py index dabcb8b74..8eb7478b4 100644 --- a/tests/run_agent/test_provider_parity.py +++ b/tests/run_agent/test_provider_parity.py @@ -966,17 +966,25 @@ class TestAuxiliaryClientProviderPriority: client, model = get_text_auxiliary_client() assert mock.call_args.kwargs["base_url"] == "http://localhost:1234/v1" - def test_codex_fallback_last_resort(self, monkeypatch): + def test_codex_not_in_auto_fallback(self, monkeypatch): + """Codex is deliberately NOT part of the auto fallback chain. + + ChatGPT-account Codex gates which models it accepts via an + undocumented, shifting allow-list, so falling through to Codex with + a hardcoded default model breaks silently whenever OpenAI rotates + the list. 
When nothing else is available, ``get_text_auxiliary_client`` + now returns (None, None) rather than guessing a Codex model. + """ monkeypatch.delenv("OPENROUTER_API_KEY", raising=False) monkeypatch.delenv("OPENAI_BASE_URL", raising=False) monkeypatch.delenv("OPENAI_API_KEY", raising=False) - from agent.auxiliary_client import get_text_auxiliary_client, CodexAuxiliaryClient + from agent.auxiliary_client import get_text_auxiliary_client with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \ patch("agent.auxiliary_client._read_codex_access_token", return_value="codex-tok"), \ patch("agent.auxiliary_client.OpenAI"): client, model = get_text_auxiliary_client() - assert model == "gpt-5.2-codex" - assert isinstance(client, CodexAuxiliaryClient) + assert client is None + assert model is None # ── Provider routing tests ───────────────────────────────────────────────────