From c1af61428953a4b97d986bc36108df18603a324b Mon Sep 17 00:00:00 2001 From: hermes-agent-dhabibi <274096618+hermes-agent-dhabibi@users.noreply.github.com> Date: Fri, 10 Apr 2026 04:35:07 +0000 Subject: [PATCH] fix: wrap copilot Responses-API models in CodexAuxiliaryClient for auxiliary tasks GPT-5+ models (except gpt-5-mini) are only accessible via the Responses API on Copilot. When these models were configured as the compression summary_model (or any auxiliary task), the plain OpenAI client sent them to /chat/completions which returned a 400 error: model "gpt-5.4-mini" is not accessible via the /chat/completions endpoint resolve_provider_client() now checks _should_use_copilot_responses_api() for the copilot provider and wraps the client in CodexAuxiliaryClient when needed, routing calls through responses.stream() transparently. Adds tests for both the wrapping (gpt-5.4-mini) and non-wrapping (gpt-4.1-mini) paths. --- agent/auxiliary_client.py | 17 ++++++++ tests/agent/test_auxiliary_client.py | 63 ++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+) diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index d21b96240..104162cfe 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -1425,6 +1425,23 @@ def resolve_provider_client( client = OpenAI(api_key=api_key, base_url=base_url, **({"default_headers": headers} if headers else {})) + + # Copilot GPT-5+ models (except gpt-5-mini) require the Responses + # API — they are not accessible via /chat/completions. Wrap the + # plain client in CodexAuxiliaryClient so call_llm() transparently + # routes through responses.stream(). + if provider == "copilot" and final_model and not raw_codex: + try: + from hermes_cli.models import _should_use_copilot_responses_api + if _should_use_copilot_responses_api(final_model): + logger.debug( + "resolve_provider_client: copilot model %s needs " + "Responses API — wrapping with CodexAuxiliaryClient", + final_model) + client = CodexAuxiliaryClient(client, final_model) + except ImportError: + pass + logger.debug("resolve_provider_client: %s (%s)", provider, final_model) return (_to_async_client(client, final_model) if async_mode else (client, final_model)) diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py index 7038582ff..9a376d674 100644 --- a/tests/agent/test_auxiliary_client.py +++ b/tests/agent/test_auxiliary_client.py @@ -756,6 +756,69 @@ class TestAuxiliaryPoolAwareness: assert call_kwargs["base_url"] == "https://api.githubcopilot.com" assert call_kwargs["default_headers"]["Editor-Version"] + def test_copilot_responses_api_model_wrapped_in_codex_client(self, monkeypatch): + """Copilot GPT-5+ models (needing Responses API) are wrapped in CodexAuxiliaryClient.""" + monkeypatch.delenv("GITHUB_TOKEN", raising=False) + monkeypatch.delenv("GH_TOKEN", raising=False) + + with ( + patch( + "hermes_cli.auth.resolve_api_key_provider_credentials", + return_value={ + "provider": "copilot", + "api_key": "test-token", + "base_url": "https://api.githubcopilot.com", + "source": "gh auth token", + }, + ), + patch("agent.auxiliary_client.OpenAI"), + ): + client, model = resolve_provider_client("copilot", model="gpt-5.4-mini") + + from agent.auxiliary_client import CodexAuxiliaryClient + assert isinstance(client, CodexAuxiliaryClient) + assert model == "gpt-5.4-mini" + + def test_copilot_chat_completions_model_not_wrapped(self, monkeypatch): + """Copilot models using Chat Completions are returned as plain OpenAI clients.""" + monkeypatch.delenv("GITHUB_TOKEN", raising=False) + monkeypatch.delenv("GH_TOKEN", raising=False) + + with ( + patch( + "hermes_cli.auth.resolve_api_key_provider_credentials", + return_value={ + "provider": "copilot", + "api_key": "test-token", + "base_url": "https://api.githubcopilot.com", + "source": "gh auth token", + }, + ), + patch("agent.auxiliary_client.OpenAI") as mock_openai, + ): + client, model = resolve_provider_client("copilot", model="gpt-4.1-mini") + + from agent.auxiliary_client import CodexAuxiliaryClient + assert not isinstance(client, CodexAuxiliaryClient) + assert model == "gpt-4.1-mini" + # Should be the raw mock OpenAI client + assert client is mock_openai.return_value + + def test_vision_auto_uses_active_provider_as_fallback(self, monkeypatch): + """When no OpenRouter/Nous available, vision auto falls back to active provider.""" + monkeypatch.setenv("ANTHROPIC_API_KEY", "***") + with ( + patch("agent.auxiliary_client._read_nous_auth", return_value=None), + patch("agent.auxiliary_client._read_main_provider", return_value="anthropic"), + patch("agent.auxiliary_client._read_main_model", return_value="claude-sonnet-4"), + patch("agent.anthropic_adapter.build_anthropic_client", return_value=MagicMock()), + patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="***"), + ): + client, model = get_vision_auxiliary_client() + + assert client is not None + assert client.__class__.__name__ == "AnthropicAuxiliaryClient" + def test_vision_auto_prefers_active_provider_over_openrouter(self, monkeypatch): """Active provider is tried before OpenRouter in vision auto.""" monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")