From 0f778f776877cd452cf7475f06a6044cf07ebfe8 Mon Sep 17 00:00:00 2001 From: jarvischer Date: Sat, 18 Apr 2026 22:46:36 +0530 Subject: [PATCH 1/5] fix: prevent tool name duplication in streaming accumulator (MiniMax/NVIDIA NIM) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Based on #11984 by @maxchernin. Fixes #8259. Some providers (MiniMax M2.7 via NVIDIA NIM) resend the full function name in every streaming chunk instead of only the first. The old accumulator used += which concatenated them into 'read_fileread_file'. Changed to simple assignment (=), matching the OpenAI Node SDK, LiteLLM, and Vercel AI SDK patterns. Function names are atomic identifiers delivered complete — no provider splits them across chunks, so concatenation was never correct semantics. --- run_agent.py | 10 ++++++- scripts/release.py | 1 + tests/run_agent/test_streaming.py | 44 +++++++++++++++++++++++++++++++ 3 files changed, 54 insertions(+), 1 deletion(-) diff --git a/run_agent.py b/run_agent.py index a47455e53..e88096a60 100644 --- a/run_agent.py +++ b/run_agent.py @@ -5868,7 +5868,15 @@ class AIAgent: entry["id"] = tc_delta.id if tc_delta.function: if tc_delta.function.name: - entry["function"]["name"] += tc_delta.function.name + # Use assignment, not +=. Function names are + # atomic identifiers delivered complete in the + # first chunk (OpenAI spec). Some providers + # (MiniMax M2.7 via NVIDIA NIM) resend the full + # name in every chunk; concatenation would + # produce "read_fileread_file". Assignment + # (matching the OpenAI Node SDK / LiteLLM / + # Vercel AI patterns) is immune to this. 
+ entry["function"]["name"] = tc_delta.function.name if tc_delta.function.arguments: entry["function"]["arguments"] += tc_delta.function.arguments extra = getattr(tc_delta, "extra_content", None) diff --git a/scripts/release.py b/scripts/release.py index 4c32dccfd..88ddb2f43 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -266,6 +266,7 @@ AUTHOR_MAP = { "limkuan24@gmail.com": "WideLee", "aviralarora002@gmail.com": "AviArora02-commits", "junminliu@gmail.com": "JimLiu", + "jarvischer@gmail.com": "maxchernin", } diff --git a/tests/run_agent/test_streaming.py b/tests/run_agent/test_streaming.py index 6afe36ee3..e4825599a 100644 --- a/tests/run_agent/test_streaming.py +++ b/tests/run_agent/test_streaming.py @@ -141,6 +141,50 @@ class TestStreamingAccumulator: assert tc[0].function.name == "terminal" assert tc[0].function.arguments == '{"command": "ls"}' + @patch("run_agent.AIAgent._create_request_openai_client") + @patch("run_agent.AIAgent._close_request_openai_client") + def test_tool_name_not_duplicated_when_resent_per_chunk(self, mock_close, mock_create): + """MiniMax M2.7 via NVIDIA NIM resends the full name in every chunk. + + Bug #8259: the old += accumulation produced "read_fileread_file". + Assignment (matching OpenAI Node SDK / LiteLLM) prevents this. 
+ """ + from run_agent import AIAgent + + chunks = [ + _make_stream_chunk(tool_calls=[ + _make_tool_call_delta(index=0, tc_id="call_nim", name="read_file") + ]), + _make_stream_chunk(tool_calls=[ + _make_tool_call_delta(index=0, tc_id="call_nim", name="read_file", arguments='{"path":') + ]), + _make_stream_chunk(tool_calls=[ + _make_tool_call_delta(index=0, tc_id="call_nim", name="read_file", arguments=' "x.py"}') + ]), + _make_stream_chunk(finish_reason="tool_calls"), + ] + + mock_client = MagicMock() + mock_client.chat.completions.create.return_value = iter(chunks) + mock_create.return_value = mock_client + + agent = AIAgent( + model="test/model", + quiet_mode=True, + skip_context_files=True, + skip_memory=True, + ) + agent.api_mode = "chat_completions" + agent._interrupt_requested = False + + response = agent._interruptible_streaming_api_call({}) + + tc = response.choices[0].message.tool_calls + assert tc is not None + assert len(tc) == 1 + assert tc[0].function.name == "read_file" + assert tc[0].function.arguments == '{"path": "x.py"}' + @patch("run_agent.AIAgent._create_request_openai_client") @patch("run_agent.AIAgent._close_request_openai_client") def test_tool_call_extra_content_preserved(self, mock_close, mock_create): From f7af90e2daf2e2a11262ff3152bb3f08ff13ca37 Mon Sep 17 00:00:00 2001 From: LVT382009 Date: Sat, 18 Apr 2026 22:49:30 +0530 Subject: [PATCH 2/5] fix: wire _ephemeral_max_output_tokens into chat_completions and add NVIDIA NIM default Based on #12152 by @LVT382009. Three fixes to run_agent.py: 1. _ephemeral_max_output_tokens consumption in chat_completions path: The error-recovery ephemeral override was only consumed in the anthropic_messages branch of _build_api_kwargs. All chat_completions providers (OpenRouter, NVIDIA NIM, Qwen, Alibaba, custom, etc.) silently ignored it. Now consumed at highest priority, matching the anthropic pattern. 2. 
NVIDIA NIM max_tokens default (16384): NVIDIA NIM falls back to a very low internal default when max_tokens is omitted, causing models like GLM-4.7 to truncate immediately (thinking tokens exhaust the budget before the response starts). 3. Progressive length-continuation boost: When finish_reason='length' triggers a continuation retry, the output budget now grows progressively (2x base on retry 1, 3x on retry 2, capped at 32768) via _ephemeral_max_output_tokens. Previously the retry loop just re-sent the same token limit on all 3 attempts. --- run_agent.py | 20 +++++++++++++++++++- scripts/release.py | 1 + 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/run_agent.py b/run_agent.py index e88096a60..a0f4db548 100644 --- a/run_agent.py +++ b/run_agent.py @@ -7061,8 +7061,20 @@ class AIAgent: if self.tools: api_kwargs["tools"] = self.tools - if self.max_tokens is not None: + # ── max_tokens for chat_completions ────────────────────────────── + # Priority: ephemeral override (error recovery / length-continuation + # boost) > user-configured max_tokens > provider-specific defaults. + _ephemeral_out = getattr(self, "_ephemeral_max_output_tokens", None) + if _ephemeral_out is not None: + self._ephemeral_max_output_tokens = None # consume immediately + api_kwargs.update(self._max_tokens_param(_ephemeral_out)) + elif self.max_tokens is not None: api_kwargs.update(self._max_tokens_param(self.max_tokens)) + elif "integrate.api.nvidia.com" in self._base_url_lower: + # NVIDIA NIM defaults to a very low max_tokens when omitted, + # causing models like GLM-4.7 to truncate immediately (thinking + # tokens alone exhaust the budget). 16384 provides adequate room. + api_kwargs.update(self._max_tokens_param(16384)) elif self._is_qwen_portal(): # Qwen Portal defaults to a very low max_tokens when omitted. 
# Reasoning models (qwen3-coder-plus) exhaust that budget on @@ -10804,6 +10816,12 @@ class AIAgent: continue if restart_with_length_continuation: + # Progressively boost the output token budget on each retry. + # Retry 1 → 2× base, retry 2 → 3× base, capped at 32 768. + # Applies to all providers via _ephemeral_max_output_tokens. + _boost_base = self.max_tokens if self.max_tokens else 4096 + _boost = _boost_base * (length_continue_retries + 1) + self._ephemeral_max_output_tokens = min(_boost, 32768) continue # Guard: if all retries exhausted without a successful response diff --git a/scripts/release.py b/scripts/release.py index 88ddb2f43..94ebef5d3 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -267,6 +267,7 @@ AUTHOR_MAP = { "aviralarora002@gmail.com": "AviArora02-commits", "junminliu@gmail.com": "JimLiu", "jarvischer@gmail.com": "maxchernin", + "levantam.98.2324@gmail.com": "LVT382009", } From 2eab7ee15f9f0283ae1a6c466b0400caa44defbb Mon Sep 17 00:00:00 2001 From: helix4u <4317663+helix4u@users.noreply.github.com> Date: Sat, 18 Apr 2026 13:00:04 -0600 Subject: [PATCH 3/5] fix(gemini): hide low-TPM Gemma models from exposed lists --- agent/models_dev.py | 32 +++++++++++++++++++++--- hermes_cli/models.py | 2 -- hermes_cli/setup.py | 1 - tests/hermes_cli/test_gemini_provider.py | 23 +++++++++++++++-- 4 files changed, 50 insertions(+), 8 deletions(-) diff --git a/agent/models_dev.py b/agent/models_dev.py index 42c8925ff..cc4dbf0be 100644 --- a/agent/models_dev.py +++ b/agent/models_dev.py @@ -420,7 +420,10 @@ def list_provider_models(provider: str) -> List[str]: models = _get_provider_models(provider) if models is None: return [] - return list(models.keys()) + return [ + mid for mid in models.keys() + if not _should_hide_from_provider_catalog(provider, mid) + ] # Patterns that indicate non-agentic or noise models (TTS, embedding, @@ -432,6 +435,29 @@ _NOISE_PATTERNS: re.Pattern = re.compile( re.IGNORECASE, ) +# Google-hosted Gemma models currently have 
very low TPM quotas for agent-style +# traffic (for example 15K/16K TPM tiers in AI Studio) and are not practical as +# normal Hermes picks even though they advertise large context windows. Keep the +# capability metadata available for direct/manual use, but hide them from the +# Gemini model catalogs we surface in setup and model selection. +_GOOGLE_GEMMA_HIDDEN_MODELS = frozenset({ + "gemma-4-31b-it", + "gemma-4-26b-a4b-it", + "gemma-3-1b", + "gemma-3-2b", + "gemma-3-4b", + "gemma-3-12b", + "gemma-3-27b", +}) + + +def _should_hide_from_provider_catalog(provider: str, model_id: str) -> bool: + provider_lower = (provider or "").strip().lower() + model_lower = (model_id or "").strip().lower() + if provider_lower in {"gemini", "google"} and model_lower in _GOOGLE_GEMMA_HIDDEN_MODELS: + return True + return False + def list_agentic_models(provider: str) -> List[str]: """Return model IDs suitable for agentic use from models.dev. @@ -448,6 +474,8 @@ def list_agentic_models(provider: str) -> List[str]: for mid, entry in models.items(): if not isinstance(entry, dict): continue + if _should_hide_from_provider_catalog(provider, mid): + continue if not entry.get("tool_call", False): continue if _NOISE_PATTERNS.search(mid): @@ -582,5 +610,3 @@ def get_model_info( return _parse_model_info(mid, mdata, mdev_id) return None - - diff --git a/hermes_cli/models.py b/hermes_cli/models.py index cbbeef62d..a0d7c2220 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -133,8 +133,6 @@ _PROVIDER_MODELS: dict[str, list[str]] = { "gemini-2.5-pro", "gemini-2.5-flash", "gemini-2.5-flash-lite", - # Gemma open models (also served via AI Studio) - "gemma-4-31b-it", ], "google-gemini-cli": [ "gemini-2.5-pro", diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index 8770386b7..8f6b633c6 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -91,7 +91,6 @@ _DEFAULT_PROVIDER_MODELS = { "gemini": [ "gemini-3.1-pro-preview", "gemini-3-flash-preview", 
"gemini-3.1-flash-lite-preview", "gemini-2.5-pro", "gemini-2.5-flash", "gemini-2.5-flash-lite", - "gemma-4-31b-it", ], "zai": ["glm-5.1", "glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"], "kimi-coding": ["kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"], diff --git a/tests/hermes_cli/test_gemini_provider.py b/tests/hermes_cli/test_gemini_provider.py index fd16e825d..7632f7691 100644 --- a/tests/hermes_cli/test_gemini_provider.py +++ b/tests/hermes_cli/test_gemini_provider.py @@ -130,7 +130,7 @@ class TestGeminiModelCatalog: models = _PROVIDER_MODELS["gemini"] assert "gemini-2.5-pro" in models assert "gemini-2.5-flash" in models - assert "gemma-4-31b-it" in models + assert "gemma-4-31b-it" not in models def test_provider_models_has_3x(self): models = _PROVIDER_MODELS["gemini"] @@ -313,9 +313,28 @@ class TestGeminiModelsDev: result = list_agentic_models("gemini") assert "gemini-3-flash-preview" in result assert "gemini-2.5-pro" in result - assert "gemma-4-31b-it" in result + assert "gemma-4-31b-it" not in result # Filtered out: assert "gemini-embedding-001" not in result # no tool_call assert "gemini-2.5-flash-preview-tts" not in result # no tool_call assert "gemini-live-2.5-flash" not in result # noise: live- assert "gemini-2.5-flash-preview-04-17" not in result # noise: dated preview + + def test_list_provider_models_hides_low_tpm_google_gemmas(self): + mock_data = { + "google": { + "models": { + "gemini-2.5-pro": {}, + "gemma-4-31b-it": {}, + "gemma-3-1b": {}, + } + } + } + with patch("agent.models_dev.fetch_models_dev", return_value=mock_data): + from agent.models_dev import list_provider_models + + result = list_provider_models("gemini") + + assert "gemini-2.5-pro" in result + assert "gemma-4-31b-it" not in result + assert "gemma-3-1b" not in result From a7dd6a34499cb8fa91579b8943d251a8c2d42021 Mon Sep 17 00:00:00 2001 From: helix4u <4317663+helix4u@users.noreply.github.com> Date: Sat, 18 Apr 2026 13:08:57 -0600 Subject: [PATCH 4/5] fix(gemini): hide 
stale and low-TPM Google models --- agent/models_dev.py | 28 ++++++++++++++++++------ tests/hermes_cli/test_gemini_provider.py | 8 +++++-- 2 files changed, 27 insertions(+), 9 deletions(-) diff --git a/agent/models_dev.py b/agent/models_dev.py index cc4dbf0be..3e5c911e7 100644 --- a/agent/models_dev.py +++ b/agent/models_dev.py @@ -435,26 +435,40 @@ _NOISE_PATTERNS: re.Pattern = re.compile( re.IGNORECASE, ) -# Google-hosted Gemma models currently have very low TPM quotas for agent-style -# traffic (for example 15K/16K TPM tiers in AI Studio) and are not practical as -# normal Hermes picks even though they advertise large context windows. Keep the -# capability metadata available for direct/manual use, but hide them from the -# Gemini model catalogs we surface in setup and model selection. -_GOOGLE_GEMMA_HIDDEN_MODELS = frozenset({ +# Google's live Gemini catalogs currently include a mix of stale slugs and +# Gemma models whose TPM quotas are too small for normal Hermes agent traffic. +# Keep capability metadata available for direct/manual use, but hide these from +# the Gemini model catalogs we surface in setup and model selection. +_GOOGLE_HIDDEN_MODELS = frozenset({ + # Low-TPM Gemma models that trip Google input-token quota walls under + # agent-style traffic despite advertising large context windows. "gemma-4-31b-it", + "gemma-4-26b-it", "gemma-4-26b-a4b-it", "gemma-3-1b", + "gemma-3-1b-it", "gemma-3-2b", + "gemma-3-2b-it", "gemma-3-4b", + "gemma-3-4b-it", "gemma-3-12b", + "gemma-3-12b-it", "gemma-3-27b", + "gemma-3-27b-it", + # Stale/retired Google slugs that still surface through models.dev-backed + # Gemini selection but 404 on the current Google endpoints. 
+ "gemini-1.5-flash", + "gemini-1.5-pro", + "gemini-1.5-flash-8b", + "gemini-2.0-flash", + "gemini-2.0-flash-lite", }) def _should_hide_from_provider_catalog(provider: str, model_id: str) -> bool: provider_lower = (provider or "").strip().lower() model_lower = (model_id or "").strip().lower() - if provider_lower in {"gemini", "google"} and model_lower in _GOOGLE_GEMMA_HIDDEN_MODELS: + if provider_lower in {"gemini", "google"} and model_lower in _GOOGLE_HIDDEN_MODELS: return True return False diff --git a/tests/hermes_cli/test_gemini_provider.py b/tests/hermes_cli/test_gemini_provider.py index 7632f7691..9c6ee70aa 100644 --- a/tests/hermes_cli/test_gemini_provider.py +++ b/tests/hermes_cli/test_gemini_provider.py @@ -326,7 +326,9 @@ class TestGeminiModelsDev: "models": { "gemini-2.5-pro": {}, "gemma-4-31b-it": {}, - "gemma-3-1b": {}, + "gemma-3-27b-it": {}, + "gemini-1.5-pro": {}, + "gemini-2.0-flash": {}, } } } @@ -337,4 +339,6 @@ class TestGeminiModelsDev: assert "gemini-2.5-pro" in result assert "gemma-4-31b-it" not in result - assert "gemma-3-1b" not in result + assert "gemma-3-27b-it" not in result + assert "gemini-1.5-pro" not in result + assert "gemini-2.0-flash" not in result From ca32a2a60bd8655c001b96394e68309ba53b4550 Mon Sep 17 00:00:00 2001 From: helix4u <4317663+helix4u@users.noreply.github.com> Date: Sat, 18 Apr 2026 13:15:27 -0600 Subject: [PATCH 5/5] fix(gemini): restore bearer auth on openai route --- agent/auxiliary_client.py | 28 ------------------- run_agent.py | 21 -------------- tests/hermes_cli/test_gemini_provider.py | 35 +++++------------------- 3 files changed, 7 insertions(+), 77 deletions(-) diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index 126f4615d..19bde946e 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -782,15 +782,6 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]: from hermes_cli.models import copilot_default_headers extra["default_headers"] = 
copilot_default_headers() - elif "generativelanguage.googleapis.com" in base_url.lower(): - # Google's OpenAI-compatible endpoint only accepts x-goog-api-key. - # Passing api_key= causes the SDK to inject Authorization: Bearer, - # which Google rejects with HTTP 400 "Multiple authentication - # credentials received". Use a placeholder for api_key and pass - # the real key via x-goog-api-key header instead. - # Fixes: https://github.com/NousResearch/hermes-agent/issues/7893 - extra["default_headers"] = {"x-goog-api-key": api_key} - api_key = "not-used" return OpenAI(api_key=api_key, base_url=base_url, **extra), model creds = resolve_api_key_provider_credentials(provider_id) @@ -812,15 +803,6 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]: from hermes_cli.models import copilot_default_headers extra["default_headers"] = copilot_default_headers() - elif "generativelanguage.googleapis.com" in base_url.lower(): - # Google's OpenAI-compatible endpoint only accepts x-goog-api-key. - # Passing api_key= causes the SDK to inject Authorization: Bearer, - # which Google rejects with HTTP 400 "Multiple authentication - # credentials received". Use a placeholder for api_key and pass - # the real key via x-goog-api-key header instead. - # Fixes: https://github.com/NousResearch/hermes-agent/issues/7893 - extra["default_headers"] = {"x-goog-api-key": api_key} - api_key = "not-used" return OpenAI(api_key=api_key, base_url=base_url, **extra), model return None, None @@ -1666,16 +1648,6 @@ def resolve_provider_client( from hermes_cli.models import copilot_default_headers headers.update(copilot_default_headers()) - elif "generativelanguage.googleapis.com" in base_url.lower(): - # Google's OpenAI-compatible endpoint only accepts x-goog-api-key. - # Passing api_key= causes the OpenAI SDK to inject Authorization: Bearer, - # which Google rejects with HTTP 400 "Multiple authentication credentials - # received". 
Use a placeholder for api_key and pass the real key via - # x-goog-api-key header instead. - # Fixes: https://github.com/NousResearch/hermes-agent/issues/7893 - headers["x-goog-api-key"] = api_key - api_key = "not-used" - client = OpenAI(api_key=api_key, base_url=base_url, **({"default_headers": headers} if headers else {})) diff --git a/run_agent.py b/run_agent.py index a0f4db548..756bb62ed 100644 --- a/run_agent.py +++ b/run_agent.py @@ -1054,16 +1054,6 @@ class AIAgent: } elif "portal.qwen.ai" in effective_base.lower(): client_kwargs["default_headers"] = _qwen_portal_headers() - elif "generativelanguage.googleapis.com" in effective_base.lower(): - # Google's OpenAI-compatible endpoint only accepts x-goog-api-key. - # The OpenAI SDK auto-injects Authorization: Bearer when api_key= is - # set to a real value, causing HTTP 400 "Multiple authentication - # credentials received". Pass a placeholder so the SDK does not - # emit Bearer, and carry the real key via x-goog-api-key instead. - # Fixes: https://github.com/NousResearch/hermes-agent/issues/7893 - real_key = client_kwargs["api_key"] - client_kwargs["api_key"] = "not-used" - client_kwargs["default_headers"] = {"x-goog-api-key": real_key} else: # No explicit creds — use the centralized provider router from agent.auxiliary_client import resolve_provider_client @@ -5245,17 +5235,6 @@ class AIAgent: self._client_kwargs["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"} elif "portal.qwen.ai" in normalized: self._client_kwargs["default_headers"] = _qwen_portal_headers() - elif "generativelanguage.googleapis.com" in normalized: - # Google's endpoint rejects Bearer tokens; use x-goog-api-key instead. - # Swap the real key out of api_key and into the header so the OpenAI - # SDK does not emit Authorization: Bearer. 
- # Fixes: https://github.com/NousResearch/hermes-agent/issues/7893 - real_key = self._client_kwargs.get("api_key", "") - if real_key and real_key != "not-used": - self._client_kwargs["api_key"] = "not-used" - self._client_kwargs["default_headers"] = { - "x-goog-api-key": real_key or self._client_kwargs.get("api_key", ""), - } else: self._client_kwargs.pop("default_headers", None) diff --git a/tests/hermes_cli/test_gemini_provider.py b/tests/hermes_cli/test_gemini_provider.py index 9c6ee70aa..dbb1111fc 100644 --- a/tests/hermes_cli/test_gemini_provider.py +++ b/tests/hermes_cli/test_gemini_provider.py @@ -207,14 +207,8 @@ class TestGeminiAgentInit: assert agent.api_mode == "chat_completions" assert agent.provider == "gemini" - def test_gemini_uses_x_goog_api_key_not_bearer(self, monkeypatch): - """Regression test for issue #7893. - - When provider=gemini, the OpenAI client must be constructed with - api_key='not-used' and default_headers={'x-goog-api-key': real_key}. - This prevents the SDK from injecting Authorization: Bearer, which - Google's endpoint rejects with HTTP 400. 
- """ + def test_gemini_uses_bearer_auth(self, monkeypatch): + """Gemini OpenAI-compatible endpoint should receive the real API key.""" monkeypatch.setenv("GOOGLE_API_KEY", "AIzaSy_REAL_KEY") real_key = "AIzaSy_REAL_KEY" with patch("run_agent.OpenAI") as mock_openai: @@ -227,37 +221,22 @@ class TestGeminiAgentInit: base_url="https://generativelanguage.googleapis.com/v1beta/openai", ) call_kwargs = mock_openai.call_args[1] - # The SDK must NOT receive the real key as api_key (which would emit Bearer) - assert call_kwargs.get("api_key") == "not-used", ( - "api_key must be 'not-used' to suppress Authorization: Bearer for Gemini" - ) - # The real key must be in x-goog-api-key header + assert call_kwargs.get("api_key") == real_key headers = call_kwargs.get("default_headers", {}) - assert headers.get("x-goog-api-key") == real_key, ( - "x-goog-api-key header must carry the real Gemini API key" - ) + assert "x-goog-api-key" not in headers def test_gemini_resolve_provider_client_auth(self, monkeypatch): - """Regression test for issue #7893 — resolve_provider_client path. - - When resolve_provider_client('gemini') is called, the returned OpenAI - client must use x-goog-api-key header, not Authorization: Bearer. 
- """ + """resolve_provider_client('gemini') should pass the real API key through.""" monkeypatch.setenv("GEMINI_API_KEY", "AIzaSy_TEST_KEY") real_key = "AIzaSy_TEST_KEY" with patch("agent.auxiliary_client.OpenAI") as mock_openai: mock_openai.return_value = MagicMock() - mock_openai.return_value.api_key = "not-used" from agent.auxiliary_client import resolve_provider_client resolve_provider_client("gemini") call_kwargs = mock_openai.call_args[1] - assert call_kwargs.get("api_key") == "not-used", ( - "api_key must be 'not-used' to prevent Bearer injection for Gemini" - ) + assert call_kwargs.get("api_key") == real_key headers = call_kwargs.get("default_headers", {}) - assert headers.get("x-goog-api-key") == real_key, ( - "x-goog-api-key header must carry the real Gemini API key" - ) + assert "x-goog-api-key" not in headers # ── models.dev Integration ──