From b52ceccfa8ccc36f17314b0368a36b782d6999e8 Mon Sep 17 00:00:00 2001 From: Sanjay <51058514+Sanjays2402@users.noreply.github.com> Date: Tue, 28 Apr 2026 00:52:05 -0700 Subject: [PATCH] fix(opencode): re-derive api_mode per target model on /model switch opencode-zen and opencode-go each serve both anthropic_messages (e.g. minimax-m2.7) and chat_completions (e.g. deepseek-v4-flash) models behind a single base_url. The api_mode resolver in hermes_cli/runtime_provider.py honoured the persisted model_cfg.api_mode (set by the previous default model) before checking the opencode model registry, so /model deepseek-v4-flash from a session whose default was minimax-m2.7 inherited 'anthropic_messages', stripped '/v1' from base_url (the Anthropic SDK adds its own /v1/messages), and 404'd. Promote the opencode detection branch above the configured_mode check in both api_mode resolution paths: - _resolve_runtime_from_pool_entry (pool-backed providers) - resolve_runtime_provider (api-key providers, fallback path) Both branches now call opencode_model_api_mode(provider, effective_model) unconditionally for opencode-zen/go before considering any persisted api_mode, so the mode always reflects the model the user just switched to. Existing tests pass (12/12 in tests/hermes_cli/test_model_switch_opencode_anthropic.py). 
Fixes #16878 --- hermes_cli/runtime_provider.py | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py index 1fe5acc2b..d625b75ba 100644 --- a/hermes_cli/runtime_provider.py +++ b/hermes_cli/runtime_provider.py @@ -260,11 +260,16 @@ def _resolve_runtime_from_pool_entry( if cfg_base_url: base_url = cfg_base_url configured_mode = _parse_api_mode(model_cfg.get("api_mode")) - if configured_mode and _provider_supports_explicit_api_mode(provider, configured_provider): - api_mode = configured_mode - elif provider in ("opencode-zen", "opencode-go"): + if provider in ("opencode-zen", "opencode-go"): + # Re-derive api_mode from the effective model rather than the + # persisted api_mode: the opencode providers serve both + # anthropic_messages and chat_completions models, so the previous + # session's mode must not leak across /model switches. + # Refs #16878. from hermes_cli.models import opencode_model_api_mode api_mode = opencode_model_api_mode(provider, effective_model) + elif configured_mode and _provider_supports_explicit_api_mode(provider, configured_provider): + api_mode = configured_mode else: # Auto-detect Anthropic-compatible endpoints (/anthropic suffix, # Kimi /coding, api.openai.com → codex_responses, api.x.ai → @@ -1212,15 +1217,20 @@ def resolve_runtime_provider( configured_provider = str(model_cfg.get("provider") or "").strip().lower() # Only honor persisted api_mode when it belongs to the same provider family. 
configured_mode = _parse_api_mode(model_cfg.get("api_mode")) - if configured_mode and _provider_supports_explicit_api_mode(provider, configured_provider): - api_mode = configured_mode - elif provider in ("opencode-zen", "opencode-go"): + if provider in ("opencode-zen", "opencode-go"): + # opencode-zen/go must always re-derive api_mode from the + # target model (not the stale persisted api_mode), because + # the same provider serves both anthropic_messages + # (e.g. minimax-m2.7) and chat_completions (e.g. + # deepseek-v4-flash) and switching models via /model would + # otherwise carry the previous mode forward, stripping /v1 + # from base_url for chat_completions models and 404'ing. + # Refs #16878. from hermes_cli.models import opencode_model_api_mode - # Prefer the target_model from the caller (explicit mid-session - # switch) over the stale model.default; see _resolve_runtime_from_pool_entry - # for the same rationale. _effective = target_model or model_cfg.get("default", "") api_mode = opencode_model_api_mode(provider, _effective) + elif configured_mode and _provider_supports_explicit_api_mode(provider, configured_provider): + api_mode = configured_mode else: # Auto-detect Anthropic-compatible endpoints by URL convention # (e.g. https://api.minimax.io/anthropic, https://dashscope.../anthropic)