fix(opencode): re-derive api_mode per target model on /model switch
opencode-zen and opencode-go each serve both anthropic_messages (e.g. minimax-m2.7) and chat_completions (e.g. deepseek-v4-flash) models behind a single base_url.

The api_mode resolver in hermes_cli/runtime_provider.py honoured the persisted model_cfg.api_mode (set by the previous default model) before checking the opencode model registry. As a result, running /model deepseek-v4-flash from a session whose default was minimax-m2.7 inherited 'anthropic_messages', stripped '/v1' from base_url (the Anthropic SDK adds its own /v1/messages), and the request 404'd.

Promote the opencode detection branch above the configured_mode check in both api_mode resolution paths:

- _resolve_runtime_from_pool_entry (pool-backed providers)
- _resolve_api_key_runtime (api-key providers, fallback path)

Both branches now call opencode_model_api_mode(provider, effective_model) unconditionally for opencode-zen/go before considering any persisted api_mode, so the mode always reflects the model the user just switched to.

Existing tests pass (12/12 in tests/hermes_cli/test_model_switch_opencode_anthropic.py).

Fixes #16878
This commit is contained in:
@@ -260,11 +260,16 @@ def _resolve_runtime_from_pool_entry(
|
||||
if cfg_base_url:
|
||||
base_url = cfg_base_url
|
||||
configured_mode = _parse_api_mode(model_cfg.get("api_mode"))
|
||||
if configured_mode and _provider_supports_explicit_api_mode(provider, configured_provider):
|
||||
api_mode = configured_mode
|
||||
elif provider in ("opencode-zen", "opencode-go"):
|
||||
if provider in ("opencode-zen", "opencode-go"):
|
||||
# Re-derive api_mode from the effective model rather than the
|
||||
# persisted api_mode: the opencode providers serve both
|
||||
# anthropic_messages and chat_completions models, so the previous
|
||||
# session's mode must not leak across /model switches.
|
||||
# Refs #16878.
|
||||
from hermes_cli.models import opencode_model_api_mode
|
||||
api_mode = opencode_model_api_mode(provider, effective_model)
|
||||
elif configured_mode and _provider_supports_explicit_api_mode(provider, configured_provider):
|
||||
api_mode = configured_mode
|
||||
else:
|
||||
# Auto-detect Anthropic-compatible endpoints (/anthropic suffix,
|
||||
# Kimi /coding, api.openai.com → codex_responses, api.x.ai →
|
||||
@@ -1212,15 +1217,20 @@ def resolve_runtime_provider(
|
||||
configured_provider = str(model_cfg.get("provider") or "").strip().lower()
|
||||
# Only honor persisted api_mode when it belongs to the same provider family.
|
||||
configured_mode = _parse_api_mode(model_cfg.get("api_mode"))
|
||||
if configured_mode and _provider_supports_explicit_api_mode(provider, configured_provider):
|
||||
api_mode = configured_mode
|
||||
elif provider in ("opencode-zen", "opencode-go"):
|
||||
if provider in ("opencode-zen", "opencode-go"):
|
||||
# opencode-zen/go must always re-derive api_mode from the
|
||||
# target model (not the stale persisted api_mode), because
|
||||
# the same provider serves both anthropic_messages
|
||||
# (e.g. minimax-m2.7) and chat_completions (e.g.
|
||||
# deepseek-v4-flash) and switching models via /model would
|
||||
# otherwise carry the previous mode forward, stripping /v1
|
||||
# from base_url for chat_completions models and 404'ing.
|
||||
# Refs #16878.
|
||||
from hermes_cli.models import opencode_model_api_mode
|
||||
# Prefer the target_model from the caller (explicit mid-session
|
||||
# switch) over the stale model.default; see _resolve_runtime_from_pool_entry
|
||||
# for the same rationale.
|
||||
_effective = target_model or model_cfg.get("default", "")
|
||||
api_mode = opencode_model_api_mode(provider, _effective)
|
||||
elif configured_mode and _provider_supports_explicit_api_mode(provider, configured_provider):
|
||||
api_mode = configured_mode
|
||||
else:
|
||||
# Auto-detect Anthropic-compatible endpoints by URL convention
|
||||
# (e.g. https://api.minimax.io/anthropic, https://dashscope.../anthropic)
|
||||
|
||||
Reference in New Issue
Block a user