fix(auxiliary): refresh Nous runtime credentials after aux 401s
This commit is contained in:
@@ -728,6 +728,33 @@ def _nous_base_url() -> str:
|
||||
return os.getenv("NOUS_INFERENCE_BASE_URL", _NOUS_DEFAULT_BASE_URL)
|
||||
|
||||
|
||||
def _resolve_nous_runtime_api(*, force_refresh: bool = False) -> Optional[tuple[str, str]]:
|
||||
"""Return fresh Nous runtime credentials when available.
|
||||
|
||||
This mirrors the main agent's 401 recovery path and keeps auxiliary
|
||||
clients aligned with the singleton auth store + mint flow instead of
|
||||
relying only on whatever raw tokens happen to be sitting in auth.json
|
||||
or the credential pool.
|
||||
"""
|
||||
try:
|
||||
from hermes_cli.auth import resolve_nous_runtime_credentials
|
||||
|
||||
creds = resolve_nous_runtime_credentials(
|
||||
min_key_ttl_seconds=max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))),
|
||||
timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")),
|
||||
force_mint=force_refresh,
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.debug("Auxiliary Nous runtime credential resolution failed: %s", exc)
|
||||
return None
|
||||
|
||||
api_key = str(creds.get("api_key") or "").strip()
|
||||
base_url = str(creds.get("base_url") or "").strip().rstrip("/")
|
||||
if not api_key or not base_url:
|
||||
return None
|
||||
return api_key, base_url
|
||||
|
||||
|
||||
def _read_codex_access_token() -> Optional[str]:
|
||||
"""Read a valid, non-expired Codex OAuth access token from Hermes auth store.
|
||||
|
||||
@@ -894,7 +921,8 @@ def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
pass
|
||||
|
||||
nous = _read_nous_auth()
|
||||
if not nous:
|
||||
runtime = _resolve_nous_runtime_api(force_refresh=False)
|
||||
if runtime is None and not nous:
|
||||
return None, None
|
||||
global auxiliary_is_nous
|
||||
auxiliary_is_nous = True
|
||||
@@ -913,10 +941,16 @@ def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
model, "vision" if vision else "text")
|
||||
except Exception:
|
||||
pass
|
||||
if runtime is not None:
|
||||
api_key, base_url = runtime
|
||||
else:
|
||||
api_key = _nous_api_key(nous or {})
|
||||
base_url = str((nous or {}).get("inference_base_url") or _nous_base_url()).rstrip("/")
|
||||
|
||||
return (
|
||||
OpenAI(
|
||||
api_key=_nous_api_key(nous),
|
||||
base_url=str(nous.get("inference_base_url") or _nous_base_url()).rstrip("/"),
|
||||
api_key=api_key,
|
||||
base_url=base_url,
|
||||
),
|
||||
model,
|
||||
)
|
||||
@@ -1260,6 +1294,15 @@ def _is_connection_error(exc: Exception) -> bool:
|
||||
return False
|
||||
|
||||
|
||||
def _is_auth_error(exc: Exception) -> bool:
|
||||
"""Detect auth failures that should trigger provider-specific refresh."""
|
||||
status = getattr(exc, "status_code", None)
|
||||
if status == 401:
|
||||
return True
|
||||
err_lower = str(exc).lower()
|
||||
return "error code: 401" in err_lower or "authenticationerror" in type(exc).__name__.lower()
|
||||
|
||||
|
||||
def _try_payment_fallback(
|
||||
failed_provider: str,
|
||||
task: str = None,
|
||||
@@ -2055,6 +2098,76 @@ _client_cache_lock = threading.Lock()
|
||||
_CLIENT_CACHE_MAX_SIZE = 64 # safety belt — evict oldest when exceeded
|
||||
|
||||
|
||||
def _client_cache_key(
|
||||
provider: str,
|
||||
*,
|
||||
async_mode: bool,
|
||||
base_url: Optional[str] = None,
|
||||
api_key: Optional[str] = None,
|
||||
api_mode: Optional[str] = None,
|
||||
main_runtime: Optional[Dict[str, Any]] = None,
|
||||
) -> tuple:
|
||||
runtime = _normalize_main_runtime(main_runtime)
|
||||
runtime_key = tuple(runtime.get(field, "") for field in _MAIN_RUNTIME_FIELDS) if provider == "auto" else ()
|
||||
return (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key)
|
||||
|
||||
|
||||
def _store_cached_client(cache_key: tuple, client: Any, default_model: Optional[str], *, bound_loop: Any = None) -> None:
|
||||
with _client_cache_lock:
|
||||
old_entry = _client_cache.get(cache_key)
|
||||
if old_entry is not None and old_entry[0] is not client:
|
||||
_force_close_async_httpx(old_entry[0])
|
||||
try:
|
||||
close_fn = getattr(old_entry[0], "close", None)
|
||||
if callable(close_fn):
|
||||
close_fn()
|
||||
except Exception:
|
||||
pass
|
||||
_client_cache[cache_key] = (client, default_model, bound_loop)
|
||||
|
||||
|
||||
def _refresh_nous_auxiliary_client(
|
||||
*,
|
||||
cache_provider: str,
|
||||
model: Optional[str],
|
||||
async_mode: bool,
|
||||
base_url: Optional[str] = None,
|
||||
api_key: Optional[str] = None,
|
||||
api_mode: Optional[str] = None,
|
||||
main_runtime: Optional[Dict[str, Any]] = None,
|
||||
) -> Tuple[Optional[Any], Optional[str]]:
|
||||
"""Refresh Nous runtime creds, rebuild the client, and replace the cache entry."""
|
||||
runtime = _resolve_nous_runtime_api(force_refresh=True)
|
||||
if runtime is None:
|
||||
return None, model
|
||||
|
||||
fresh_key, fresh_base_url = runtime
|
||||
sync_client = OpenAI(api_key=fresh_key, base_url=fresh_base_url)
|
||||
final_model = model
|
||||
|
||||
current_loop = None
|
||||
if async_mode:
|
||||
try:
|
||||
import asyncio as _aio
|
||||
current_loop = _aio.get_event_loop()
|
||||
except RuntimeError:
|
||||
pass
|
||||
client, final_model = _to_async_client(sync_client, final_model or "")
|
||||
else:
|
||||
client = sync_client
|
||||
|
||||
cache_key = _client_cache_key(
|
||||
cache_provider,
|
||||
async_mode=async_mode,
|
||||
base_url=base_url,
|
||||
api_key=api_key,
|
||||
api_mode=api_mode,
|
||||
main_runtime=main_runtime,
|
||||
)
|
||||
_store_cached_client(cache_key, client, final_model, bound_loop=current_loop)
|
||||
return client, final_model
|
||||
|
||||
|
||||
def neuter_async_httpx_del() -> None:
|
||||
"""Monkey-patch ``AsyncHttpxClientWrapper.__del__`` to be a no-op.
|
||||
|
||||
@@ -2208,8 +2321,14 @@ def _get_cached_client(
|
||||
except RuntimeError:
|
||||
pass
|
||||
runtime = _normalize_main_runtime(main_runtime)
|
||||
runtime_key = tuple(runtime.get(field, "") for field in _MAIN_RUNTIME_FIELDS) if provider == "auto" else ()
|
||||
cache_key = (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key)
|
||||
cache_key = _client_cache_key(
|
||||
provider,
|
||||
async_mode=async_mode,
|
||||
base_url=base_url,
|
||||
api_key=api_key,
|
||||
api_mode=api_mode,
|
||||
main_runtime=main_runtime,
|
||||
)
|
||||
with _client_cache_lock:
|
||||
if cache_key in _client_cache:
|
||||
cached_client, cached_default, cached_loop = _client_cache[cache_key]
|
||||
@@ -2657,6 +2776,29 @@ def call_llm(
|
||||
raise
|
||||
first_err = retry_err
|
||||
|
||||
# ── Nous auth refresh parity with main agent ──────────────────
|
||||
client_is_nous = (
|
||||
resolved_provider == "nous"
|
||||
or base_url_host_matches(_base_info, "inference-api.nousresearch.com")
|
||||
)
|
||||
if _is_auth_error(first_err) and client_is_nous:
|
||||
refreshed_client, refreshed_model = _refresh_nous_auxiliary_client(
|
||||
cache_provider=resolved_provider or "nous",
|
||||
model=final_model,
|
||||
async_mode=False,
|
||||
base_url=resolved_base_url,
|
||||
api_key=resolved_api_key,
|
||||
api_mode=resolved_api_mode,
|
||||
main_runtime=main_runtime,
|
||||
)
|
||||
if refreshed_client is not None:
|
||||
logger.info("Auxiliary %s: refreshed Nous runtime credentials after 401, retrying",
|
||||
task or "call")
|
||||
if refreshed_model and refreshed_model != kwargs.get("model"):
|
||||
kwargs["model"] = refreshed_model
|
||||
return _validate_llm_response(
|
||||
refreshed_client.chat.completions.create(**kwargs), task)
|
||||
|
||||
# ── Payment / credit exhaustion fallback ──────────────────────
|
||||
# When the resolved provider returns 402 or a credit-related error,
|
||||
# try alternative providers instead of giving up. This handles the
|
||||
@@ -2855,6 +2997,28 @@ async def async_call_llm(
|
||||
raise
|
||||
first_err = retry_err
|
||||
|
||||
# ── Nous auth refresh parity with main agent ──────────────────
|
||||
client_is_nous = (
|
||||
resolved_provider == "nous"
|
||||
or base_url_host_matches(_client_base, "inference-api.nousresearch.com")
|
||||
)
|
||||
if _is_auth_error(first_err) and client_is_nous:
|
||||
refreshed_client, refreshed_model = _refresh_nous_auxiliary_client(
|
||||
cache_provider=resolved_provider or "nous",
|
||||
model=final_model,
|
||||
async_mode=True,
|
||||
base_url=resolved_base_url,
|
||||
api_key=resolved_api_key,
|
||||
api_mode=resolved_api_mode,
|
||||
)
|
||||
if refreshed_client is not None:
|
||||
logger.info("Auxiliary %s (async): refreshed Nous runtime credentials after 401, retrying",
|
||||
task or "call")
|
||||
if refreshed_model and refreshed_model != kwargs.get("model"):
|
||||
kwargs["model"] = refreshed_model
|
||||
return _validate_llm_response(
|
||||
await refreshed_client.chat.completions.create(**kwargs), task)
|
||||
|
||||
# ── Payment / connection fallback (mirrors sync call_llm) ─────
|
||||
should_fallback = _is_payment_error(first_err) or _is_connection_error(first_err)
|
||||
is_auto = resolved_provider in ("auto", "", None)
|
||||
|
||||
@@ -476,6 +476,82 @@ class TestGetTextAuxiliaryClient:
|
||||
assert isinstance(client, CodexAuxiliaryClient)
|
||||
assert model == "gpt-5.2-codex"
|
||||
|
||||
|
||||
class TestNousAuxiliaryRefresh:
|
||||
def test_try_nous_prefers_runtime_credentials(self):
|
||||
fresh_base = "https://inference-api.nousresearch.com/v1"
|
||||
with (
|
||||
patch("agent.auxiliary_client._read_nous_auth", return_value={"access_token": "stale-token"}),
|
||||
patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=("fresh-agent-key", fresh_base)),
|
||||
patch("agent.auxiliary_client.OpenAI") as mock_openai,
|
||||
):
|
||||
from agent.auxiliary_client import _try_nous
|
||||
|
||||
mock_openai.return_value = MagicMock()
|
||||
client, model = _try_nous()
|
||||
|
||||
assert client is not None
|
||||
assert model == "google/gemini-3-flash-preview"
|
||||
assert mock_openai.call_args.kwargs["api_key"] == "fresh-agent-key"
|
||||
assert mock_openai.call_args.kwargs["base_url"] == fresh_base
|
||||
|
||||
def test_call_llm_retries_nous_after_401(self):
|
||||
class _Auth401(Exception):
|
||||
status_code = 401
|
||||
|
||||
stale_client = MagicMock()
|
||||
stale_client.base_url = "https://inference-api.nousresearch.com/v1"
|
||||
stale_client.chat.completions.create.side_effect = _Auth401("stale nous key")
|
||||
|
||||
fresh_client = MagicMock()
|
||||
fresh_client.base_url = "https://inference-api.nousresearch.com/v1"
|
||||
fresh_client.chat.completions.create.return_value = {"ok": True}
|
||||
|
||||
with (
|
||||
patch("agent.auxiliary_client._resolve_task_provider_model", return_value=("nous", "nous-model", None, None, None)),
|
||||
patch("agent.auxiliary_client._get_cached_client", return_value=(stale_client, "nous-model")),
|
||||
patch("agent.auxiliary_client.OpenAI", return_value=fresh_client),
|
||||
patch("agent.auxiliary_client._validate_llm_response", side_effect=lambda resp, _task: resp),
|
||||
patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=("fresh-agent-key", "https://inference-api.nousresearch.com/v1")),
|
||||
):
|
||||
result = call_llm(
|
||||
task="compression",
|
||||
messages=[{"role": "user", "content": "hi"}],
|
||||
)
|
||||
|
||||
assert result == {"ok": True}
|
||||
assert stale_client.chat.completions.create.call_count == 1
|
||||
assert fresh_client.chat.completions.create.call_count == 1
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_async_call_llm_retries_nous_after_401(self):
|
||||
class _Auth401(Exception):
|
||||
status_code = 401
|
||||
|
||||
stale_client = MagicMock()
|
||||
stale_client.base_url = "https://inference-api.nousresearch.com/v1"
|
||||
stale_client.chat.completions.create = AsyncMock(side_effect=_Auth401("stale nous key"))
|
||||
|
||||
fresh_async_client = MagicMock()
|
||||
fresh_async_client.base_url = "https://inference-api.nousresearch.com/v1"
|
||||
fresh_async_client.chat.completions.create = AsyncMock(return_value={"ok": True})
|
||||
|
||||
with (
|
||||
patch("agent.auxiliary_client._resolve_task_provider_model", return_value=("nous", "nous-model", None, None, None)),
|
||||
patch("agent.auxiliary_client._get_cached_client", return_value=(stale_client, "nous-model")),
|
||||
patch("agent.auxiliary_client._to_async_client", return_value=(fresh_async_client, "nous-model")),
|
||||
patch("agent.auxiliary_client._validate_llm_response", side_effect=lambda resp, _task: resp),
|
||||
patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=("fresh-agent-key", "https://inference-api.nousresearch.com/v1")),
|
||||
):
|
||||
result = await async_call_llm(
|
||||
task="session_search",
|
||||
messages=[{"role": "user", "content": "hi"}],
|
||||
)
|
||||
|
||||
assert result == {"ok": True}
|
||||
assert stale_client.chat.completions.create.await_count == 1
|
||||
assert fresh_async_client.chat.completions.create.await_count == 1
|
||||
|
||||
# ── Payment / credit exhaustion fallback ─────────────────────────────────
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user