def _resolve_nous_runtime_api(*, force_refresh: bool = False) -> Optional[tuple[str, str]]:
    """Return fresh Nous runtime credentials when available.

    This mirrors the main agent's 401 recovery path and keeps auxiliary
    clients aligned with the singleton auth store + mint flow instead of
    relying only on whatever raw tokens happen to be sitting in auth.json
    or the credential pool.

    Args:
        force_refresh: Forwarded to ``resolve_nous_runtime_credentials`` as
            ``force_mint``.

    Returns:
        ``(api_key, base_url)`` with surrounding whitespace removed and any
        trailing ``/`` stripped from the URL, or ``None`` when resolution
        fails, returns something that is not mapping-like, or yields an
        empty key / URL.
    """

    def _env_number(name, default, cast):
        # A malformed or empty override should fall back to the default
        # rather than silently disabling runtime credential resolution.
        raw = (os.getenv(name) or "").strip()
        if not raw:
            return default
        try:
            return cast(raw)
        except ValueError:
            logger.debug("Ignoring invalid %s=%r; using default %r", name, raw, default)
            return default

    # Never accept a key with less than a minute of remaining lifetime.
    min_key_ttl = max(60, _env_number("HERMES_NOUS_MIN_KEY_TTL_SECONDS", 1800, int))
    timeout = _env_number("HERMES_NOUS_TIMEOUT_SECONDS", 15.0, float)

    try:
        from hermes_cli.auth import resolve_nous_runtime_credentials

        creds = resolve_nous_runtime_credentials(
            min_key_ttl_seconds=min_key_ttl,
            timeout_seconds=timeout,
            force_mint=force_refresh,
        )
    except Exception as exc:
        # Best-effort: callers fall back to other credential sources.
        logger.debug("Auxiliary Nous runtime credential resolution failed: %s", exc)
        return None

    # Guard the lookups below: a None / non-mapping result must map to
    # "no credentials" instead of raising AttributeError at the call site.
    if not callable(getattr(creds, "get", None)):
        return None

    api_key = str(creds.get("api_key") or "").strip()
    base_url = str(creds.get("base_url") or "").strip().rstrip("/")
    if not api_key or not base_url:
        return None
    return api_key, base_url
def _is_auth_error(exc: Exception) -> bool:
    """Detect auth failures that should trigger provider-specific refresh.

    An exception counts as an auth failure when it carries
    ``status_code == 401``, when its message contains ``error code: 401``
    (case-insensitive), or when its class name contains
    ``AuthenticationError`` (case-insensitive).
    """
    if getattr(exc, "status_code", None) == 401:
        return True
    if "error code: 401" in str(exc).lower():
        return True
    return "authenticationerror" in type(exc).__name__.lower()


def _client_cache_key(
    provider: str,
    *,
    async_mode: bool,
    base_url: Optional[str] = None,
    api_key: Optional[str] = None,
    api_mode: Optional[str] = None,
    main_runtime: Optional[Dict[str, Any]] = None,
) -> tuple:
    """Build the key under which a client is stored in ``_client_cache``.

    Missing ``base_url`` / ``api_key`` / ``api_mode`` values are normalised
    to ``""``. The main-runtime fields only take part in the key for the
    ``"auto"`` provider; every other provider gets an empty tuple there.
    """
    runtime = _normalize_main_runtime(main_runtime)
    if provider == "auto":
        runtime_key = tuple(runtime.get(field, "") for field in _MAIN_RUNTIME_FIELDS)
    else:
        runtime_key = ()
    return (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key)


def _store_cached_client(
    cache_key: tuple,
    client: Any,
    default_model: Optional[str],
    *,
    bound_loop: Any = None,
) -> None:
    """Insert or replace a cache entry and dispose of the client it replaces.

    The entry is swapped while holding ``_client_cache_lock``; the replaced
    client is closed only after the lock has been released, so slow or
    failing cleanup can neither stall other cache users nor prevent the
    fresh client from being stored. Cleanup is best-effort.

    Args:
        cache_key: Key as produced by ``_client_cache_key``.
        client: Client object to cache.
        default_model: Model name stored alongside the client.
        bound_loop: Event loop stored alongside the client (``None`` when
            not applicable).
    """
    with _client_cache_lock:
        previous = _client_cache.get(cache_key)
        _client_cache[cache_key] = (client, default_model, bound_loop)

    if previous is None or previous[0] is client:
        return

    import inspect

    stale_client = previous[0]
    try:
        _force_close_async_httpx(stale_client)
        close_fn = getattr(stale_client, "close", None)
        if callable(close_fn):
            outcome = close_fn()
            if inspect.iscoroutine(outcome):
                # An async client's close() cannot be awaited from this
                # synchronous path; discard the coroutine explicitly so it
                # does not surface as a "never awaited" RuntimeWarning.
                outcome.close()
    except Exception as exc:
        logger.debug("Closing replaced auxiliary client failed: %s", exc)


def _refresh_nous_auxiliary_client(
    *,
    cache_provider: str,
    model: Optional[str],
    async_mode: bool,
    base_url: Optional[str] = None,
    api_key: Optional[str] = None,
    api_mode: Optional[str] = None,
    main_runtime: Optional[Dict[str, Any]] = None,
) -> Tuple[Optional[Any], Optional[str]]:
    """Refresh Nous runtime creds, rebuild the client, and replace the cache entry.

    Args:
        cache_provider: Provider component of the cache key to overwrite.
        model: Model to pair with the rebuilt client.
        async_mode: When true, the rebuilt client is passed through
            ``_to_async_client`` before being cached and returned.
        base_url, api_key, api_mode, main_runtime: Remaining cache-key
            components. They identify the entry to replace and are not used
            to build the new client, which always uses the freshly resolved
            credentials.

    Returns:
        ``(client, model)`` on success, or ``(None, model)`` when no fresh
        credentials could be resolved (the cache is left untouched).
    """
    runtime = _resolve_nous_runtime_api(force_refresh=True)
    if runtime is None:
        return None, model

    fresh_key, fresh_base_url = runtime
    sync_client = OpenAI(api_key=fresh_key, base_url=fresh_base_url)
    final_model = model

    current_loop = None
    if async_mode:
        # NOTE(review): presumably this has to resolve the loop the same way
        # the cache lookup does so the stored entry matches - confirm before
        # switching to get_running_loop().
        try:
            import asyncio as _aio
            current_loop = _aio.get_event_loop()
        except RuntimeError:
            pass
        client, final_model = _to_async_client(sync_client, final_model or "")
    else:
        client = sync_client

    cache_key = _client_cache_key(
        cache_provider,
        async_mode=async_mode,
        base_url=base_url,
        api_key=api_key,
        api_mode=api_mode,
        main_runtime=main_runtime,
    )
    _store_cached_client(cache_key, client, final_model, bound_loop=current_loop)
    return client, final_model
@@ -2208,8 +2321,14 @@ def _get_cached_client( except RuntimeError: pass runtime = _normalize_main_runtime(main_runtime) - runtime_key = tuple(runtime.get(field, "") for field in _MAIN_RUNTIME_FIELDS) if provider == "auto" else () - cache_key = (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key) + cache_key = _client_cache_key( + provider, + async_mode=async_mode, + base_url=base_url, + api_key=api_key, + api_mode=api_mode, + main_runtime=main_runtime, + ) with _client_cache_lock: if cache_key in _client_cache: cached_client, cached_default, cached_loop = _client_cache[cache_key] @@ -2657,6 +2776,29 @@ def call_llm( raise first_err = retry_err + # ── Nous auth refresh parity with main agent ────────────────── + client_is_nous = ( + resolved_provider == "nous" + or base_url_host_matches(_base_info, "inference-api.nousresearch.com") + ) + if _is_auth_error(first_err) and client_is_nous: + refreshed_client, refreshed_model = _refresh_nous_auxiliary_client( + cache_provider=resolved_provider or "nous", + model=final_model, + async_mode=False, + base_url=resolved_base_url, + api_key=resolved_api_key, + api_mode=resolved_api_mode, + main_runtime=main_runtime, + ) + if refreshed_client is not None: + logger.info("Auxiliary %s: refreshed Nous runtime credentials after 401, retrying", + task or "call") + if refreshed_model and refreshed_model != kwargs.get("model"): + kwargs["model"] = refreshed_model + return _validate_llm_response( + refreshed_client.chat.completions.create(**kwargs), task) + # ── Payment / credit exhaustion fallback ────────────────────── # When the resolved provider returns 402 or a credit-related error, # try alternative providers instead of giving up. 
class TestNousAuxiliaryRefresh:
    """Nous auxiliary clients prefer runtime credentials and recover from 401s."""

    _NOUS_BASE = "https://inference-api.nousresearch.com/v1"

    def test_try_nous_prefers_runtime_credentials(self):
        """Runtime credentials win over the raw token read from the auth store."""
        fresh_base = self._NOUS_BASE
        with (
            patch("agent.auxiliary_client._read_nous_auth", return_value={"access_token": "stale-token"}),
            patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=("fresh-agent-key", fresh_base)),
            patch("agent.auxiliary_client.OpenAI") as mock_openai,
        ):
            from agent.auxiliary_client import _try_nous

            mock_openai.return_value = MagicMock()
            client, model = _try_nous()

        assert client is not None
        assert model == "google/gemini-3-flash-preview"
        assert mock_openai.call_args.kwargs["api_key"] == "fresh-agent-key"
        assert mock_openai.call_args.kwargs["base_url"] == fresh_base

    def test_call_llm_retries_nous_after_401(self):
        """A 401 from the cached client triggers one forced refresh and one retry."""
        class _Auth401(Exception):
            status_code = 401

        stale_client = MagicMock()
        stale_client.base_url = self._NOUS_BASE
        stale_client.chat.completions.create.side_effect = _Auth401("stale nous key")

        fresh_client = MagicMock()
        fresh_client.base_url = self._NOUS_BASE
        fresh_client.chat.completions.create.return_value = {"ok": True}

        with (
            # The refresh path writes into the module-level client cache;
            # isolate it so the mock client cannot leak into other tests.
            patch.dict("agent.auxiliary_client._client_cache", {}, clear=True),
            patch("agent.auxiliary_client._resolve_task_provider_model", return_value=("nous", "nous-model", None, None, None)),
            patch("agent.auxiliary_client._get_cached_client", return_value=(stale_client, "nous-model")),
            patch("agent.auxiliary_client.OpenAI", return_value=fresh_client) as mock_openai,
            patch("agent.auxiliary_client._validate_llm_response", side_effect=lambda resp, _task: resp),
            patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=("fresh-agent-key", self._NOUS_BASE)) as mock_resolve,
        ):
            result = call_llm(
                task="compression",
                messages=[{"role": "user", "content": "hi"}],
            )

        assert result == {"ok": True}
        assert stale_client.chat.completions.create.call_count == 1
        assert fresh_client.chat.completions.create.call_count == 1
        # The retry must be built from force-refreshed credentials.
        mock_resolve.assert_called_with(force_refresh=True)
        assert mock_openai.call_args.kwargs["api_key"] == "fresh-agent-key"
        assert mock_openai.call_args.kwargs["base_url"] == self._NOUS_BASE

    @pytest.mark.asyncio
    async def test_async_call_llm_retries_nous_after_401(self):
        """Async variant: a 401 triggers one forced refresh and one awaited retry."""
        class _Auth401(Exception):
            status_code = 401

        stale_client = MagicMock()
        stale_client.base_url = self._NOUS_BASE
        stale_client.chat.completions.create = AsyncMock(side_effect=_Auth401("stale nous key"))

        fresh_async_client = MagicMock()
        fresh_async_client.base_url = self._NOUS_BASE
        fresh_async_client.chat.completions.create = AsyncMock(return_value={"ok": True})

        with (
            # Same cache isolation as the sync test.
            patch.dict("agent.auxiliary_client._client_cache", {}, clear=True),
            patch("agent.auxiliary_client._resolve_task_provider_model", return_value=("nous", "nous-model", None, None, None)),
            patch("agent.auxiliary_client._get_cached_client", return_value=(stale_client, "nous-model")),
            # Keep the test hermetic: do not construct a real SDK client.
            patch("agent.auxiliary_client.OpenAI", return_value=MagicMock()),
            patch("agent.auxiliary_client._to_async_client", return_value=(fresh_async_client, "nous-model")),
            patch("agent.auxiliary_client._validate_llm_response", side_effect=lambda resp, _task: resp),
            patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=("fresh-agent-key", self._NOUS_BASE)) as mock_resolve,
        ):
            result = await async_call_llm(
                task="session_search",
                messages=[{"role": "user", "content": "hi"}],
            )

        assert result == {"ok": True}
        assert stale_client.chat.completions.create.await_count == 1
        assert fresh_async_client.chat.completions.create.await_count == 1
        mock_resolve.assert_called_with(force_refresh=True)