diff --git a/agent/credential_pool.py b/agent/credential_pool.py index 4f1395d17..98534e902 100644 --- a/agent/credential_pool.py +++ b/agent/credential_pool.py @@ -456,6 +456,70 @@ class CredentialPool: logger.debug("Failed to sync from credentials file: %s", exc) return entry + def _sync_codex_entry_from_auth_store(self, entry: PooledCredential) -> PooledCredential: + """Sync a Codex device_code pool entry from auth.json if tokens differ. + + When a Codex OAuth access token expires (or the ChatGPT account hits + its 5h/weekly quota), the pool entry gets marked ``STATUS_EXHAUSTED`` + with a ``last_error_reset_at`` that can be many hours in the future. + Meanwhile the user may run ``hermes model`` / ``hermes auth`` which + performs a fresh device-code login and writes new tokens to + ``auth.json`` under ``_auth_store_lock``. Without this sync the pool + entry stays frozen until ``last_error_reset_at`` elapses — even + though fresh credentials are sitting on disk — and every request + fails with "no available entries (all exhausted or empty)". + + Mirrors the Nous/Anthropic resync paths above. Only applies to + device_code-sourced entries; env/API-key-sourced entries have no + auth.json shadow to sync from. + """ + if self.provider != "openai-codex" or entry.source != "device_code": + return entry + try: + with _auth_store_lock(): + auth_store = _load_auth_store() + state = _load_provider_state(auth_store, "openai-codex") + if not isinstance(state, dict): + return entry + tokens = state.get("tokens") + if not isinstance(tokens, dict): + return entry + store_access = tokens.get("access_token", "") + store_refresh = tokens.get("refresh_token", "") + # Adopt auth.json tokens when either side differs. Codex refresh + # tokens are single-use too, so a fresh refresh_token from + # another process means our entry's pair is consumed/stale. 
+ entry_access = entry.access_token or "" + entry_refresh = entry.refresh_token or "" + if store_access and ( + store_access != entry_access + or (store_refresh and store_refresh != entry_refresh) + ): + logger.debug( + "Pool entry %s: syncing Codex tokens from auth.json " + "(refreshed by another process)", + entry.id, + ) + field_updates: Dict[str, Any] = { + "access_token": store_access, + "refresh_token": store_refresh or entry.refresh_token, + "last_status": None, + "last_status_at": None, + "last_error_code": None, + "last_error_reason": None, + "last_error_message": None, + "last_error_reset_at": None, + } + if state.get("last_refresh"): + field_updates["last_refresh"] = state["last_refresh"] + updated = replace(entry, **field_updates) + self._replace_entry(entry, updated) + self._persist() + return updated + except Exception as exc: + logger.debug("Failed to sync Codex entry from auth.json: %s", exc) + return entry + def _sync_nous_entry_from_auth_store(self, entry: PooledCredential) -> PooledCredential: """Sync a Nous pool entry from auth.json if tokens differ. @@ -788,6 +852,18 @@ class CredentialPool: if synced is not entry: entry = synced cleared_any = True + # For openai-codex entries, same pattern: the user may have + # re-authed via `hermes model` / `hermes auth` after a 429/401, + # leaving fresh tokens on disk while the pool entry is still + # frozen behind last_error_reset_at (can be hours in the + # future for ChatGPT weekly windows). 
+ if (self.provider == "openai-codex" + and entry.source == "device_code" + and entry.last_status == STATUS_EXHAUSTED): + synced = self._sync_codex_entry_from_auth_store(entry) + if synced is not entry: + entry = synced + cleared_any = True if entry.last_status == STATUS_EXHAUSTED: exhausted_until = _exhausted_until(entry) if exhausted_until is not None and now < exhausted_until: diff --git a/hermes_cli/doctor.py b/hermes_cli/doctor.py index 64ac4915f..dd9d2d164 100644 --- a/hermes_cli/doctor.py +++ b/hermes_cli/doctor.py @@ -517,7 +517,14 @@ def run_doctor(args): if shutil.which("codex"): check_ok("codex CLI") else: - check_warn("codex CLI not found", "(required for openai-codex login)") + # Native OAuth uses Hermes' own device-code flow — the Codex CLI is + # only needed if you want to import existing tokens from + # ~/.codex/auth.json. Downgrade to info so users running + # `hermes auth openai-codex` aren't told they're missing something. + check_info( + "codex CLI not installed " + "(optional — only required to import tokens from an existing Codex CLI login)" + ) # ========================================================================= # Check: Directory structure diff --git a/tests/agent/test_credential_pool.py b/tests/agent/test_credential_pool.py index 7f3a835f1..70e59f17a 100644 --- a/tests/agent/test_credential_pool.py +++ b/tests/agent/test_credential_pool.py @@ -1370,3 +1370,143 @@ def test_nous_exhausted_entry_recovers_via_auth_store_sync(tmp_path, monkeypatch assert len(available) == 1 assert available[0].refresh_token == "refresh-FRESH" assert available[0].last_status is None + + +# ── OpenAI Codex OAuth cross-process sync tests ──────────────────────────── + +def _codex_auth_store(access: str, refresh: str) -> dict: + return { + "version": 1, + "active_provider": "openai-codex", + "providers": { + "openai-codex": { + "auth_mode": "chatgpt", + "tokens": { + "access_token": access, + "refresh_token": refresh, + "id_token": "id-" + access, + }, + 
"last_refresh": "2026-04-28T00:00:00Z", + } + }, + } + + +def test_sync_codex_entry_from_auth_store_adopts_newer_tokens(tmp_path, monkeypatch): + """When auth.json has newer Codex tokens, the pool entry should adopt them.""" + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes")) + _write_auth_store(tmp_path, _codex_auth_store("access-OLD", "refresh-OLD")) + + from agent.credential_pool import load_pool + + pool = load_pool("openai-codex") + entry = pool.select() + assert entry is not None + assert entry.access_token == "access-OLD" + assert entry.refresh_token == "refresh-OLD" + + # Simulate `hermes auth openai-codex` replacing the token pair on disk. + _write_auth_store(tmp_path, _codex_auth_store("access-NEW", "refresh-NEW")) + + synced = pool._sync_codex_entry_from_auth_store(entry) + assert synced is not entry + assert synced.access_token == "access-NEW" + assert synced.refresh_token == "refresh-NEW" + assert synced.last_status is None + assert synced.last_error_code is None + assert synced.last_error_reset_at is None + + +def test_sync_codex_entry_noop_when_tokens_match(tmp_path, monkeypatch): + """When auth.json has the same tokens, sync should be a no-op.""" + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes")) + _write_auth_store(tmp_path, _codex_auth_store("access-same", "refresh-same")) + + from agent.credential_pool import load_pool + + pool = load_pool("openai-codex") + entry = pool.select() + assert entry is not None + + synced = pool._sync_codex_entry_from_auth_store(entry) + assert synced is entry + + +def test_codex_exhausted_entry_recovers_via_auth_store_sync(tmp_path, monkeypatch): + """An exhausted Codex entry should recover when auth.json has newer tokens. 
+ + Reproduces the Discord report (p1aceho1der, Apr 2026): after a Codex + rate-limit reset the user ran `hermes model` to reauth, but the pool + entry stayed marked EXHAUSTED with last_error_reset_at many hours in + the future — so `_available_entries` kept returning empty and every + request failed with "no available entries (all exhausted or empty)". + """ + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes")) + from agent.credential_pool import load_pool, STATUS_EXHAUSTED + from dataclasses import replace as dc_replace + + _write_auth_store(tmp_path, _codex_auth_store("access-OLD", "refresh-OLD")) + + pool = load_pool("openai-codex") + entry = pool.select() + assert entry is not None + + # Mark entry as exhausted with last_error_reset_at one hour in the + # future (Codex 429 weekly-window pattern). + now = time.time() + exhausted = dc_replace( + entry, + last_status=STATUS_EXHAUSTED, + last_status_at=now, + last_error_code=429, + last_error_reset_at=now + 3600, + ) + pool._replace_entry(entry, exhausted) + pool._persist() + + # Sanity: before the reauth, _available_entries refuses to return + # this entry because last_error_reset_at is in the future. + # (clear_expired would only clear it AFTER exhausted_until elapsed.) + available_before = pool._available_entries(clear_expired=True, refresh=False) + assert available_before == [] + + # Simulate `hermes model` / `hermes auth` refreshing the tokens. 
def test_codex_exhausted_entry_stays_stuck_without_auth_store_update(tmp_path, monkeypatch):
    """An exhausted Codex entry stays unavailable while auth.json is unchanged.

    Regression guard: the auth-store sync must not clear an entry's status
    merely because it is ``STATUS_EXHAUSTED``. With the same tokens on disk
    as in the pool, the entry has to remain behind its reset window.
    """
    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
    from agent.credential_pool import load_pool, STATUS_EXHAUSTED
    from dataclasses import replace as dc_replace

    _write_auth_store(tmp_path, _codex_auth_store("access-same", "refresh-same"))

    codex_pool = load_pool("openai-codex")
    selected = codex_pool.select()
    assert selected is not None

    # Park the entry as exhausted with a reset time one hour from now.
    marked_at = time.time()
    exhaustion_fields = {
        "last_status": STATUS_EXHAUSTED,
        "last_status_at": marked_at,
        "last_error_code": 429,
        "last_error_reset_at": marked_at + 3600,
    }
    codex_pool._replace_entry(selected, dc_replace(selected, **exhaustion_fields))
    codex_pool._persist()

    # auth.json still holds the same token pair, so the sync hands back the
    # very same entry and the exhausted-until check keeps filtering it out.
    assert codex_pool._available_entries(clear_expired=True, refresh=False) == []