@staticmethod
def _load_service_tier() -> str | None:
    """Load the Priority Processing setting from config.yaml.

    Reads ``agent.service_tier`` from ``config.yaml`` in the Hermes home
    directory. Accepted values mirror the CLI:

    * ``"fast"`` / ``"priority"`` / ``"on"`` enable it and yield ``"priority"``.
    * ``"normal"`` / ``"default"`` / ``"standard"`` / ``"off"`` / ``"none"``,
      or an unset/empty value, disable it.

    YAML booleans are accepted as well: PyYAML resolves an unquoted
    ``on``/``off`` (and ``yes``/``no``/``true``/``false``) to ``True``/``False``,
    so ``service_tier: on`` would otherwise be reported as unknown.

    Returns:
        ``"priority"`` when Priority Processing is enabled; ``None`` when it
        is unset, disabled, unrecognised, or the config cannot be read.
    """
    setting = None
    try:
        import yaml as _y

        cfg_path = _hermes_home / "config.yaml"
        if cfg_path.exists():
            with open(cfg_path, encoding="utf-8") as _f:
                cfg = _y.safe_load(_f) or {}
            # An ``agent:`` key with no children loads as None, not {}.
            agent_cfg = cfg.get("agent") or {}
            setting = agent_cfg.get("service_tier")
    except Exception as exc:
        # Best-effort: an unreadable or malformed config must not break
        # message handling, but leave a trace for debugging.
        logger.debug("Could not read agent.service_tier from config.yaml: %s", exc)

    # Handle booleans before stringifying; str(True) is "True", which is
    # not one of the recognised spellings below.
    if isinstance(setting, bool):
        return "priority" if setting else None

    raw = str(setting or "").strip()
    value = raw.lower()
    if not value or value in {"normal", "default", "standard", "off", "none"}:
        return None
    if value in {"fast", "priority", "on"}:
        return "priority"
    logger.warning("Unknown service_tier '%s', ignoring", raw)
    return None
async def _handle_fast_command(self, event: MessageEvent) -> str:
    """Handle /fast — mirror the CLI Priority Processing toggle in gateway chats.

    Supported arguments (case-insensitive):

    * none or ``status`` — report the current mode.
    * ``fast`` / ``on`` — enable Priority Processing.
    * ``normal`` / ``off`` — disable it.

    The choice is persisted to ``agent.service_tier`` in ``config.yaml`` as
    ``"fast"`` or ``"normal"``. The command is rejected up front when the
    configured gateway model does not support fast mode.

    Args:
        event: Incoming command message; only its command arguments are read.

    Returns:
        A chat-formatted reply: the current mode, confirmation of the change,
        or an explanation of why nothing was changed.
    """
    import yaml
    from hermes_cli.models import model_supports_fast_mode

    args = event.get_command_args().strip().lower()
    config_path = _hermes_home / "config.yaml"
    # Refresh from disk so the status reply reflects the saved config.
    self._service_tier = self._load_service_tier()

    user_config = _load_gateway_config()
    model = _resolve_gateway_model(user_config)
    if not model_supports_fast_mode(model):
        return "⚡ /fast is only available for OpenAI models that support Priority Processing."

    def _save_config_key(key_path: str, value) -> bool:
        """Save a dot-separated key to config.yaml; return True on success."""
        try:
            document = {}
            if config_path.exists():
                with open(config_path, encoding="utf-8") as f:
                    document = yaml.safe_load(f) or {}
            *parents, leaf = key_path.split(".")
            node = document
            for part in parents:
                # Create (or replace a non-mapping) intermediate section.
                if part not in node or not isinstance(node[part], dict):
                    node[part] = {}
                node = node[part]
            node[leaf] = value
            atomic_yaml_write(config_path, document)
            return True
        except Exception as e:
            logger.error("Failed to save config key %s: %s", key_path, e)
            return False

    if not args or args == "status":
        status = "fast" if self._service_tier == "priority" else "normal"
        return (
            "⚡ Priority Processing\n\n"
            f"Current mode: `{status}`\n\n"
            "_Usage:_ `/fast [normal|fast|status]`"
        )

    # argument -> (in-memory tier, value written to config, display label)
    fast_choice = ("priority", "fast", "FAST")
    normal_choice = (None, "normal", "NORMAL")
    choices = {
        "fast": fast_choice,
        "on": fast_choice,
        "normal": normal_choice,
        "off": normal_choice,
    }
    choice = choices.get(args)
    if choice is None:
        return (
            f"⚠️ Unknown argument: `{args}`\n\n"
            "**Valid options:** normal, fast, status"
        )
    tier, saved_value, label = choice

    if not _save_config_key("agent.service_tier", saved_value):
        # The service tier is re-read from config.yaml before each agent is
        # built, so an unsaved change would never reach a request. Report
        # the failure instead of claiming a session-only change.
        return f"⚠️ Priority Processing: could not save **{label}** to config — mode unchanged"

    self._service_tier = tier
    return f"⚡ ✓ Priority Processing: **{label}** (saved to config)\n_(takes effect on next message)_"
"""Tests for gateway /fast support and Priority Processing routing."""

import sys
import threading
import types
from types import SimpleNamespace
from unittest.mock import AsyncMock, patch

import pytest
import yaml

import gateway.run as gateway_run
from gateway.config import Platform
from gateway.platforms.base import MessageEvent
from gateway.session import SessionSource


_OPENROUTER_URL = "https://openrouter.ai/api/v1"


class _CapturingAgent:
    """Stand-in for ``run_agent.AIAgent`` that records how it was used."""

    # Stored on the class so a test can inspect the latest construction and
    # run without holding a reference to the instance the gateway created.
    last_init = None
    last_run = None

    def __init__(self, *args, **kwargs):
        self.tools = []
        type(self).last_init = dict(kwargs)

    def run_conversation(self, user_message, conversation_history=None, task_id=None, persist_user_message=None):
        call_record = dict(
            user_message=user_message,
            conversation_history=conversation_history,
            task_id=task_id,
            persist_user_message=persist_user_message,
        )
        type(self).last_run = call_record
        return dict(final_response="ok", messages=[], api_calls=1, completed=True)


def _install_fake_agent(monkeypatch):
    """Register a fake ``run_agent`` module whose ``AIAgent`` is the capturing stub."""
    stub_module = types.ModuleType("run_agent")
    setattr(stub_module, "AIAgent", _CapturingAgent)
    monkeypatch.setitem(sys.modules, "run_agent", stub_module)


def _make_runner():
    """Build a ``GatewayRunner`` without running ``__init__``, with minimal state."""
    instance = object.__new__(gateway_run.GatewayRunner)
    state = {
        "adapters": {},
        "_ephemeral_system_prompt": "",
        "_prefill_messages": [],
        "_reasoning_config": None,
        "_service_tier": None,
        "_provider_routing": {},
        "_fallback_model": None,
        "_smart_model_routing": {},
        "_running_agents": {},
        "_pending_model_notes": {},
        "_session_db": None,
        "_agent_cache": {},
        "_agent_cache_lock": threading.Lock(),
        "hooks": SimpleNamespace(loaded_hooks=False),
        "config": SimpleNamespace(streaming=None),
        "session_store": SimpleNamespace(
            get_or_create_session=lambda source: SimpleNamespace(session_id="session-1"),
            load_transcript=lambda session_id: [],
        ),
        "_get_or_create_gateway_honcho": lambda session_key: (None, None),
        "_enrich_message_with_vision": AsyncMock(return_value="ENRICHED"),
    }
    for attr_name, attr_value in state.items():
        setattr(instance, attr_name, attr_value)
    return instance


def _make_source() -> SessionSource:
    """Return a Telegram DM session source used by every test."""
    return SessionSource(platform=Platform.TELEGRAM, chat_id="12345", chat_type="dm", user_id="user-1")


def _make_event(text: str) -> MessageEvent:
    """Wrap *text* in a message event coming from the shared test source."""
    return MessageEvent(text=text, source=_make_source(), message_id="m1")


def _openrouter_runtime() -> dict:
    """Return the runtime kwargs shared by the turn-route tests."""
    return {
        "api_key": "***",
        "base_url": _OPENROUTER_URL,
        "provider": "openrouter",
        "api_mode": "chat_completions",
        "command": None,
        "args": [],
        "credential_pool": None,
    }


def _resolve_with_stubbed_route(model_name: str) -> dict:
    """Resolve a turn route for *model_name* with priority tier on and smart routing stubbed."""
    runner = _make_runner()
    runner._service_tier = "priority"
    runtime_kwargs = _openrouter_runtime()
    stub_route = {
        "model": model_name,
        "runtime": dict(runtime_kwargs),
        "label": None,
        "signature": (model_name, "openrouter", _OPENROUTER_URL, "chat_completions", None, ()),
    }
    with patch("agent.smart_model_routing.resolve_turn_route", return_value=stub_route):
        return gateway_run.GatewayRunner._resolve_turn_agent_config(runner, "hi", model_name, runtime_kwargs)


def test_turn_route_injects_priority_processing_without_changing_runtime():
    route = _resolve_with_stubbed_route("gpt-5.4")

    runtime = route["runtime"]
    assert runtime["provider"] == "openrouter"
    assert runtime["api_mode"] == "chat_completions"
    assert route["request_overrides"] == {"service_tier": "priority"}


def test_turn_route_skips_priority_processing_for_unsupported_models():
    route = _resolve_with_stubbed_route("gpt-5.3-codex")

    assert route["request_overrides"] is None


@pytest.mark.asyncio
async def test_handle_fast_command_persists_config(monkeypatch, tmp_path):
    runner = _make_runner()

    for attr_name, replacement in (
        ("_hermes_home", tmp_path),
        ("_load_gateway_config", lambda: {}),
        ("_resolve_gateway_model", lambda config=None: "gpt-5.4"),
    ):
        monkeypatch.setattr(gateway_run, attr_name, replacement)

    reply = await runner._handle_fast_command(_make_event("/fast fast"))

    assert "FAST" in reply
    assert runner._service_tier == "priority"

    config_text = (tmp_path / "config.yaml").read_text(encoding="utf-8")
    persisted = yaml.safe_load(config_text)
    assert persisted["agent"]["service_tier"] == "fast"


@pytest.mark.asyncio
async def test_run_agent_passes_priority_processing_to_gateway_agent(monkeypatch, tmp_path):
    _install_fake_agent(monkeypatch)
    runner = _make_runner()

    config_file = tmp_path / "config.yaml"
    config_file.write_text("agent:\n service_tier: fast\n", encoding="utf-8")

    def _fake_runtime_kwargs():
        return {
            "provider": "openrouter",
            "api_mode": "chat_completions",
            "base_url": "https://openrouter.ai/api/v1",
            "api_key": "***",
        }

    for attr_name, replacement in (
        ("_hermes_home", tmp_path),
        ("_env_path", tmp_path / ".env"),
        ("load_dotenv", lambda *args, **kwargs: None),
        ("_load_gateway_config", lambda: {}),
        ("_resolve_gateway_model", lambda config=None: "gpt-5.4"),
        ("_resolve_runtime_agent_kwargs", _fake_runtime_kwargs),
    ):
        monkeypatch.setattr(gateway_run, attr_name, replacement)

    import hermes_cli.tools_config as tools_config
    monkeypatch.setattr(tools_config, "_get_platform_tools", lambda user_config, platform_key: {"core"})

    _CapturingAgent.last_init = None
    outcome = await runner._run_agent(
        message="hi",
        context_prompt="",
        history=[],
        source=_make_source(),
        session_id="session-1",
        session_key="agent:main:telegram:dm:12345",
    )

    assert outcome["final_response"] == "ok"
    captured = _CapturingAgent.last_init
    assert captured["service_tier"] == "priority"
    assert captured["request_overrides"] == {"service_tier": "priority"}