From fe12042e50c5a9187463d3646172b6212e122b7d Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Thu, 16 Apr 2026 06:44:23 -0700 Subject: [PATCH] fix: remove context pressure warnings entirely (#11039) The gateway compression notifications were already removed in commit cc63b2d1 (PR #4139), but the agent-level context pressure warnings (85%/95% tiered alerts via _emit_context_pressure) were still firing on both CLI and gateway. Removed: - _emit_context_pressure method and all call sites in run_conversation() - Class-level dedup state (_context_pressure_last_warned, _CONTEXT_PRESSURE_COOLDOWN) - Instance attribute _context_pressure_warned_at - Pressure reset logic in _compress_context - format_context_pressure and format_context_pressure_gateway from agent/display.py - Orphaned ANSI constants that only served these functions - tests/run_agent/test_context_pressure.py (all 361 lines) Compression itself continues to run silently in the background. Closes #3784 --- agent/display.py | 80 ----- run_agent.py | 98 ------ tests/run_agent/test_context_pressure.py | 361 ----------------------- 3 files changed, 539 deletions(-) delete mode 100644 tests/run_agent/test_context_pressure.py diff --git a/agent/display.py b/agent/display.py index a7f3cbaa2..3f1341485 100644 --- a/agent/display.py +++ b/agent/display.py @@ -993,84 +993,4 @@ def get_cute_tool_message( # Honcho session line (one-liner with clickable OSC 8 hyperlink) # ========================================================================= -_DIM = "\033[2m" -_SKY_BLUE = "\033[38;5;117m" -_ANSI_RESET = "\033[0m" - -# ========================================================================= -# Context pressure display (CLI user-facing warnings) -# ========================================================================= - -# ANSI color codes for context pressure tiers -_CYAN = "\033[36m" -_YELLOW = "\033[33m" -_BOLD = "\033[1m" -_DIM_ANSI = "\033[2m" - -# Bar characters -_BAR_FILLED = "▰" -_BAR_EMPTY = "▱" -_BAR_WIDTH = 20 - - -def format_context_pressure( - compaction_progress: float, - threshold_tokens: int, - threshold_percent: float, - compression_enabled: bool = True, -) -> str: - """Build a formatted context pressure line for CLI display. - - The bar and percentage show progress toward the compaction threshold, - NOT the raw context window. 100% = compaction fires. - - Args: - compaction_progress: How close to compaction (0.0–1.0, 1.0 = fires). - threshold_tokens: Compaction threshold in tokens. - threshold_percent: Compaction threshold as a fraction of context window. - compression_enabled: Whether auto-compression is active. - """ - pct_int = min(int(compaction_progress * 100), 100) - filled = min(int(compaction_progress * _BAR_WIDTH), _BAR_WIDTH) - bar = _BAR_FILLED * filled + _BAR_EMPTY * (_BAR_WIDTH - filled) - - threshold_k = f"{threshold_tokens // 1000}k" if threshold_tokens >= 1000 else str(threshold_tokens) - threshold_pct_int = int(threshold_percent * 100) - - color = f"{_BOLD}{_YELLOW}" - icon = "⚠" - if compression_enabled: - hint = "compaction approaching" - else: - hint = "no auto-compaction" - - return ( - f" {color}{icon} context {bar} {pct_int}% to compaction{_ANSI_RESET}" - f" {_DIM_ANSI}{threshold_k} threshold ({threshold_pct_int}%) · {hint}{_ANSI_RESET}" - ) - - -def format_context_pressure_gateway( - compaction_progress: float, - threshold_percent: float, - compression_enabled: bool = True, -) -> str: - """Build a plain-text context pressure notification for messaging platforms. - - No ANSI — just Unicode and plain text suitable for Telegram/Discord/etc. - The percentage shows progress toward the compaction threshold. - """ - pct_int = min(int(compaction_progress * 100), 100) - filled = min(int(compaction_progress * _BAR_WIDTH), _BAR_WIDTH) - bar = _BAR_FILLED * filled + _BAR_EMPTY * (_BAR_WIDTH - filled) - - threshold_pct_int = int(threshold_percent * 100) - - icon = "⚠️" - if compression_enabled: - hint = f"Context compaction approaching (threshold: {threshold_pct_int}% of window)." - else: - hint = "Auto-compaction is disabled — context may be truncated." - - return f"{icon} Context: {bar} {pct_int}% to compaction\n{hint}" diff --git a/run_agent.py b/run_agent.py index d2612c346..3707e943e 100644 --- a/run_agent.py +++ b/run_agent.py @@ -540,13 +540,6 @@ class AIAgent: for AI models that support function calling. """ - # ── Class-level context pressure dedup (survives across instances) ── - # The gateway creates a new AIAgent per message, so instance-level flags - # reset every time. This dict tracks {session_id: (warn_level, timestamp)} - # to suppress duplicate warnings within a cooldown window. - _context_pressure_last_warned: dict = {} - _CONTEXT_PRESSURE_COOLDOWN = 300 # seconds between re-warning same session - @property def base_url(self) -> str: return self._base_url @@ -826,12 +819,6 @@ class AIAgent: self._budget_exhausted_injected = False self._budget_grace_call = False - # Context pressure warnings: notify the USER (not the LLM) as context - # fills up. Purely informational — displayed in CLI output and sent via - # status_callback for gateway platforms. Does NOT inject into messages. - # Tiered: fires at 85% and again at 95% of compaction threshold. - self._context_pressure_warned_at = 0.0 # highest tier already shown - # Activity tracking — updated on each API call, tool execution, and # stream chunk. Used by the gateway timeout handler to report what the # agent was doing when it was killed, and by the "still working" @@ -7220,20 +7207,6 @@ class AIAgent: self.context_compressor.last_prompt_tokens = _compressed_est self.context_compressor.last_completion_tokens = 0 - # Only reset the pressure warning if compression actually brought - # us below the warning level (85% of threshold). When compression - # can't reduce enough (e.g. threshold is very low, or system prompt - # alone exceeds the warning level), keep the tier set to prevent - # spamming the user with repeated warnings every loop iteration. - if self.context_compressor.threshold_tokens > 0: - _post_progress = _compressed_est / self.context_compressor.threshold_tokens - if _post_progress < 0.85: - self._context_pressure_warned_at = 0.0 - # Clear class-level dedup for this session so a fresh - # warning cycle can start if context grows again. - _sid = self.session_id or "default" - AIAgent._context_pressure_last_warned.pop(_sid, None) - # Clear the file-read dedup cache. After compression the original # read content is summarised away — if the model re-reads the same # file it needs the full content, not a "file unchanged" stub. @@ -8033,45 +8006,6 @@ class AIAgent: - def _emit_context_pressure(self, compaction_progress: float, compressor) -> None: - """Notify the user that context is approaching the compaction threshold. - - Args: - compaction_progress: How close to compaction (0.0–1.0, where 1.0 = fires). - compressor: The ContextCompressor instance (for threshold/context info). - - Purely user-facing — does NOT modify the message stream. - For CLI: prints a formatted line with a progress bar. - For gateway: fires status_callback so the platform can send a chat message. - """ - from agent.display import format_context_pressure, format_context_pressure_gateway - - threshold_pct = compressor.threshold_tokens / compressor.context_length if compressor.context_length else 0.5 - - # CLI output — always shown (these are user-facing status notifications, - # not verbose debug output, so they bypass quiet_mode). - # Gateway users also get the callback below. - if self.platform in (None, "cli"): - line = format_context_pressure( - compaction_progress=compaction_progress, - threshold_tokens=compressor.threshold_tokens, - threshold_percent=threshold_pct, - compression_enabled=self.compression_enabled, - ) - self._safe_print(line) - - # Gateway / external consumers - if self.status_callback: - try: - msg = format_context_pressure_gateway( - compaction_progress=compaction_progress, - threshold_percent=threshold_pct, - compression_enabled=self.compression_enabled, - ) - self.status_callback("context_pressure", msg) - except Exception: - logger.debug("status_callback error in context pressure", exc_info=True) - def _handle_max_iterations(self, messages: list, api_call_count: int) -> str: """Request a summary when max iterations are reached. Returns the final response text.""" print(f"⚠️ Reached maximum iterations ({self.max_iterations}). Requesting summary...") @@ -10792,38 +10726,6 @@ class AIAgent: else: _real_tokens = estimate_messages_tokens_rough(messages) - # ── Context pressure warnings (user-facing only) ────────── - # Notify the user (NOT the LLM) as context approaches the - # compaction threshold. Thresholds are relative to where - # compaction fires, not the raw context window. - # Does not inject into messages — just prints to CLI output - # and fires status_callback for gateway platforms. - # Tiered: 85% (orange) and 95% (red/critical). - if _compressor.threshold_tokens > 0: - _compaction_progress = _real_tokens / _compressor.threshold_tokens - # Determine the warning tier for this progress level - _warn_tier = 0.0 - if _compaction_progress >= 0.95: - _warn_tier = 0.95 - elif _compaction_progress >= 0.85: - _warn_tier = 0.85 - if _warn_tier > self._context_pressure_warned_at: - # Class-level dedup: check if this session was already - # warned at this tier within the cooldown window. - _sid = self.session_id or "default" - _last = AIAgent._context_pressure_last_warned.get(_sid) - _now = time.time() - if _last is None or _last[0] < _warn_tier or (_now - _last[1]) >= self._CONTEXT_PRESSURE_COOLDOWN: - self._context_pressure_warned_at = _warn_tier - AIAgent._context_pressure_last_warned[_sid] = (_warn_tier, _now) - self._emit_context_pressure(_compaction_progress, _compressor) - # Evict stale entries (older than 2x cooldown) - _cutoff = _now - self._CONTEXT_PRESSURE_COOLDOWN * 2 - AIAgent._context_pressure_last_warned = { - k: v for k, v in AIAgent._context_pressure_last_warned.items() - if v[1] > _cutoff - } - if self.compression_enabled and _compressor.should_compress(_real_tokens): self._safe_print(" ⟳ compacting context…") messages, active_system_prompt = self._compress_context( diff --git a/tests/run_agent/test_context_pressure.py b/tests/run_agent/test_context_pressure.py deleted file mode 100644 index 4140749c5..000000000 --- a/tests/run_agent/test_context_pressure.py +++ /dev/null @@ -1,361 +0,0 @@ -"""Tests for context pressure warnings (user-facing, not injected into messages). - -Covers: -- Display formatting (CLI and gateway variants) -- Flag tracking and threshold logic on AIAgent -- Flag reset after compression -- status_callback invocation -""" - -import json -from types import SimpleNamespace -from unittest.mock import MagicMock, patch - -import pytest - -from agent.display import format_context_pressure, format_context_pressure_gateway -from run_agent import AIAgent - - -# --------------------------------------------------------------------------- -# Display formatting tests -# --------------------------------------------------------------------------- - - -class TestFormatContextPressure: - """CLI context pressure display (agent/display.py). - - The bar shows progress toward the compaction threshold, not the - raw context window. 60% = 60% of the way to compaction. - """ - - def test_80_percent_uses_warning_icon(self): - line = format_context_pressure(0.80, 100_000, 0.50) - assert "⚠" in line - assert "80% to compaction" in line - - def test_90_percent_uses_warning_icon(self): - line = format_context_pressure(0.90, 100_000, 0.50) - assert "⚠" in line - assert "90% to compaction" in line - - def test_bar_length_scales_with_progress(self): - line_80 = format_context_pressure(0.80, 100_000, 0.50) - line_95 = format_context_pressure(0.95, 100_000, 0.50) - assert line_95.count("▰") > line_80.count("▰") - - def test_shows_threshold_tokens(self): - line = format_context_pressure(0.80, 100_000, 0.50) - assert "100k" in line - - def test_small_threshold(self): - line = format_context_pressure(0.80, 500, 0.50) - assert "500" in line - - def test_shows_threshold_percent(self): - line = format_context_pressure(0.80, 100_000, 0.50) - assert "50%" in line - - def test_approaching_hint(self): - line = format_context_pressure(0.80, 100_000, 0.50) - assert "compaction approaching" in line - - def test_no_compaction_when_disabled(self): - line = format_context_pressure(0.85, 100_000, 0.50, compression_enabled=False) - assert "no auto-compaction" in line - - def test_returns_string(self): - result = format_context_pressure(0.65, 128_000, 0.50) - assert isinstance(result, str) - - def test_over_100_percent_capped(self): - """Progress > 1.0 should cap both bar and percentage text at 100%.""" - line = format_context_pressure(1.05, 100_000, 0.50) - assert "▰" in line - assert line.count("▰") == 20 - assert "100%" in line - assert "105%" not in line - - -class TestFormatContextPressureGateway: - """Gateway (plain text) context pressure display.""" - - def test_80_percent_warning(self): - msg = format_context_pressure_gateway(0.80, 0.50) - assert "80% to compaction" in msg - assert "50%" in msg - - def test_90_percent_warning(self): - msg = format_context_pressure_gateway(0.90, 0.50) - assert "90% to compaction" in msg - assert "approaching" in msg - - def test_no_compaction_warning(self): - msg = format_context_pressure_gateway(0.85, 0.50, compression_enabled=False) - assert "disabled" in msg - - def test_no_ansi_codes(self): - msg = format_context_pressure_gateway(0.80, 0.50) - assert "\033[" not in msg - - def test_has_progress_bar(self): - msg = format_context_pressure_gateway(0.80, 0.50) - assert "▰" in msg - - def test_over_100_percent_capped(self): - """Progress > 1.0 should cap percentage text at 100%.""" - msg = format_context_pressure_gateway(1.09, 0.50) - assert "100% to compaction" in msg - assert "109%" not in msg - assert msg.count("▰") == 20 - - -# --------------------------------------------------------------------------- -# AIAgent context pressure flag tests -# --------------------------------------------------------------------------- - - -def _make_tool_defs(*names): - return [ - { - "type": "function", - "function": { - "name": n, - "description": f"{n} tool", - "parameters": {"type": "object", "properties": {}}, - }, - } - for n in names - ] - - -@pytest.fixture() -def agent(): - """Minimal AIAgent with mocked internals.""" - with ( - patch("run_agent.get_tool_definitions", return_value=_make_tool_defs("web_search")), - patch("run_agent.check_toolset_requirements", return_value={}), - patch("run_agent.OpenAI"), - ): - a = AIAgent( - api_key="test-key-1234567890", - quiet_mode=True, - skip_context_files=True, - skip_memory=True, - ) - a.client = MagicMock() - return a - - -class TestContextPressureFlags: - """Context pressure warning flag tracking on AIAgent.""" - - def test_flag_initialized_zero(self, agent): - assert agent._context_pressure_warned_at == 0.0 - - def test_emit_calls_status_callback(self, agent): - """status_callback should be invoked with event type and message.""" - cb = MagicMock() - agent.status_callback = cb - - compressor = MagicMock() - compressor.context_length = 200_000 - compressor.threshold_tokens = 100_000 # 50% - - agent._emit_context_pressure(0.85, compressor) - - cb.assert_called_once() - args = cb.call_args[0] - assert args[0] == "context_pressure" - assert "85% to compaction" in args[1] - - def test_emit_no_callback_no_crash(self, agent): - """No status_callback set — should not crash.""" - agent.status_callback = None - - compressor = MagicMock() - compressor.context_length = 200_000 - compressor.threshold_tokens = 100_000 - - # Should not raise - agent._emit_context_pressure(0.60, compressor) - - def test_emit_prints_for_cli_platform(self, agent, capsys): - """CLI platform should always print context pressure, even in quiet_mode.""" - agent.quiet_mode = True - agent.platform = "cli" - agent.status_callback = None - - compressor = MagicMock() - compressor.context_length = 200_000 - compressor.threshold_tokens = 100_000 - - agent._emit_context_pressure(0.85, compressor) - captured = capsys.readouterr() - assert "▰" in captured.out - assert "to compaction" in captured.out - - def test_emit_skips_print_for_gateway_platform(self, agent, capsys): - """Gateway platforms get the callback, not CLI print.""" - agent.platform = "telegram" - agent.status_callback = None - - compressor = MagicMock() - compressor.context_length = 200_000 - compressor.threshold_tokens = 100_000 - - agent._emit_context_pressure(0.85, compressor) - captured = capsys.readouterr() - assert "▰" not in captured.out - - def test_flag_reset_on_compression(self, agent): - """After _compress_context, context pressure flag should reset.""" - agent._context_pressure_warned_at = 0.85 - agent.compression_enabled = True - - agent.context_compressor = MagicMock() - agent.context_compressor.compress.return_value = [ - {"role": "user", "content": "Summary of conversation so far."} - ] - agent.context_compressor.context_length = 200_000 - agent.context_compressor.threshold_tokens = 100_000 - agent.context_compressor.compression_count = 1 - - agent._todo_store = MagicMock() - agent._todo_store.format_for_injection.return_value = None - - agent._build_system_prompt = MagicMock(return_value="system prompt") - agent._cached_system_prompt = "old system prompt" - agent._session_db = None - - messages = [ - {"role": "user", "content": "hello"}, - {"role": "assistant", "content": "hi there"}, - ] - agent._compress_context(messages, "system prompt") - - assert agent._context_pressure_warned_at == 0.0 - - def test_emit_callback_error_handled(self, agent): - """If status_callback raises, it should be caught gracefully.""" - cb = MagicMock(side_effect=RuntimeError("callback boom")) - agent.status_callback = cb - - compressor = MagicMock() - compressor.context_length = 200_000 - compressor.threshold_tokens = 100_000 - - # Should not raise - agent._emit_context_pressure(0.85, compressor) - - def test_tiered_reemits_at_95(self, agent): - """Warning fires at 85%, then fires again when crossing 95%.""" - agent._context_pressure_warned_at = 0.85 - # Simulate crossing 95%: the tier (0.95) > warned_at (0.85) - assert 0.95 > agent._context_pressure_warned_at - # After emission at 95%, the tier should update - agent._context_pressure_warned_at = 0.95 - assert agent._context_pressure_warned_at == 0.95 - - def test_tiered_no_double_emit_at_same_level(self, agent): - """Once warned at 85%, further 85%+ readings don't re-warn.""" - agent._context_pressure_warned_at = 0.85 - # At 88%, tier is 0.85, which is NOT > warned_at (0.85) - _warn_tier = 0.85 if 0.88 >= 0.85 else 0.0 - assert not (_warn_tier > agent._context_pressure_warned_at) - - def test_flag_not_reset_when_compression_insufficient(self, agent): - """When compression can't drop below 85%, keep the flag set.""" - agent._context_pressure_warned_at = 0.85 - agent.compression_enabled = True - - agent.context_compressor = MagicMock() - agent.context_compressor.compress.return_value = [ - {"role": "user", "content": "Summary of conversation so far."} - ] - agent.context_compressor.context_length = 200 - # Use a small threshold so the tiny compressed output still - # represents >= 85% of it (prevents flag reset). - agent.context_compressor.threshold_tokens = 10 - agent.context_compressor.compression_count = 1 - agent.context_compressor.last_prompt_tokens = 0 - - agent._todo_store = MagicMock() - agent._todo_store.format_for_injection.return_value = None - agent._build_system_prompt = MagicMock(return_value="system prompt") - agent._cached_system_prompt = "old system prompt" - agent._session_db = None - - messages = [ - {"role": "user", "content": "hello"}, - {"role": "assistant", "content": "hi there"}, - ] - agent._compress_context(messages, "system prompt") - - # Post-compression is ~90% of threshold — flag should NOT reset - assert agent._context_pressure_warned_at == 0.85 - - -class TestContextPressureGatewayDedup: - """Class-level dedup prevents warning spam across AIAgent instances.""" - - def setup_method(self): - """Clear class-level dedup state between tests.""" - AIAgent._context_pressure_last_warned.clear() - - def test_second_instance_within_cooldown_suppressed(self): - """Same session, same tier, within cooldown — should be suppressed.""" - import time - sid = "test_session_dedup" - # Simulate first warning - AIAgent._context_pressure_last_warned[sid] = (0.85, time.time()) - # Second instance checking same tier within cooldown - _last = AIAgent._context_pressure_last_warned.get(sid) - _should_warn = _last is None or _last[0] < 0.85 or (time.time() - _last[1]) >= AIAgent._CONTEXT_PRESSURE_COOLDOWN - assert not _should_warn - - def test_higher_tier_fires_despite_cooldown(self): - """Same session, higher tier — should fire even within cooldown.""" - import time - sid = "test_session_tier" - AIAgent._context_pressure_last_warned[sid] = (0.85, time.time()) - _last = AIAgent._context_pressure_last_warned.get(sid) - # 0.95 > 0.85 stored tier → should warn - _should_warn = _last is None or _last[0] < 0.95 or (time.time() - _last[1]) >= AIAgent._CONTEXT_PRESSURE_COOLDOWN - assert _should_warn - - def test_warning_fires_after_cooldown_expires(self): - """Same session, same tier, after cooldown — should fire again.""" - import time - sid = "test_session_expired" - # Set a timestamp far in the past - AIAgent._context_pressure_last_warned[sid] = (0.85, time.time() - AIAgent._CONTEXT_PRESSURE_COOLDOWN - 1) - _last = AIAgent._context_pressure_last_warned.get(sid) - _should_warn = _last is None or _last[0] < 0.85 or (time.time() - _last[1]) >= AIAgent._CONTEXT_PRESSURE_COOLDOWN - assert _should_warn - - def test_compression_clears_dedup(self): - """After compression drops below 85%, dedup entry should be cleared.""" - import time - sid = "test_session_clear" - AIAgent._context_pressure_last_warned[sid] = (0.85, time.time()) - assert sid in AIAgent._context_pressure_last_warned - # Simulate what _compress_context does on reset - AIAgent._context_pressure_last_warned.pop(sid, None) - assert sid not in AIAgent._context_pressure_last_warned - - def test_eviction_removes_stale_entries(self): - """Stale entries older than 2x cooldown should be evicted.""" - import time - _now = time.time() - AIAgent._context_pressure_last_warned = { - "fresh": (0.85, _now), - "stale": (0.85, _now - AIAgent._CONTEXT_PRESSURE_COOLDOWN * 3), - } - _cutoff = _now - AIAgent._CONTEXT_PRESSURE_COOLDOWN * 2 - AIAgent._context_pressure_last_warned = { - k: v for k, v in AIAgent._context_pressure_last_warned.items() - if v[1] > _cutoff - } - assert "fresh" in AIAgent._context_pressure_last_warned - assert "stale" not in AIAgent._context_pressure_last_warned