feat(gateway): bust cached agent on compression/context_length config edits (#17008)

The gateway caches one AIAgent per session to preserve prompt-cache hits, keyed by _agent_config_signature(). The signature previously fingerprinted only model/credentials/toolsets/ephemeral-prompt — NOT the compression or context_length config. As a result, users who edited model.context_length or compression.threshold in config.yaml on a long-lived gateway saw no effect until they triggered an unrelated cache eviction (/model switch, /reset, gateway restart).

Add a new cache_keys parameter to _agent_config_signature and a _CACHE_BUSTING_CONFIG_KEYS registry listing the config values the agent bakes in at construction time. Call sites read the current config and pass it through, so the next gateway message after a config edit rebuilds the agent.

Keys registered:
- model.context_length
- compression.enabled
- compression.threshold
- compression.target_ratio
- compression.protect_last_n

Reported by @OP (Apr 26 feedback bundle).

## Changes

- gateway/run.py: new _CACHE_BUSTING_CONFIG_KEYS tuple, _extract_cache_busting_config classmethod, cache_keys kwarg on _agent_config_signature; the call site passes the extracted dict
- tests/gateway/test_agent_cache.py: 11 new tests (5 on _agent_config_signature behavior, 6 on _extract_cache_busting_config)

Co-authored-by: teknium1 <teknium@users.noreply.github.com>
2026-04-28 06:37:42 -07:00
parent b5905f0d4a
commit 5f84eac451
2 changed files with 205 additions and 0 deletions
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -9079,12 +9079,47 @@ class GatewayRunner:

    _MAX_INTERRUPT_DEPTH = 3  # Cap recursive interrupt handling (#816)

+    # Config keys whose values MUST invalidate the gateway's cached agent
+    # when they change.  The agent bakes these into its compressor / context
+    # handling at construction time, so a config edit made while the
+    # gateway is running would otherwise be silently ignored until the
+    # user triggers a different cache eviction (model switch, /reset, etc.).
+    #
+    # Each entry is a tuple of (section, key) read from the raw config dict.
+    # Add more here as new baked-at-construction config settings are added.
+    _CACHE_BUSTING_CONFIG_KEYS: tuple = (
+        ("model", "context_length"),
+        ("compression", "enabled"),
+        ("compression", "threshold"),
+        ("compression", "target_ratio"),
+        ("compression", "protect_last_n"),
+    )
+
+    @classmethod
+    def _extract_cache_busting_config(cls, user_config: dict | None) -> dict:
+        """Pull the subset of config values that must bust the agent cache.
+
+        Returns a flat dict keyed by 'section.key'.  Missing keys and
+        non-dict sections yield None (the same as an explicit null), so
+        every registered key is always present in the signature input.
+        """
+        out: Dict[str, Any] = {}
+        cfg = user_config if isinstance(user_config, dict) else {}
+        for section, key in cls._CACHE_BUSTING_CONFIG_KEYS:
+            section_val = cfg.get(section)
+            if isinstance(section_val, dict):
+                out[f"{section}.{key}"] = section_val.get(key)
+            else:
+                out[f"{section}.{key}"] = None
+        return out
+
    @staticmethod
    def _agent_config_signature(
        model: str,
        runtime: dict,
        enabled_toolsets: list,
        ephemeral_prompt: str,
+        cache_keys: dict | None = None,
    ) -> str:
        """Compute a stable string key from agent config values.

@@ -9092,6 +9127,12 @@ class GatewayRunner:
        discarded and rebuilt.  When it stays the same, the cached agent is
        reused — preserving the frozen system prompt and tool schemas for
        prompt cache hits.
+
+        ``cache_keys`` is an optional flat dict of additional config values
+        that should invalidate the cache when they change.  Callers pass
+        the output of ``_extract_cache_busting_config(user_config)`` so
+        edits to model.context_length / compression.* in config.yaml are
+        picked up on the next gateway message without a manual restart.
        """
        import hashlib, json as _j

@@ -9102,6 +9143,8 @@ class GatewayRunner:
        _api_key = str(runtime.get("api_key", "") or "")
        _api_key_fingerprint = hashlib.sha256(_api_key.encode()).hexdigest() if _api_key else ""

+        _cache_keys_sorted = sorted((cache_keys or {}).items())
+
        blob = _j.dumps(
            [
                model,
@@ -9113,6 +9156,7 @@ class GatewayRunner:
                # reasoning_config excluded — it's set per-message on the
                # cached agent and doesn't affect system prompt or tools.
                ephemeral_prompt or "",
+                _cache_keys_sorted,
            ],
            sort_keys=True,
            default=str,
@@ -10365,6 +10409,7 @@ class GatewayRunner:
                turn_route["runtime"],
                enabled_toolsets,
                combined_ephemeral,
+                cache_keys=self._extract_cache_busting_config(user_config),
            )
            agent = None
            _cache_lock = getattr(self, "_agent_cache_lock", None)
--- a/tests/gateway/test_agent_cache.py
+++ b/tests/gateway/test_agent_cache.py
@@ -98,6 +98,166 @@ class TestAgentConfigSignature:
        sig2 = GatewayRunner._agent_config_signature("claude-sonnet-4", runtime, ["hermes-telegram"], "")
        assert sig1 == sig2

+    # ---------------------------------------------------------------
+    # cache_keys (compression/context config cache-busting)
+    # ---------------------------------------------------------------
+
+    def test_cache_keys_default_omitted_matches_empty(self):
+        """Omitted cache_keys must produce the same signature as empty {}."""
+        from gateway.run import GatewayRunner
+
+        runtime = {"api_key": "k", "base_url": "u", "provider": "p"}
+        sig_omitted = GatewayRunner._agent_config_signature("m", runtime, [], "")
+        sig_empty = GatewayRunner._agent_config_signature("m", runtime, [], "", cache_keys={})
+        sig_none = GatewayRunner._agent_config_signature("m", runtime, [], "", cache_keys=None)
+        assert sig_omitted == sig_empty == sig_none
+
+    def test_context_length_change_busts_cache(self):
+        """Editing model.context_length in config must produce a new signature."""
+        from gateway.run import GatewayRunner
+
+        runtime = {"api_key": "k", "base_url": "u", "provider": "p"}
+        sig1 = GatewayRunner._agent_config_signature(
+            "m", runtime, [], "",
+            cache_keys={"model.context_length": 200_000},
+        )
+        sig2 = GatewayRunner._agent_config_signature(
+            "m", runtime, [], "",
+            cache_keys={"model.context_length": 400_000},
+        )
+        assert sig1 != sig2
+
+    def test_compression_threshold_change_busts_cache(self):
+        from gateway.run import GatewayRunner
+
+        runtime = {"api_key": "k", "base_url": "u", "provider": "p"}
+        sig1 = GatewayRunner._agent_config_signature(
+            "m", runtime, [], "",
+            cache_keys={"compression.threshold": 0.50},
+        )
+        sig2 = GatewayRunner._agent_config_signature(
+            "m", runtime, [], "",
+            cache_keys={"compression.threshold": 0.75},
+        )
+        assert sig1 != sig2
+
+    def test_compression_enabled_toggle_busts_cache(self):
+        from gateway.run import GatewayRunner
+
+        runtime = {"api_key": "k", "base_url": "u", "provider": "p"}
+        sig_on = GatewayRunner._agent_config_signature(
+            "m", runtime, [], "",
+            cache_keys={"compression.enabled": True},
+        )
+        sig_off = GatewayRunner._agent_config_signature(
+            "m", runtime, [], "",
+            cache_keys={"compression.enabled": False},
+        )
+        assert sig_on != sig_off
+
+    def test_cache_keys_key_order_does_not_matter(self):
+        """Signature must be stable regardless of dict key insertion order."""
+        from gateway.run import GatewayRunner
+
+        runtime = {"api_key": "k", "base_url": "u", "provider": "p"}
+        sig_a = GatewayRunner._agent_config_signature(
+            "m", runtime, [], "",
+            cache_keys={"model.context_length": 200_000, "compression.threshold": 0.5},
+        )
+        sig_b = GatewayRunner._agent_config_signature(
+            "m", runtime, [], "",
+            cache_keys={"compression.threshold": 0.5, "model.context_length": 200_000},
+        )
+        assert sig_a == sig_b
+
+
+class TestExtractCacheBustingConfig:
+    """Verify _extract_cache_busting_config pulls the documented subset of
+    config values that must invalidate the cached agent on change."""
+
+    def test_reads_model_context_length(self):
+        from gateway.run import GatewayRunner
+
+        out = GatewayRunner._extract_cache_busting_config(
+            {"model": {"context_length": 272_000, "provider": "openrouter"}}
+        )
+        assert out["model.context_length"] == 272_000
+
+    def test_reads_compression_subkeys(self):
+        from gateway.run import GatewayRunner
+
+        out = GatewayRunner._extract_cache_busting_config(
+            {
+                "compression": {
+                    "enabled": False,
+                    "threshold": 0.6,
+                    "target_ratio": 0.3,
+                    "protect_last_n": 25,
+                    "some_other_key": "ignored",
+                }
+            }
+        )
+        assert out["compression.enabled"] is False
+        assert out["compression.threshold"] == 0.6
+        assert out["compression.target_ratio"] == 0.3
+        assert out["compression.protect_last_n"] == 25
+
+    def test_missing_keys_yield_none(self):
+        """Absent config keys must produce None values (still contribute to signature)."""
+        from gateway.run import GatewayRunner
+
+        out = GatewayRunner._extract_cache_busting_config({})
+        # Every documented cache-busting key must be present, even if None
+        for section, key in GatewayRunner._CACHE_BUSTING_CONFIG_KEYS:
+            assert f"{section}.{key}" in out
+            assert out[f"{section}.{key}"] is None
+
+    def test_non_dict_section_treated_as_missing(self):
+        from gateway.run import GatewayRunner
+
+        # compression is a string — should not crash, all compression.* keys None
+        out = GatewayRunner._extract_cache_busting_config(
+            {"compression": "broken", "model": {"context_length": 100_000}}
+        )
+        assert out["compression.enabled"] is None
+        assert out["compression.threshold"] is None
+        assert out["model.context_length"] == 100_000
+
+    def test_none_config_is_safe(self):
+        from gateway.run import GatewayRunner
+
+        out = GatewayRunner._extract_cache_busting_config(None)
+        for section, key in GatewayRunner._CACHE_BUSTING_CONFIG_KEYS:
+            assert out[f"{section}.{key}"] is None
+
+    def test_full_round_trip_busts_cache_on_real_edit(self):
+        """End-to-end: simulate a config edit on main and verify the
+        extracted cache_keys change produces a new signature."""
+        from gateway.run import GatewayRunner
+
+        runtime = {"api_key": "k", "base_url": "u", "provider": "p"}
+        cfg_before = {
+            "model": {"context_length": 200_000},
+            "compression": {"threshold": 0.50, "enabled": True},
+        }
+        cfg_after = {
+            "model": {"context_length": 200_000},
+            "compression": {"threshold": 0.75, "enabled": True},  # user raised threshold
+        }
+
+        sig_before = GatewayRunner._agent_config_signature(
+            "m", runtime, [], "",
+            cache_keys=GatewayRunner._extract_cache_busting_config(cfg_before),
+        )
+        sig_after = GatewayRunner._agent_config_signature(
+            "m", runtime, [], "",
+            cache_keys=GatewayRunner._extract_cache_busting_config(cfg_after),
+        )
+        assert sig_before != sig_after, (
+            "Editing compression.threshold in config.yaml must bust the "
+            "gateway's cached agent so the new threshold takes effect."
+        )
+

 class TestAgentCacheLifecycle:
    """End-to-end cache behavior with real AIAgent construction."""