feat(gateway): bust cached agent on compression/context_length config edits (#17008)

The gateway caches one AIAgent per session to preserve prompt-cache hits,
keyed by _agent_config_signature().  The signature previously only
fingerprinted model/credentials/toolsets/ephemeral-prompt — NOT the
compression or context_length config.  As a result, users who edited
model.context_length or compression.threshold in config.yaml on a
long-lived gateway saw no effect until they triggered an unrelated
cache eviction (/model switch, /reset, gateway restart).

Add a new cache_keys parameter to _agent_config_signature and a
_CACHE_BUSTING_CONFIG_KEYS registry listing config values the agent
bakes in at construction time.  The call site extracts these values
from the current config and passes them as cache_keys, so the next
gateway message after a config edit rebuilds the agent.

Keys registered:
- model.context_length
- compression.enabled
- compression.threshold
- compression.target_ratio
- compression.protect_last_n

Reported by @OP (Apr 26 feedback bundle).

## Changes
- gateway/run.py: new _CACHE_BUSTING_CONFIG_KEYS tuple,
  _extract_cache_busting_config classmethod, cache_keys kwarg on
  _agent_config_signature, call site passes the extracted dict
- tests/gateway/test_agent_cache.py: 11 new tests
  (5 on _agent_config_signature behavior, 6 on _extract_cache_busting_config)

Co-authored-by: teknium1 <teknium@users.noreply.github.com>
This commit is contained in:
Teknium
2026-04-28 06:37:42 -07:00
committed by GitHub
parent b5905f0d4a
commit 5f84eac451
2 changed files with 205 additions and 0 deletions

View File

@@ -9079,12 +9079,47 @@ class GatewayRunner:
_MAX_INTERRUPT_DEPTH = 3 # Cap recursive interrupt handling (#816)
# Config keys whose values MUST invalidate the gateway's cached agent
# when they change. The agent bakes these into its compressor / context
# handling at construction time, so a mid-running-gateway config edit
# would otherwise be silently ignored until the user triggers a
# different cache eviction (model switch, /reset, etc.).
#
# Each entry is a (section, key) pair read from the raw config dict.
# _extract_cache_busting_config() flattens every pair into a
# "section.key" entry, and the call site passes that flat dict to
# _agent_config_signature() as ``cache_keys``.
# Add more here as new baked-at-construction config settings are added.
_CACHE_BUSTING_CONFIG_KEYS: tuple[tuple[str, str], ...] = (
("model", "context_length"),
("compression", "enabled"),
("compression", "threshold"),
("compression", "target_ratio"),
("compression", "protect_last_n"),
)
@classmethod
def _extract_cache_busting_config(cls, user_config: dict | None) -> dict:
    """Pull the subset of config values that must bust the agent cache.

    ``user_config`` is the raw config dict (section name -> section
    dict).  ``None`` or any other non-dict value is treated as an empty
    config.

    Returns a flat dict keyed by 'section.key' containing exactly one
    entry per pair in ``_CACHE_BUSTING_CONFIG_KEYS``.  A missing
    section, a non-dict section, and a missing key all yield ``None``
    -- the same value as a key explicitly set to null -- so those cases
    are indistinguishable in the result.  Every registered key is
    always present, so the output has the same shape regardless of
    what the config contains.
    """
    cfg = user_config if isinstance(user_config, dict) else {}
    out: Dict[str, Any] = {}
    for section, key in cls._CACHE_BUSTING_CONFIG_KEYS:
        section_val = cfg.get(section)
        # Only a dict section can be queried; anything else (absent,
        # string, list, ...) counts as "no value configured".
        out[f"{section}.{key}"] = (
            section_val.get(key) if isinstance(section_val, dict) else None
        )
    return out
@staticmethod
def _agent_config_signature(
model: str,
runtime: dict,
enabled_toolsets: list,
ephemeral_prompt: str,
cache_keys: dict | None = None,
) -> str:
"""Compute a stable string key from agent config values.
@@ -9092,6 +9127,12 @@ class GatewayRunner:
discarded and rebuilt. When it stays the same, the cached agent is
reused — preserving the frozen system prompt and tool schemas for
prompt cache hits.
``cache_keys`` is an optional flat dict of additional config values
that should invalidate the cache when they change. Callers pass
the output of ``_extract_cache_busting_config(user_config)`` so
edits to model.context_length / compression.* in config.yaml are
picked up on the next gateway message without a manual restart.
"""
import hashlib, json as _j
@@ -9102,6 +9143,8 @@ class GatewayRunner:
_api_key = str(runtime.get("api_key", "") or "")
_api_key_fingerprint = hashlib.sha256(_api_key.encode()).hexdigest() if _api_key else ""
_cache_keys_sorted = sorted((cache_keys or {}).items())
blob = _j.dumps(
[
model,
@@ -9113,6 +9156,7 @@ class GatewayRunner:
# reasoning_config excluded — it's set per-message on the
# cached agent and doesn't affect system prompt or tools.
ephemeral_prompt or "",
_cache_keys_sorted,
],
sort_keys=True,
default=str,
@@ -10365,6 +10409,7 @@ class GatewayRunner:
turn_route["runtime"],
enabled_toolsets,
combined_ephemeral,
cache_keys=self._extract_cache_busting_config(user_config),
)
agent = None
_cache_lock = getattr(self, "_agent_cache_lock", None)

View File

@@ -98,6 +98,166 @@ class TestAgentConfigSignature:
sig2 = GatewayRunner._agent_config_signature("claude-sonnet-4", runtime, ["hermes-telegram"], "")
assert sig1 == sig2
# ---------------------------------------------------------------
# cache_keys (compression/context config cache-busting)
# ---------------------------------------------------------------
def test_cache_keys_default_omitted_matches_empty(self):
    """Leaving cache_keys out, passing {} and passing None must all
    produce one and the same signature."""
    from gateway.run import GatewayRunner

    runtime = {"api_key": "k", "base_url": "u", "provider": "p"}

    def sign(**extra):
        # Fresh toolset list per call, exactly like separate literal calls.
        return GatewayRunner._agent_config_signature("m", runtime, [], "", **extra)

    omitted = sign()
    with_empty = sign(cache_keys={})
    with_none = sign(cache_keys=None)
    assert omitted == with_empty
    assert with_empty == with_none
def test_context_length_change_busts_cache(self):
    """Two different model.context_length values must not share a signature."""
    from gateway.run import GatewayRunner

    runtime = {"api_key": "k", "base_url": "u", "provider": "p"}
    smaller, larger = (
        GatewayRunner._agent_config_signature(
            "m", runtime, [], "",
            cache_keys={"model.context_length": tokens},
        )
        for tokens in (200_000, 400_000)
    )
    assert smaller != larger
def test_compression_threshold_change_busts_cache(self):
    """Changing compression.threshold must change the signature."""
    from gateway.run import GatewayRunner

    runtime = {"api_key": "k", "base_url": "u", "provider": "p"}
    lower, higher = (
        GatewayRunner._agent_config_signature(
            "m", runtime, [], "",
            cache_keys={"compression.threshold": threshold},
        )
        for threshold in (0.50, 0.75)
    )
    assert lower != higher
def test_compression_enabled_toggle_busts_cache(self):
    """Flipping compression.enabled must change the signature."""
    from gateway.run import GatewayRunner

    runtime = {"api_key": "k", "base_url": "u", "provider": "p"}
    by_flag = {
        flag: GatewayRunner._agent_config_signature(
            "m", runtime, [], "",
            cache_keys={"compression.enabled": flag},
        )
        for flag in (True, False)
    }
    assert by_flag[True] != by_flag[False]
def test_cache_keys_key_order_does_not_matter(self):
    """The same cache_keys inserted in a different order must yield the
    same signature."""
    from gateway.run import GatewayRunner

    runtime = {"api_key": "k", "base_url": "u", "provider": "p"}
    pairs = [("model.context_length", 200_000), ("compression.threshold", 0.5)]

    forward = GatewayRunner._agent_config_signature(
        "m", runtime, [], "",
        cache_keys=dict(pairs),
    )
    backward = GatewayRunner._agent_config_signature(
        "m", runtime, [], "",
        cache_keys=dict(reversed(pairs)),
    )
    assert forward == backward
class TestExtractCacheBustingConfig:
    """Checks that _extract_cache_busting_config returns the registered
    cache-busting subset of the config and tolerates malformed input."""

    def test_reads_model_context_length(self):
        from gateway.run import GatewayRunner

        config = {"model": {"context_length": 272_000, "provider": "openrouter"}}
        extracted = GatewayRunner._extract_cache_busting_config(config)
        assert extracted["model.context_length"] == 272_000

    def test_reads_compression_subkeys(self):
        from gateway.run import GatewayRunner

        compression = {
            "enabled": False,
            "threshold": 0.6,
            "target_ratio": 0.3,
            "protect_last_n": 25,
            "some_other_key": "ignored",
        }
        extracted = GatewayRunner._extract_cache_busting_config(
            {"compression": compression}
        )
        assert extracted["compression.enabled"] is False
        assert extracted["compression.threshold"] == 0.6
        assert extracted["compression.target_ratio"] == 0.3
        assert extracted["compression.protect_last_n"] == 25

    def test_missing_keys_yield_none(self):
        """An empty config still reports every registered key, each as None."""
        from gateway.run import GatewayRunner

        extracted = GatewayRunner._extract_cache_busting_config({})
        # Each registered key has to show up, with None as its value.
        for section, key in GatewayRunner._CACHE_BUSTING_CONFIG_KEYS:
            flat_key = f"{section}.{key}"
            assert flat_key in extracted
            assert extracted[flat_key] is None

    def test_non_dict_section_treated_as_missing(self):
        from gateway.run import GatewayRunner

        # A string where a section dict belongs must not raise; the
        # compression.* entries simply come back as None.
        malformed = {"compression": "broken", "model": {"context_length": 100_000}}
        extracted = GatewayRunner._extract_cache_busting_config(malformed)
        assert extracted["compression.enabled"] is None
        assert extracted["compression.threshold"] is None
        assert extracted["model.context_length"] == 100_000

    def test_none_config_is_safe(self):
        from gateway.run import GatewayRunner

        extracted = GatewayRunner._extract_cache_busting_config(None)
        for section, key in GatewayRunner._CACHE_BUSTING_CONFIG_KEYS:
            assert extracted[f"{section}.{key}"] is None

    def test_full_round_trip_busts_cache_on_real_edit(self):
        """Round trip: extract cache_keys from a config before and after a
        threshold edit and check that the two signatures differ."""
        from gateway.run import GatewayRunner

        runtime = {"api_key": "k", "base_url": "u", "provider": "p"}

        def signature_for(threshold):
            # Only compression.threshold varies between the two configs.
            config = {
                "model": {"context_length": 200_000},
                "compression": {"threshold": threshold, "enabled": True},
            }
            return GatewayRunner._agent_config_signature(
                "m", runtime, [], "",
                cache_keys=GatewayRunner._extract_cache_busting_config(config),
            )

        sig_before = signature_for(0.50)
        sig_after = signature_for(0.75)  # user raised threshold
        assert sig_before != sig_after, (
            "Editing compression.threshold in config.yaml must bust the "
            "gateway's cached agent so the new threshold takes effect."
        )
class TestAgentCacheLifecycle:
"""End-to-end cache behavior with real AIAgent construction."""