feat(gateway): bust cached agent on compression/context_length config edits (#17008)
The gateway caches one AIAgent per session to preserve prompt-cache hits, keyed by _agent_config_signature(). The signature previously fingerprinted only model/credentials/toolsets/ephemeral-prompt — NOT the compression or context_length config. As a result, users who edited model.context_length or compression.threshold in config.yaml on a long-lived gateway saw no effect until they triggered an unrelated cache eviction (/model switch, /reset, gateway restart).

Add a new cache_keys parameter to _agent_config_signature and a _CACHE_BUSTING_CONFIG_KEYS registry listing the config values the agent bakes in at construction time. The call site reads the current config and passes it through, so the next gateway message after a config edit rebuilds the agent.

Keys registered:
- model.context_length
- compression.enabled
- compression.threshold
- compression.target_ratio
- compression.protect_last_n

Reported by @OP (Apr 26 feedback bundle).

## Changes
- gateway/run.py: new _CACHE_BUSTING_CONFIG_KEYS tuple, _extract_cache_busting_config classmethod, cache_keys kwarg on _agent_config_signature, call site passes the extracted dict
- tests/gateway/test_agent_cache.py: 11 new tests (5 on _agent_config_signature behavior, 6 on _extract_cache_busting_config)

Co-authored-by: teknium1 <teknium@users.noreply.github.com>
This commit is contained in:
@@ -9079,12 +9079,47 @@ class GatewayRunner:
|
||||
|
||||
_MAX_INTERRUPT_DEPTH = 3 # Cap recursive interrupt handling (#816)
|
||||
|
||||
# Config keys whose values MUST invalidate the gateway's cached agent
# when they change.  The agent bakes these into its compressor / context
# handling at construction time, so a config edit on a running gateway
# would otherwise be silently ignored until the user triggers a
# different cache eviction (model switch, /reset, etc.).
#
# Each entry is a (section, key) pair read from the raw config dict.
# Register new baked-at-construction config settings here as they are added.
_CACHE_BUSTING_CONFIG_KEYS: tuple[tuple[str, str], ...] = (
    ("model", "context_length"),
    ("compression", "enabled"),
    ("compression", "threshold"),
    ("compression", "target_ratio"),
    ("compression", "protect_last_n"),
)
|
||||
|
||||
@classmethod
|
||||
def _extract_cache_busting_config(cls, user_config: dict | None) -> dict:
|
||||
"""Pull the subset of config values that must bust the agent cache.
|
||||
|
||||
Returns a flat dict keyed by 'section.key'. Missing keys and
|
||||
non-dict sections yield None values, which still contribute to
|
||||
the signature (so 'absent' vs 'present-and-null' differ).
|
||||
"""
|
||||
out: Dict[str, Any] = {}
|
||||
cfg = user_config if isinstance(user_config, dict) else {}
|
||||
for section, key in cls._CACHE_BUSTING_CONFIG_KEYS:
|
||||
section_val = cfg.get(section)
|
||||
if isinstance(section_val, dict):
|
||||
out[f"{section}.{key}"] = section_val.get(key)
|
||||
else:
|
||||
out[f"{section}.{key}"] = None
|
||||
return out
|
||||
|
||||
@staticmethod
|
||||
def _agent_config_signature(
|
||||
model: str,
|
||||
runtime: dict,
|
||||
enabled_toolsets: list,
|
||||
ephemeral_prompt: str,
|
||||
cache_keys: dict | None = None,
|
||||
) -> str:
|
||||
"""Compute a stable string key from agent config values.
|
||||
|
||||
@@ -9092,6 +9127,12 @@ class GatewayRunner:
|
||||
discarded and rebuilt. When it stays the same, the cached agent is
|
||||
reused — preserving the frozen system prompt and tool schemas for
|
||||
prompt cache hits.
|
||||
|
||||
``cache_keys`` is an optional flat dict of additional config values
|
||||
that should invalidate the cache when they change. Callers pass
|
||||
the output of ``_extract_cache_busting_config(user_config)`` so
|
||||
edits to model.context_length / compression.* in config.yaml are
|
||||
picked up on the next gateway message without a manual restart.
|
||||
"""
|
||||
import hashlib, json as _j
|
||||
|
||||
@@ -9102,6 +9143,8 @@ class GatewayRunner:
|
||||
_api_key = str(runtime.get("api_key", "") or "")
|
||||
_api_key_fingerprint = hashlib.sha256(_api_key.encode()).hexdigest() if _api_key else ""
|
||||
|
||||
_cache_keys_sorted = sorted((cache_keys or {}).items())
|
||||
|
||||
blob = _j.dumps(
|
||||
[
|
||||
model,
|
||||
@@ -9113,6 +9156,7 @@ class GatewayRunner:
|
||||
# reasoning_config excluded — it's set per-message on the
|
||||
# cached agent and doesn't affect system prompt or tools.
|
||||
ephemeral_prompt or "",
|
||||
_cache_keys_sorted,
|
||||
],
|
||||
sort_keys=True,
|
||||
default=str,
|
||||
@@ -10365,6 +10409,7 @@ class GatewayRunner:
|
||||
turn_route["runtime"],
|
||||
enabled_toolsets,
|
||||
combined_ephemeral,
|
||||
cache_keys=self._extract_cache_busting_config(user_config),
|
||||
)
|
||||
agent = None
|
||||
_cache_lock = getattr(self, "_agent_cache_lock", None)
|
||||
|
||||
@@ -98,6 +98,166 @@ class TestAgentConfigSignature:
|
||||
sig2 = GatewayRunner._agent_config_signature("claude-sonnet-4", runtime, ["hermes-telegram"], "")
|
||||
assert sig1 == sig2
|
||||
|
||||
# ---------------------------------------------------------------
|
||||
# cache_keys (compression/context config cache-busting)
|
||||
# ---------------------------------------------------------------
|
||||
|
||||
def test_cache_keys_default_omitted_matches_empty(self):
    """Leaving cache_keys out must be equivalent to passing {} or None."""
    from gateway.run import GatewayRunner

    creds = {"api_key": "k", "base_url": "u", "provider": "p"}
    make_sig = GatewayRunner._agent_config_signature

    without_kwarg = make_sig("m", creds, [], "")
    with_empty_dict = make_sig("m", creds, [], "", cache_keys={})
    with_none = make_sig("m", creds, [], "", cache_keys=None)

    assert without_kwarg == with_empty_dict == with_none
|
||||
|
||||
def test_context_length_change_busts_cache(self):
    """Two different model.context_length values must not share a signature."""
    from gateway.run import GatewayRunner

    creds = {"api_key": "k", "base_url": "u", "provider": "p"}
    sig_small, sig_large = (
        GatewayRunner._agent_config_signature(
            "m", creds, [], "",
            cache_keys={"model.context_length": limit},
        )
        for limit in (200_000, 400_000)
    )

    assert sig_small != sig_large
|
||||
|
||||
def test_compression_threshold_change_busts_cache(self):
    """Two different compression.threshold values must not share a signature."""
    from gateway.run import GatewayRunner

    creds = {"api_key": "k", "base_url": "u", "provider": "p"}
    sig_low, sig_high = (
        GatewayRunner._agent_config_signature(
            "m", creds, [], "",
            cache_keys={"compression.threshold": threshold},
        )
        for threshold in (0.50, 0.75)
    )

    assert sig_low != sig_high
|
||||
|
||||
def test_compression_enabled_toggle_busts_cache(self):
    """Flipping compression.enabled must produce a different signature."""
    from gateway.run import GatewayRunner

    creds = {"api_key": "k", "base_url": "u", "provider": "p"}
    sig_enabled, sig_disabled = (
        GatewayRunner._agent_config_signature(
            "m", creds, [], "",
            cache_keys={"compression.enabled": flag},
        )
        for flag in (True, False)
    )

    assert sig_enabled != sig_disabled
|
||||
|
||||
def test_cache_keys_key_order_does_not_matter(self):
    """The signature must not depend on the insertion order of cache_keys."""
    from gateway.run import GatewayRunner

    creds = {"api_key": "k", "base_url": "u", "provider": "p"}
    context_first = {"model.context_length": 200_000, "compression.threshold": 0.5}
    threshold_first = {"compression.threshold": 0.5, "model.context_length": 200_000}

    sig_context_first = GatewayRunner._agent_config_signature(
        "m", creds, [], "", cache_keys=context_first,
    )
    sig_threshold_first = GatewayRunner._agent_config_signature(
        "m", creds, [], "", cache_keys=threshold_first,
    )

    assert sig_context_first == sig_threshold_first
|
||||
|
||||
|
||||
class TestExtractCacheBustingConfig:
    """Checks that _extract_cache_busting_config returns the documented
    subset of config values whose changes must invalidate the cached agent."""

    def test_reads_model_context_length(self):
        """model.context_length is extracted; sibling model keys are not needed."""
        from gateway.run import GatewayRunner

        user_config = {"model": {"context_length": 272_000, "provider": "openrouter"}}
        extracted = GatewayRunner._extract_cache_busting_config(user_config)

        assert extracted["model.context_length"] == 272_000

    def test_reads_compression_subkeys(self):
        """Each registered compression.* key is surfaced under its flat name."""
        from gateway.run import GatewayRunner

        compression_section = {
            "enabled": False,
            "threshold": 0.6,
            "target_ratio": 0.3,
            "protect_last_n": 25,
            "some_other_key": "ignored",
        }
        extracted = GatewayRunner._extract_cache_busting_config(
            {"compression": compression_section}
        )

        assert extracted["compression.enabled"] is False
        assert extracted["compression.threshold"] == 0.6
        assert extracted["compression.target_ratio"] == 0.3
        assert extracted["compression.protect_last_n"] == 25

    def test_missing_keys_yield_none(self):
        """An empty config still yields every registered key, each set to None."""
        from gateway.run import GatewayRunner

        extracted = GatewayRunner._extract_cache_busting_config({})

        # Every registered cache-busting key must be present, even if None
        for section, key in GatewayRunner._CACHE_BUSTING_CONFIG_KEYS:
            flat_key = f"{section}.{key}"
            assert flat_key in extracted
            assert extracted[flat_key] is None

    def test_non_dict_section_treated_as_missing(self):
        """A section holding a non-dict value must not crash the extraction."""
        from gateway.run import GatewayRunner

        # compression is a string here, so all compression.* keys come back None
        malformed = {"compression": "broken", "model": {"context_length": 100_000}}
        extracted = GatewayRunner._extract_cache_busting_config(malformed)

        assert extracted["compression.enabled"] is None
        assert extracted["compression.threshold"] is None
        assert extracted["model.context_length"] == 100_000

    def test_none_config_is_safe(self):
        """Passing None as the config yields None for every registered key."""
        from gateway.run import GatewayRunner

        extracted = GatewayRunner._extract_cache_busting_config(None)

        for section, key in GatewayRunner._CACHE_BUSTING_CONFIG_KEYS:
            assert extracted[f"{section}.{key}"] is None

    def test_full_round_trip_busts_cache_on_real_edit(self):
        """End-to-end: extract cache keys from a config before and after an
        edit and verify the two resulting signatures differ."""
        from gateway.run import GatewayRunner

        creds = {"api_key": "k", "base_url": "u", "provider": "p"}

        def signature_for(config):
            return GatewayRunner._agent_config_signature(
                "m", creds, [], "",
                cache_keys=GatewayRunner._extract_cache_busting_config(config),
            )

        original_config = {
            "model": {"context_length": 200_000},
            "compression": {"threshold": 0.50, "enabled": True},
        }
        edited_config = {
            "model": {"context_length": 200_000},
            # the user raised the threshold
            "compression": {"threshold": 0.75, "enabled": True},
        }

        sig_original = signature_for(original_config)
        sig_edited = signature_for(edited_config)

        assert sig_original != sig_edited, (
            "Editing compression.threshold in config.yaml must bust the "
            "gateway's cached agent so the new threshold takes effect."
        )
|
||||
|
||||
|
||||
class TestAgentCacheLifecycle:
|
||||
"""End-to-end cache behavior with real AIAgent construction."""
|
||||
|
||||
Reference in New Issue
Block a user