test(context_compressor): regression test for PR #17025 tail-protection off-by-one

When len(messages) <= protect_tail_count and a token budget is set, the
previous formula min(protect_tail_count, len(result) - 1) under-protected
the tail by one, allowing the oldest message to be summarized.

The test fails on the buggy formula (pruned == 1) and passes on the fix
(pruned == 0, tool content preserved verbatim).
This commit is contained in:
Teknium
2026-04-30 19:59:36 -07:00
parent b194617d00
commit 8b7b074df9

View File

@@ -1143,6 +1143,34 @@ class TestTokenBudgetTailProtection:
# At least one old tool result should have been pruned
assert pruned >= 1
def test_prune_short_conv_protects_entire_tail(self, budget_compressor):
    """Keep every message when the conversation fits in the tail window.

    Regression guard for PR #17025.

    With a token budget set and ``len(messages) <= protect_tail_count``,
    no message may be pruned. The earlier floor,
    ``min(protect_tail_count, len(result) - 1)``, stopped one short of
    the full length and so left the oldest message eligible for pruning.
    """
    big_tool_output = "x" * 5000

    # Four messages against protect_tail_count=4: the whole conversation
    # is tail. The large tool result is placed first on purpose -- it is
    # the one message the old formula left outside the protected window,
    # so it is the one that would have been summarized.
    conversation = [
        {"role": "tool", "content": big_tool_output, "tool_call_id": "c0"},
        {"role": "assistant", "content": "ack"},
        {"role": "user", "content": "recent"},
        {"role": "assistant", "content": "reply"},
    ]

    kept, pruned = budget_compressor._prune_old_tool_results(
        conversation,
        protect_tail_count=4,
        protect_tail_tokens=1_000_000,  # budget large enough to protect all
    )

    # Nothing may be pruned ...
    assert pruned == 0
    # ... and the oldest tool result must come back verbatim.
    assert kept[0]["content"] == big_tool_output
def test_prune_without_token_budget_uses_message_count(self, budget_compressor):
"""Without protect_tail_tokens, falls back to message-count behavior."""
c = budget_compressor