fix(agent): strip unterminated <think> blocks from visible content
Providers served via NIM (MiniMax M2.7, some Moonshot/DeepSeek proxies) sometimes drop the closing </think> tag, leaving raw reasoning in the assistant's content field. _strip_think_blocks()'s closed-pair regex is a non-greedy open…close match that requires the closing tag, so it only matches complete blocks — any orphan <think>...EOF survived the stripper and leaked to users (#8878, #9568, #10408).

Adds an unterminated-tag pass that fires when an open reasoning tag sits at a block boundary (start of text or after a newline) with no matching close. Everything from that tag to end of string is stripped. The block-boundary check mirrors gateway/stream_consumer.py's filter so models that mention <think> in prose are not over-stripped.

Also makes the closed-pair regexes consistently case-insensitive so <THINK>...</THINK> and <Thinking>...</Thinking> are handled uniformly — otherwise a mixed-case open tag would bypass the closed-pair pass and be caught by the new unterminated-tag pass, taking trailing visible content with it.

Six new regression tests in TestStripThinkBlocks cover: unterminated <think>, unterminated <thought>, multi-line unterminated, line-start orphan with preserved prefix, prose-mention non-regression, mixed-case closed pairs.

The implementation is inspired by @luinbytes's PR #10408 report of the NIM/MiniMax symptom. This commit does not include the 💭/🧠 emoji regexes from that PR — those glyphs are Hermes CLI display decorations, not model content markers.
This commit is contained in:
46
run_agent.py
46
run_agent.py
@@ -2172,17 +2172,49 @@ class AIAgent:
|
||||
return bool(cleaned.strip())
|
||||
|
||||
def _strip_think_blocks(self, content: str) -> str:
|
||||
"""Remove reasoning/thinking blocks from content, returning only visible text."""
|
||||
"""Remove reasoning/thinking blocks from content, returning only visible text.
|
||||
|
||||
Handles four cases:
|
||||
1. Closed tag pairs (``<think>…</think>``) — the common path when
|
||||
the provider emits complete reasoning blocks.
|
||||
2. Unterminated open tag at a block boundary (start of text or
|
||||
after a newline) — e.g. MiniMax M2.7 / NIM endpoints where the
|
||||
closing tag is dropped. Everything from the open tag to end
|
||||
of string is stripped. The block-boundary check mirrors
|
||||
``gateway/stream_consumer.py``'s filter so models that mention
|
||||
``<think>`` in prose aren't over-stripped.
|
||||
3. Stray orphan open/close tags that slip through.
|
||||
4. Tag variants: ``<think>``, ``<thinking>``, ``<reasoning>``,
|
||||
``<REASONING_SCRATCHPAD>``, ``<thought>`` (Gemma 4), all
|
||||
case-insensitive.
|
||||
"""
|
||||
if not content:
|
||||
return ""
|
||||
# Strip all reasoning tag variants: <think>, <thinking>, <THINKING>,
|
||||
# <reasoning>, <REASONING_SCRATCHPAD>, <thought> (Gemma 4)
|
||||
content = re.sub(r'<think>.*?</think>', '', content, flags=re.DOTALL)
|
||||
# 1. Closed tag pairs — case-insensitive for all variants so
|
||||
# mixed-case tags (<THINK>, <Thinking>) don't slip through to
|
||||
# the unterminated-tag pass and take trailing content with them.
|
||||
content = re.sub(r'<think>.*?</think>', '', content, flags=re.DOTALL | re.IGNORECASE)
|
||||
content = re.sub(r'<thinking>.*?</thinking>', '', content, flags=re.DOTALL | re.IGNORECASE)
|
||||
content = re.sub(r'<reasoning>.*?</reasoning>', '', content, flags=re.DOTALL)
|
||||
content = re.sub(r'<REASONING_SCRATCHPAD>.*?</REASONING_SCRATCHPAD>', '', content, flags=re.DOTALL)
|
||||
content = re.sub(r'<reasoning>.*?</reasoning>', '', content, flags=re.DOTALL | re.IGNORECASE)
|
||||
content = re.sub(r'<REASONING_SCRATCHPAD>.*?</REASONING_SCRATCHPAD>', '', content, flags=re.DOTALL | re.IGNORECASE)
|
||||
content = re.sub(r'<thought>.*?</thought>', '', content, flags=re.DOTALL | re.IGNORECASE)
|
||||
content = re.sub(r'</?(?:think|thinking|reasoning|thought|REASONING_SCRATCHPAD)>\s*', '', content, flags=re.IGNORECASE)
|
||||
# 2. Unterminated reasoning block — open tag at a block boundary
|
||||
# (start of text, or after a newline) with no matching close.
|
||||
# Strip from the tag to end of string. Fixes #8878 / #9568
|
||||
# (MiniMax M2.7 leaking raw reasoning into assistant content).
|
||||
content = re.sub(
|
||||
r'(?:^|\n)[ \t]*<(?:think|thinking|reasoning|thought|REASONING_SCRATCHPAD)\b[^>]*>.*$',
|
||||
'',
|
||||
content,
|
||||
flags=re.DOTALL | re.IGNORECASE,
|
||||
)
|
||||
# 3. Stray orphan open/close tags that slipped through.
|
||||
content = re.sub(
|
||||
r'</?(?:think|thinking|reasoning|thought|REASONING_SCRATCHPAD)>\s*',
|
||||
'',
|
||||
content,
|
||||
flags=re.IGNORECASE,
|
||||
)
|
||||
return content
|
||||
|
||||
@staticmethod
|
||||
|
||||
@@ -317,6 +317,60 @@ class TestStripThinkBlocks:
|
||||
result = agent._strip_think_blocks("<thought>orphaned reasoning without close")
|
||||
assert "<thought>" not in result
|
||||
|
||||
# ─── Unterminated-block coverage (#8878, #9568, #10408) ──────────────
|
||||
# Reasoning models served via NIM / MiniMax M2.7 frequently drop the
|
||||
# closing tag, leaking raw reasoning into assistant content. The open
|
||||
# tag appears at a block boundary (start of text or after a newline);
|
||||
# everything from that tag to end-of-string is stripped.
|
||||
|
||||
def test_unterminated_think_block_content_stripped(self, agent):
|
||||
"""Content after unterminated <think> is fully stripped."""
|
||||
result = agent._strip_think_blocks("<think>orphaned reasoning without close")
|
||||
assert "orphaned reasoning" not in result
|
||||
assert result.strip() == ""
|
||||
|
||||
def test_unterminated_thought_block_content_stripped(self, agent):
|
||||
"""Gemma-style <thought> with no close is fully stripped."""
|
||||
result = agent._strip_think_blocks("<thought>orphaned reasoning without close")
|
||||
assert "orphaned reasoning" not in result
|
||||
assert result.strip() == ""
|
||||
|
||||
def test_unterminated_multiline_block_stripped(self, agent):
|
||||
"""Multi-line unterminated blocks are stripped in full."""
|
||||
result = agent._strip_think_blocks(
|
||||
"<think>\nmulti\nline\nreasoning\nthat never closes"
|
||||
)
|
||||
assert "multi" not in result
|
||||
assert "never closes" not in result
|
||||
|
||||
def test_unterminated_block_after_answer_preserves_prefix(self, agent):
|
||||
"""Visible answer before a line-starting unterminated tag is kept."""
|
||||
result = agent._strip_think_blocks(
|
||||
"Answer is 42.\n<think>actually let me reconsider"
|
||||
)
|
||||
assert "Answer is 42." in result
|
||||
assert "reconsider" not in result
|
||||
|
||||
def test_inline_think_mention_in_prose_not_over_stripped(self, agent):
|
||||
"""Mid-line `<think>` mentioned in prose must not swallow the rest
|
||||
of the content (the block-boundary check prevents this)."""
|
||||
text = "Use the <think> tag like this in your prose."
|
||||
result = agent._strip_think_blocks(text)
|
||||
# Block-boundary check prevents unterminated-strip from firing
|
||||
assert "prose" in result
|
||||
assert "Use the" in result
|
||||
|
||||
def test_mixed_case_closed_pair_stripped(self, agent):
|
||||
"""Mixed-case variants <THINK>…</THINK>, <Thinking>…</Thinking> are
|
||||
handled by case-insensitive closed-pair regex, so the trailing
|
||||
content is preserved."""
|
||||
result = agent._strip_think_blocks("<THINK>upper</THINK>final")
|
||||
assert "upper" not in result
|
||||
assert "final" in result
|
||||
result = agent._strip_think_blocks("<Thinking>mixed</Thinking>final")
|
||||
assert "mixed" not in result
|
||||
assert "final" in result
|
||||
|
||||
|
||||
class TestExtractReasoning:
|
||||
def test_reasoning_field(self, agent):
|
||||
|
||||
Reference in New Issue
Block a user