fix(agent): strip unterminated <think> blocks from visible content

Providers served via NIM (MiniMax M2.7, some Moonshot/DeepSeek proxies) sometimes drop the closing </think> tag, leaving raw reasoning in the assistant's content field. _strip_think_blocks()'s closed-pair regex requires a closing tag, so it only matches complete blocks — any orphan <think>...EOF survived the stripper and leaked to users (#8878, #9568, #10408).

Adds an unterminated-tag pass that fires when an open reasoning tag sits at a block boundary (start of text or after a newline) with no matching close.  Everything from that tag to end of string is stripped.  The block-boundary check mirrors gateway/stream_consumer.py's filter so models that mention <think> in prose are not over-stripped.

Also makes the closed-pair regexes consistently case-insensitive so <THINK>...</THINK> and <Thinking>...</Thinking> are handled uniformly — otherwise a mixed-case open tag would bypass the closed-pair pass and be caught by the new unterminated-tag pass, taking trailing visible content with it.

6 new regression tests in TestStripThinkBlocks covering: unterminated <think>, unterminated <thought>, multi-line unterminated, line-start orphan with preserved prefix, prose-mention non-regression, mixed-case closed pairs.

The implementation is inspired by @luinbytes's PR #10408 report of the NIM/MiniMax symptom.  This commit does not include the 💭/🧠 emoji regexes from that PR — those glyphs are Hermes CLI display decorations, not model content markers.
This commit is contained in:
Teknium
2026-04-18 19:17:52 -07:00
committed by Teknium
parent 79c5a381c5
commit 9489d1577d
2 changed files with 93 additions and 7 deletions

View File

@@ -2172,17 +2172,49 @@ class AIAgent:
return bool(cleaned.strip())
def _strip_think_blocks(self, content: str) -> str:
    """Remove reasoning/thinking blocks from content, returning only visible text.

    Three passes run in a fixed order; the order matters:

    1. Closed tag pairs (``<think>…</think>``) — the common path when
       the provider emits complete reasoning blocks.
    2. Unterminated open tag at a block boundary (start of text or
       after a newline) — e.g. MiniMax M2.7 / NIM endpoints where the
       closing tag is dropped.  Everything from the open tag to end
       of string is stripped.  Restricting the match to a block
       boundary keeps prose that merely mentions ``<think>`` mid-line
       from being swallowed.
    3. Stray orphan open/close tags that slip through.

    Recognised tag names: ``think``, ``thinking``, ``reasoning``,
    ``REASONING_SCRATCHPAD`` and ``thought``, all matched
    case-insensitively.

    Args:
        content: Raw assistant text, possibly containing reasoning tags.

    Returns:
        The text with reasoning blocks removed, or ``""`` when
        ``content`` is empty or ``None``.
    """
    if not content:
        return ""

    tag_names = ("think", "thinking", "reasoning", "REASONING_SCRATCHPAD", "thought")
    any_tag = "|".join(tag_names)

    # 1. Closed tag pairs — case-insensitive for every variant so that
    #    mixed-case tags (<THINK>, <Thinking>) are consumed here instead
    #    of reaching the unterminated pass, which would also take the
    #    trailing visible content with it.
    for tag in tag_names:
        content = re.sub(
            rf'<{tag}>.*?</{tag}>',
            '',
            content,
            flags=re.DOTALL | re.IGNORECASE,
        )

    # 2. Unterminated reasoning block — open tag at a block boundary
    #    (start of text, or after a newline) with no matching close.
    #    Strip from the tag to end of string.  Fixes #8878 / #9568
    #    (MiniMax M2.7 leaking raw reasoning into assistant content).
    #    This must run BEFORE the stray-tag pass: once the open tag is
    #    removed there is nothing left to anchor on and the reasoning
    #    text would leak through.
    content = re.sub(
        rf'(?:^|\n)[ \t]*<(?:{any_tag})\b[^>]*>.*$',
        '',
        content,
        flags=re.DOTALL | re.IGNORECASE,
    )

    # 3. Stray orphan open/close tags that slipped through (for example
    #    a tag mentioned mid-line in prose, or a lone closing tag).
    content = re.sub(
        rf'</?(?:{any_tag})>\s*',
        '',
        content,
        flags=re.IGNORECASE,
    )
    return content
@staticmethod

View File

@@ -317,6 +317,60 @@ class TestStripThinkBlocks:
result = agent._strip_think_blocks("<thought>orphaned reasoning without close")
assert "<thought>" not in result
# ─── Unterminated-block coverage (#8878, #9568, #10408) ──────────────
# Reasoning models served via NIM / MiniMax M2.7 frequently drop the
# closing tag, leaking raw reasoning into assistant content. The open
# tag appears at a block boundary (start of text or after a newline);
# everything from that tag to end-of-string is stripped.
def test_unterminated_think_block_content_stripped(self, agent):
    """An unclosed ``<think>`` at the start of the text leaves nothing visible."""
    raw = "<think>orphaned reasoning without close"
    visible = agent._strip_think_blocks(raw)
    # Neither the reasoning text nor any residue may survive.
    assert "orphaned reasoning" not in visible
    assert not visible.strip()
def test_unterminated_thought_block_content_stripped(self, agent):
    """An unclosed Gemma-style ``<thought>`` is removed together with its text."""
    raw = "<thought>orphaned reasoning without close"
    visible = agent._strip_think_blocks(raw)
    # Neither the reasoning text nor any residue may survive.
    assert "orphaned reasoning" not in visible
    assert not visible.strip()
def test_unterminated_multiline_block_stripped(self, agent):
    """An unclosed block spanning several lines is removed in its entirety."""
    raw = "<think>\nmulti\nline\nreasoning\nthat never closes"
    visible = agent._strip_think_blocks(raw)
    # Check both the first and the last line of the reasoning body.
    for fragment in ("multi", "never closes"):
        assert fragment not in visible
def test_unterminated_block_after_answer_preserves_prefix(self, agent):
    """Text before a line-starting unclosed tag survives; the tail does not."""
    raw = "Answer is 42.\n<think>actually let me reconsider"
    visible = agent._strip_think_blocks(raw)
    # The answer on the first line is kept ...
    assert "Answer is 42." in visible
    # ... while everything from the orphan tag onwards is dropped.
    assert "reconsider" not in visible
def test_inline_think_mention_in_prose_not_over_stripped(self, agent):
    """A ``<think>`` mentioned mid-line in prose must not truncate the text.

    The tag is not at a block boundary, so the unterminated-block strip
    should leave the words on either side of it in place.
    """
    prose = "Use the <think> tag like this in your prose."
    visible = agent._strip_think_blocks(prose)
    # Words before and after the inline tag must both still be present.
    assert "Use the" in visible
    assert "prose" in visible
def test_mixed_case_closed_pair_stripped(self, agent):
    """Upper- and mixed-case closed pairs are removed without losing the tail.

    ``<THINK>…</THINK>`` and ``<Thinking>…</Thinking>`` must be consumed
    as closed pairs, so the text following the block stays visible.
    """
    cases = (
        ("<THINK>upper</THINK>final", "upper"),
        ("<Thinking>mixed</Thinking>final", "mixed"),
    )
    for raw, hidden in cases:
        visible = agent._strip_think_blocks(raw)
        assert hidden not in visible
        assert "final" in visible
class TestExtractReasoning:
def test_reasoning_field(self, agent):