From 9489d1577db1b05e869b9d842ccdec3197f1954b Mon Sep 17 00:00:00 2001 From: Teknium Date: Sat, 18 Apr 2026 19:17:52 -0700 Subject: [PATCH] fix(agent): strip unterminated <think> blocks from visible content MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Providers served via NIM (MiniMax M2.7, some Moonshot/DeepSeek proxies) sometimes drop the closing </think> tag, leaving raw reasoning in the assistant's content field. _strip_think_blocks()'s closed-pair regex is non-greedy so it only matches complete blocks — any orphan <think>...EOF survived the stripper and leaked to users (#8878, #9568, #10408). Adds an unterminated-tag pass that fires when an open reasoning tag sits at a block boundary (start of text or after a newline) with no matching close. Everything from that tag to end of string is stripped. The block-boundary check mirrors gateway/stream_consumer.py's filter so models that mention <think> in prose are not over-stripped. Also makes the closed-pair regexes consistently case-insensitive so <THINK>...</THINK> and <Thinking>...</Thinking> are handled uniformly — previously the mixed-case open tag would bypass the closed-pair pass and be caught by the unterminated-tag pass, taking trailing visible content with it. 6 new regression tests in TestStripThinkBlocks covering: unterminated <think>, unterminated <thought>, multi-line unterminated, line-start orphan with preserved prefix, prose-mention non-regression, mixed-case closed pairs. The implementation is inspired by @luinbytes's PR #10408 report of the NIM/MiniMax symptom. This commit does not include the 💭/🧠 emoji regexes from that PR — those glyphs are Hermes CLI display decorations, not model content markers. 
--- run_agent.py | 46 ++++++++++++++++++++++---- tests/run_agent/test_run_agent.py | 54 +++++++++++++++++++++++++++++++ 2 files changed, 93 insertions(+), 7 deletions(-) diff --git a/run_agent.py b/run_agent.py index 0051fce63..33635ef2f 100644 --- a/run_agent.py +++ b/run_agent.py @@ -2172,17 +2172,49 @@ class AIAgent: return bool(cleaned.strip()) def _strip_think_blocks(self, content: str) -> str: - """Remove reasoning/thinking blocks from content, returning only visible text.""" + """Remove reasoning/thinking blocks from content, returning only visible text. + + Handles four cases: + 1. Closed tag pairs (````) — the common path when + the provider emits complete reasoning blocks. + 2. Unterminated open tag at a block boundary (start of text or + after a newline) — e.g. MiniMax M2.7 / NIM endpoints where the + closing tag is dropped. Everything from the open tag to end + of string is stripped. The block-boundary check mirrors + ``gateway/stream_consumer.py``'s filter so models that mention + ```` in prose aren't over-stripped. + 3. Stray orphan open/close tags that slip through. + 4. Tag variants: ````, ````, ````, + ````, ```` (Gemma 4), all + case-insensitive. + """ if not content: return "" - # Strip all reasoning tag variants: , , , - # , , (Gemma 4) - content = re.sub(r'.*?', '', content, flags=re.DOTALL) + # 1. Closed tag pairs — case-insensitive for all variants so + # mixed-case tags (, ) don't slip through to + # the unterminated-tag pass and take trailing content with them. 
+ content = re.sub(r'.*?', '', content, flags=re.DOTALL | re.IGNORECASE) content = re.sub(r'.*?', '', content, flags=re.DOTALL | re.IGNORECASE) - content = re.sub(r'.*?', '', content, flags=re.DOTALL) - content = re.sub(r'.*?', '', content, flags=re.DOTALL) + content = re.sub(r'.*?', '', content, flags=re.DOTALL | re.IGNORECASE) + content = re.sub(r'.*?', '', content, flags=re.DOTALL | re.IGNORECASE) content = re.sub(r'.*?', '', content, flags=re.DOTALL | re.IGNORECASE) - content = re.sub(r'\s*', '', content, flags=re.IGNORECASE) + # 2. Unterminated reasoning block — open tag at a block boundary + # (start of text, or after a newline) with no matching close. + # Strip from the tag to end of string. Fixes #8878 / #9568 + # (MiniMax M2.7 leaking raw reasoning into assistant content). + content = re.sub( + r'(?:^|\n)[ \t]*<(?:think|thinking|reasoning|thought|REASONING_SCRATCHPAD)\b[^>]*>.*$', + '', + content, + flags=re.DOTALL | re.IGNORECASE, + ) + # 3. Stray orphan open/close tags that slipped through. + content = re.sub( + r'\s*', + '', + content, + flags=re.IGNORECASE, + ) return content @staticmethod diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py index 86f95580f..bde5ed5aa 100644 --- a/tests/run_agent/test_run_agent.py +++ b/tests/run_agent/test_run_agent.py @@ -317,6 +317,60 @@ class TestStripThinkBlocks: result = agent._strip_think_blocks("orphaned reasoning without close") assert "" not in result + # ─── Unterminated-block coverage (#8878, #9568, #10408) ────────────── + # Reasoning models served via NIM / MiniMax M2.7 frequently drop the + # closing tag, leaking raw reasoning into assistant content. The open + # tag appears at a block boundary (start of text or after a newline); + # everything from that tag to end-of-string is stripped. 
+ + def test_unterminated_think_block_content_stripped(self, agent): + """Content after unterminated is fully stripped.""" + result = agent._strip_think_blocks("orphaned reasoning without close") + assert "orphaned reasoning" not in result + assert result.strip() == "" + + def test_unterminated_thought_block_content_stripped(self, agent): + """Gemma-style with no close is fully stripped.""" + result = agent._strip_think_blocks("orphaned reasoning without close") + assert "orphaned reasoning" not in result + assert result.strip() == "" + + def test_unterminated_multiline_block_stripped(self, agent): + """Multi-line unterminated blocks are stripped in full.""" + result = agent._strip_think_blocks( + "\nmulti\nline\nreasoning\nthat never closes" + ) + assert "multi" not in result + assert "never closes" not in result + + def test_unterminated_block_after_answer_preserves_prefix(self, agent): + """Visible answer before a line-starting unterminated tag is kept.""" + result = agent._strip_think_blocks( + "Answer is 42.\nactually let me reconsider" + ) + assert "Answer is 42." in result + assert "reconsider" not in result + + def test_inline_think_mention_in_prose_not_over_stripped(self, agent): + """Mid-line `` mentioned in prose must not swallow the rest + of the content (the block-boundary check prevents this).""" + text = "Use the tag like this in your prose." 
+ result = agent._strip_think_blocks(text) + # Block-boundary check prevents unterminated-strip from firing + assert "prose" in result + assert "Use the" in result + + def test_mixed_case_closed_pair_stripped(self, agent): + """Mixed-case variants , are + handled by case-insensitive closed-pair regex, so the trailing + content is preserved.""" + result = agent._strip_think_blocks("upperfinal") + assert "upper" not in result + assert "final" in result + result = agent._strip_think_blocks("mixedfinal") + assert "mixed" not in result + assert "final" in result + class TestExtractReasoning: def test_reasoning_field(self, agent):