From e553f6f3e4c61adc529615caea07f2a50a81f555 Mon Sep 17 00:00:00 2001
From: Erosika <eri@plasticlabs.ai>
Date: Mon, 27 Apr 2026 14:32:20 -0400
Subject: [PATCH] fix(memory): narrow scrub surface to known wrapper boundaries
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewer pushback on the original boundary-hardening commits — three
overreach points pulled plugin-specific policy into shared core paths:

1. gateway/run.py hardcoded a '## Honcho Context' literal split for
   vision-LLM output.  Plugin-format heading in framework code; could
   truncate legitimate output naturally containing that header.
   Drop the literal split; keep generic sanitize_context (the wrapper
   strip is plugin-agnostic).  Plugin-specific cleanup belongs at the
   provider boundary, not the shared gateway path.

2. run_agent.run_conversation scrubbed user_message and
   persist_user_message before the conversation loop.  User text is
   sacred — if a user types a literal <memory-context> tag we must
   not silently delete it.  The producer (build_memory_context_block)
   is the only legitimate emitter; user input should never need the
   reverse op.

3. _build_assistant_message scrubbed model output before persistence.
   Same hazard: would silently mutate legitimate documentation/code
   the model emits containing the literal markers.  The streaming
   scrubber catches real leaks delta-by-delta before content is
   concatenated; persist-time scrub was redundant belt-and-suspenders.

4. _fire_stream_delta stripped leading newlines from every delta unless
   a paragraph break flag was set.  Mid-stream '\n' is legitimate
   markdown — lists, code fences, paragraph breaks — and chunk
   boundaries are arbitrary.  Narrow lstrip to the very first delta
   of the stream only (so stale provider preamble still gets cleaned
   on turn start, but mid-stream formatting survives).

Plus: build_memory_context_block now logs a warning when its defensive
sanitize_context strips something — surfaces buggy providers returning
pre-wrapped text instead of silently double-fencing.

Net architectural change: scrub surface collapses from 8 sites to 3
(StreamingContextScrubber on output deltas, plugin→backend send,
build_memory_context_block input-validation).  Plugin-specific strings
stay out of shared runtime paths.  User input and persisted assistant
output are no longer mutated.

Tests: rescoped TestMemoryContextSanitization (helper-correctness only,
no source-inspection of removed call sites), updated vision tests to
drop '## Honcho Context' literal-split assertions, updated
_build_assistant_message persistence test to assert preservation.
Added: cross-turn scrubber reset, build_memory_context_block warn-on-
violation, mid-stream newline preservation (plain + code fence).
---
 agent/memory_manager.py                       | 10 +++
 gateway/run.py                                | 11 ++--
 run_agent.py                                  | 20 +++---
 .../agent/test_streaming_context_scrubber.py  | 61 +++++++++++++++++++
 tests/gateway/test_vision_memory_leak.py      | 41 ++++---------
 tests/run_agent/test_run_agent.py             | 46 +++++++-------
 .../test_run_agent_codex_responses.py         | 40 ++++++++++++
 7 files changed, 159 insertions(+), 70 deletions(-)
diff --git a/agent/memory_manager.py b/agent/memory_manager.py
index 953f41b3c..fb1c4d639 100644
--- a/agent/memory_manager.py
+++ b/agent/memory_manager.py
@@ -179,10 +179,20 @@ def build_memory_context_block(raw_context: str) -> str:
 
     The fence prevents the model from treating recalled context as user
     discourse.  Injected at API-call time only — never persisted.
+
+    A provider returning text that already contains the wrapper is a
+    contract violation (would produce nested fences).  We strip defensively
+    and warn so the buggy provider surfaces in logs instead of silently
+    double-fencing.
     """
     if not raw_context or not raw_context.strip():
         return ""
     clean = sanitize_context(raw_context)
+    if clean != raw_context:
+        logger.warning(
+            "memory provider returned text containing <memory-context> wrapper; "
+            "stripped before re-fencing (provider contract violation)"
+        )
     return (
         "<memory-context>\n"
         "[System note: The following is recalled memory context, "
diff --git a/gateway/run.py b/gateway/run.py
index 9fa4298e9..cbb4ae00d 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -8502,14 +8502,11 @@ class GatewayRunner:
                 result = json.loads(result_json)
                 if result.get("success"):
                     description = result.get("analysis", "")
-                    # The auxiliary vision LLM can echo injected system-prompt
-                    # memory context back into its output (#5719).  Scrub any
-                    # <memory-context> fences and the "## Honcho Context"
-                    # section before the description lands in a user-visible
-                    # message.
+                    # Vision auxiliary LLM can echo the injected system-prompt
+                    # memory-context wrapper back into its output (#5719).
+                    # sanitize_context strips the fenced wrapper; plugin-specific
+                    # header cleanup belongs at the provider boundary, not here.
                     description = sanitize_context(description)
-                    if "## Honcho Context" in description:
-                        description = description.split("## Honcho Context", 1)[0].rstrip()
                     enriched_parts.append(
                         f"[The user sent an image~ Here's what I can see:\n{description}]\n"
                         f"[If you need a closer look, use vision_analyze with "
diff --git a/run_agent.py b/run_agent.py
index 7850ac15c..37f162761 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -6102,7 +6102,13 @@ class AIAgent:
             else:
                 # Defensive: legacy callers without the scrubber attribute.
                 text = sanitize_context(text)
-            if not prepended_break:
+            # Strip leading newlines only on the very first delta of the stream,
+            # and only when we didn't just prepend a paragraph break ourselves.
+            # Mid-stream "\n" is legitimate markdown (lists, code, paragraphs)
+            # and must survive — chunk boundaries are arbitrary.
+            if not prepended_break and not getattr(
+                self, "_current_streamed_assistant_text", ""
+            ):
                 text = text.lstrip("\n")
         if not text:
             return
@@ -8077,7 +8083,7 @@ class AIAgent:
         # API replay, session transcript, gateway delivery, CLI display,
         # compression, title generation.
         if isinstance(_san_content, str) and _san_content:
-            _san_content = sanitize_context(self._strip_think_blocks(_san_content)).strip()
+            _san_content = self._strip_think_blocks(_san_content).strip()
 
         msg = {
             "role": "assistant",
@@ -9629,16 +9635,6 @@ class AIAgent:
         if isinstance(persist_user_message, str):
             persist_user_message = _sanitize_surrogates(persist_user_message)
 
-        # Strip leaked <memory-context> blocks from user input.  When Honcho's
-        # saveMessages persists a turn that included injected context, the block
-        # can reappear in the next turn's user message via message history.
-        # Stripping here prevents stale memory tags from leaking into the
-        # conversation and being visible to the user or the model as user text.
-        if isinstance(user_message, str):
-            user_message = sanitize_context(user_message)
-        if isinstance(persist_user_message, str):
-            persist_user_message = sanitize_context(persist_user_message)
-
         # Store stream callback for _interruptible_api_call to pick up
         self._stream_callback = stream_callback
         self._persist_user_message_idx = None
diff --git a/tests/agent/test_streaming_context_scrubber.py b/tests/agent/test_streaming_context_scrubber.py
index 2dbb17f42..13888dfe7 100644
--- a/tests/agent/test_streaming_context_scrubber.py
+++ b/tests/agent/test_streaming_context_scrubber.py
@@ -148,3 +148,64 @@ class TestSanitizeContextUnchanged:
         )
         out = sanitize_context(leaked).strip()
         assert out == "Visible"
+
+
+class TestStreamingContextScrubberCrossTurn:
+    """A scrubber instance is reused across turns (per agent).  reset() must
+    clear any held state so a partial-tag tail from turn N doesn't bleed
+    into turn N+1's first delta."""
+
+    def test_reset_clears_held_partial_tag(self):
+        s = StreamingContextScrubber()
+        # Feed a partial open-tag prefix that gets held back as buffer.
+        out_turn_1 = s.feed("answer<memo")
+        assert out_turn_1 == "answer"
+
+        # Reset for next turn — buffer must clear.
+        s.reset()
+
+        # New turn: plain text starting with a "<m" must NOT be treated as
+        # the continuation of the held "<memo".
+        out_turn_2 = s.feed("<marker>fresh content")
+        assert out_turn_2 == "<marker>fresh content"
+
+    def test_reset_clears_in_span_state(self):
+        s = StreamingContextScrubber()
+        s.feed("text<memory-context>secret-tail")
+        # Mid-span state held — without reset, subsequent text would be
+        # discarded until we see </memory-context>.
+        s.reset()
+        out = s.feed("post-reset visible text")
+        assert out == "post-reset visible text"
+
+
+class TestBuildMemoryContextBlockWarnsOnViolation:
+    """Providers must return raw context — not pre-wrapped.  When they do,
+    we strip and warn so the buggy provider surfaces."""
+
+    def test_provider_emitting_wrapper_warns(self, caplog):
+        import logging
+        from agent.memory_manager import build_memory_context_block
+
+        prewrapped = (
+            "<memory-context>\n"
+            "[System note: ...]\n\n"
+            "real fact\n"
+            "</memory-context>"
+        )
+        with caplog.at_level(logging.WARNING, logger="agent.memory_manager"):
+            out = build_memory_context_block(prewrapped)
+
+        assert any("contract violation" in rec.message for rec in caplog.records)
+        assert out.count("<memory-context>") == 1
+        assert out.count("</memory-context>") == 1
+
+    def test_clean_provider_output_does_not_warn(self, caplog):
+        import logging
+        from agent.memory_manager import build_memory_context_block
+
+        with caplog.at_level(logging.WARNING, logger="agent.memory_manager"):
+            out = build_memory_context_block("plain fact about user")
+
+        assert not any("contract violation" in rec.message for rec in caplog.records)
+        assert "plain fact about user" in out
diff --git a/tests/gateway/test_vision_memory_leak.py b/tests/gateway/test_vision_memory_leak.py
index 5f6f0a776..505b78117 100644
--- a/tests/gateway/test_vision_memory_leak.py
+++ b/tests/gateway/test_vision_memory_leak.py
@@ -1,13 +1,12 @@
 """Tests for _enrich_message_with_vision — regression for #5719.
 
-The auxiliary vision LLM can echo system-prompt Honcho memory back into
-its analysis output. When that echo reaches the user as the enriched
-image description, recalled memory context (personal facts, dialectic
-output) surfaces into a user-visible message.
+The auxiliary vision LLM can echo system-prompt memory-context back into
+its analysis output.  The boundary fix in gateway/run.py runs the generic
+sanitize_context helper over the description so the fenced wrapper and
+its system-note are removed before the description reaches the user.
 
-The boundary fix in gateway/run.py strips both <memory-context>...</memory-context>
-fenced blocks AND any "## Honcho Context" section from vision descriptions
-before they're embedded into the enriched user message.
+Plugin-specific header cleanup (e.g. "## Honcho Context") belongs at the
+provider boundary, not in this shared gateway path.
 """
 
 import asyncio
@@ -43,22 +42,6 @@ class TestEnrichMessageWithVision:
             out = _run(gateway_runner._enrich_message_with_vision("caption", ["/tmp/img.jpg"]))
         assert "sunset over the ocean" in out
 
-    def test_honcho_context_header_stripped(self, gateway_runner):
-        """'## Honcho Context' section and everything after is removed."""
-        leaked = (
-            "A photograph of a sunset.\n\n"
-            "## Honcho Context\n"
-            "User prefers concise answers, works at Plastic Labs,\n"
-            "uses OPSEC pseudonyms.\n"
-        )
-        fake_result = json.dumps({"success": True, "analysis": leaked})
-        with patch("tools.vision_tools.vision_analyze_tool", new=AsyncMock(return_value=fake_result)):
-            out = _run(gateway_runner._enrich_message_with_vision("caption", ["/tmp/img.jpg"]))
-        assert "sunset" in out
-        assert "Honcho Context" not in out
-        assert "Plastic Labs" not in out
-        assert "OPSEC" not in out
-
     def test_memory_context_fence_stripped(self, gateway_runner):
         """<memory-context>...</memory-context> fenced block is scrubbed."""
         leaked = (
@@ -77,23 +60,21 @@ class TestEnrichMessageWithVision:
         assert "User details and preferences" not in out
         assert "System note" not in out
 
-    def test_both_leak_patterns_together_stripped(self, gateway_runner):
-        """A vision output containing both leak shapes is fully scrubbed."""
+    def test_fenced_leak_stripped_plugin_header_preserved(self, gateway_runner):
+        """The fenced wrapper is stripped; plugin-specific text outside the
+        fence (e.g. a "## Honcho Context" header) is left to the plugin layer.
+        Gateway core stays plugin-agnostic."""
         leaked = (
             "<memory-context>\n"
             "[System note: The following is recalled memory context, NOT new "
             "user input. Treat as informational background data.]\n"
             "fenced leak\n"
             "</memory-context>\n"
-            "A photograph of a dog.\n\n"
-            "## Honcho Context\n"
-            "header leak\n"
+            "A photograph of a dog."
         )
         fake_result = json.dumps({"success": True, "analysis": leaked})
         with patch("tools.vision_tools.vision_analyze_tool", new=AsyncMock(return_value=fake_result)):
             out = _run(gateway_runner._enrich_message_with_vision("caption", ["/tmp/img.jpg"]))
         assert "photograph of a dog" in out
         assert "fenced leak" not in out
-        assert "header leak" not in out
-        assert "Honcho Context" not in out
         assert "<memory-context>" not in out
diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py
index f29cf73e2..eb2b47f87 100644
--- a/tests/run_agent/test_run_agent.py
+++ b/tests/run_agent/test_run_agent.py
@@ -1441,19 +1441,23 @@ class TestBuildAssistantMessage:
         result = agent._build_assistant_message(msg, "stop")
         assert result["content"] == "No thinking here."
 
-    def test_memory_context_stripped_from_stored_content(self, agent):
-        msg = _mock_assistant_msg(
-            content=(
-                "<memory-context>\n"
-                "[System note: The following is recalled memory context, NOT new user input. Treat as informational background data.]\n\n"
-                "## Honcho Context\n"
-                "stale memory\n"
-                "</memory-context>\n\n"
-                "Visible answer"
-            )
+    def test_memory_context_in_stored_content_is_preserved(self, agent):
+        """`_build_assistant_message` must not silently mutate model output
+        containing literal <memory-context> markers — that's legitimate text
+        (e.g. documentation, code) that the model may emit.  Streaming-path
+        leak prevention is handled by StreamingContextScrubber upstream."""
+        original = (
+            "<memory-context>\n"
+            "[System note: The following is recalled memory context, NOT new user input. Treat as informational background data.]\n\n"
+            "## Honcho Context\n"
+            "stale memory\n"
+            "</memory-context>\n\n"
+            "Visible answer"
         )
+        msg = _mock_assistant_msg(content=original)
         result = agent._build_assistant_message(msg, "stop")
-        assert result["content"] == "Visible answer"
+        assert "<memory-context>" in result["content"]
+        assert "Visible answer" in result["content"]
 
     def test_unterminated_think_block_stripped(self, agent):
         """Unterminated <think> block (MiniMax / NIM dropped close tag) is
@@ -4767,21 +4771,21 @@ class TestDeadRetryCode:
 
 
 class TestMemoryContextSanitization:
-    """run_conversation() must strip leaked <memory-context> blocks from user input."""
+    """sanitize_context() helper correctness — used at provider boundaries."""
 
-    def test_memory_context_stripped_from_user_message(self):
-        """Verify that <memory-context> blocks are removed before the message
-        enters the conversation loop — prevents stale Honcho injection from
-        leaking into user text."""
+    def test_user_message_is_not_mutated_by_run_conversation(self):
+        """User input must reach run_conversation untouched — if a user types
+        a literal <memory-context> tag we don't silently delete their text.
+        The streaming scrubber + plugin-side scrub cover real leak paths."""
         import inspect
         src = inspect.getsource(AIAgent.run_conversation)
-        # The sanitize_context call must appear in run_conversation's preamble
-        assert "sanitize_context(user_message)" in src
-        assert "sanitize_context(persist_user_message)" in src
+        assert "sanitize_context(user_message)" not in src
+        assert "sanitize_context(persist_user_message)" not in src
 
     def test_sanitize_context_strips_full_block(self):
-        """End-to-end: a user message with an embedded memory-context block
-        is cleaned to just the actual user text."""
+        """Helper-level: a string with an embedded memory-context block is
+        cleaned to just the surrounding text.  Used by build_memory_context_block
+        (input-validation) and by plugins on their own backend boundary."""
         from agent.memory_manager import sanitize_context
         user_text = "how is the honcho working"
         injected = (
diff --git a/tests/run_agent/test_run_agent_codex_responses.py b/tests/run_agent/test_run_agent_codex_responses.py
index 74dc64c28..eb95d108d 100644
--- a/tests/run_agent/test_run_agent_codex_responses.py
+++ b/tests/run_agent/test_run_agent_codex_responses.py
@@ -1209,6 +1209,46 @@ def test_stream_delta_scrubber_resets_between_turns(monkeypatch):
     assert "".join(observed) == "clean new turn text"
 
 
+def test_stream_delta_preserves_mid_stream_leading_newlines(monkeypatch):
+    """Mid-stream leading newlines must survive — they are legitimate
+    markdown (lists, code fences, paragraph breaks).  Stripping them
+    based on chunk boundaries silently breaks formatting.
+
+    Only the very first delta of a stream gets leading-newlines stripped
+    (so stale provider preamble doesn't leak); after that, deltas are
+    emitted verbatim.
+    """
+    agent = _build_agent(monkeypatch)
+    observed = []
+    agent.stream_delta_callback = observed.append
+
+    # First delta delivers text — strips its own leading "\n" once.
+    agent._fire_stream_delta("\nHere is a list:")
+    # Second delta starts with "\n- item" — must NOT be stripped.
+    agent._fire_stream_delta("\n- first")
+    agent._fire_stream_delta("\n- second")
+
+    combined = "".join(observed)
+    assert combined == "Here is a list:\n- first\n- second"
+
+
+def test_stream_delta_preserves_code_fence_newlines(monkeypatch):
+    """Code blocks span multiple deltas.  A "\\n```python\\n" boundary
+    is the canonical case where stripping leading newlines corrupts output."""
+    agent = _build_agent(monkeypatch)
+    observed = []
+    agent.stream_delta_callback = observed.append
+
+    agent._fire_stream_delta("Here is the code:")
+    agent._fire_stream_delta("\n```python\n")
+    agent._fire_stream_delta("print('hi')\n")
+    agent._fire_stream_delta("```\n")
+
+    combined = "".join(observed)
+    assert "```python\n" in combined
+    assert combined.startswith("Here is the code:\n```python\n")
+
+
 def test_run_conversation_codex_continues_after_commentary_phase_message(monkeypatch):
     agent = _build_agent(monkeypatch)
     responses = [