🐛 fix(memory): require newline after context tag
This commit is contained in:
@@ -126,7 +126,10 @@ class StreamingContextScrubber:
|
|||||||
idx = self._find_boundary_open_tag(buf)
|
idx = self._find_boundary_open_tag(buf)
|
||||||
if idx == -1:
|
if idx == -1:
|
||||||
# No open tag — hold back a potential partial open tag
|
# No open tag — hold back a potential partial open tag
|
||||||
held = self._max_partial_suffix(buf, self._OPEN_TAG)
|
held = (
|
||||||
|
self._max_pending_open_suffix(buf)
|
||||||
|
or self._max_partial_suffix(buf, self._OPEN_TAG)
|
||||||
|
)
|
||||||
if held:
|
if held:
|
||||||
self._append_visible(out, buf[:-held])
|
self._append_visible(out, buf[:-held])
|
||||||
self._buf = buf[-held:]
|
self._buf = buf[-held:]
|
||||||
@@ -179,10 +182,25 @@ class StreamingContextScrubber:
|
|||||||
idx = buf_lower.find(self._OPEN_TAG, search_start)
|
idx = buf_lower.find(self._OPEN_TAG, search_start)
|
||||||
if idx == -1:
|
if idx == -1:
|
||||||
return -1
|
return -1
|
||||||
if self._is_block_boundary(buf, idx):
|
if self._is_block_boundary(buf, idx) and self._has_block_opener_suffix(buf, idx):
|
||||||
return idx
|
return idx
|
||||||
search_start = idx + 1
|
search_start = idx + 1
|
||||||
|
|
||||||
|
def _max_pending_open_suffix(self, buf: str) -> int:
|
||||||
|
"""Hold a complete boundary tag until the following char confirms it."""
|
||||||
|
if not buf.lower().endswith(self._OPEN_TAG):
|
||||||
|
return 0
|
||||||
|
idx = len(buf) - len(self._OPEN_TAG)
|
||||||
|
if not self._is_block_boundary(buf, idx):
|
||||||
|
return 0
|
||||||
|
return len(self._OPEN_TAG)
|
||||||
|
|
||||||
|
def _has_block_opener_suffix(self, buf: str, idx: int) -> bool:
|
||||||
|
after_idx = idx + len(self._OPEN_TAG)
|
||||||
|
if after_idx >= len(buf):
|
||||||
|
return False
|
||||||
|
return buf[after_idx] in "\r\n"
|
||||||
|
|
||||||
def _is_block_boundary(self, buf: str, idx: int) -> bool:
|
def _is_block_boundary(self, buf: str, idx: int) -> bool:
|
||||||
if idx == 0:
|
if idx == 0:
|
||||||
return self._at_block_boundary
|
return self._at_block_boundary
|
||||||
|
|||||||
@@ -73,7 +73,18 @@ class TestStreamingContextScrubberBasics:
|
|||||||
s = StreamingContextScrubber()
|
s = StreamingContextScrubber()
|
||||||
out = (
|
out = (
|
||||||
s.feed("pre \n<memory")
|
s.feed("pre \n<memory")
|
||||||
+ s.feed("-context>leak</memory-context> post")
|
+ s.feed("-context>\nleak</memory-context> post")
|
||||||
|
+ s.flush()
|
||||||
|
)
|
||||||
|
assert out == "pre \n post"
|
||||||
|
assert "leak" not in out
|
||||||
|
|
||||||
|
def test_open_tag_waits_for_newline_confirmation_across_deltas(self):
|
||||||
|
"""A boundary tag is only a leaked block when the next char is a newline."""
|
||||||
|
s = StreamingContextScrubber()
|
||||||
|
out = (
|
||||||
|
s.feed("pre \n<memory-context>")
|
||||||
|
+ s.feed("\nleak</memory-context> post")
|
||||||
+ s.flush()
|
+ s.flush()
|
||||||
)
|
)
|
||||||
assert out == "pre \n post"
|
assert out == "pre \n post"
|
||||||
@@ -83,7 +94,7 @@ class TestStreamingContextScrubberBasics:
|
|||||||
"""The close tag arriving in two fragments."""
|
"""The close tag arriving in two fragments."""
|
||||||
s = StreamingContextScrubber()
|
s = StreamingContextScrubber()
|
||||||
out = (
|
out = (
|
||||||
s.feed("pre \n<memory-context>leak</memory")
|
s.feed("pre \n<memory-context>\nleak</memory")
|
||||||
+ s.feed("-context> post")
|
+ s.feed("-context> post")
|
||||||
+ s.flush()
|
+ s.flush()
|
||||||
)
|
)
|
||||||
@@ -116,18 +127,28 @@ class TestStreamingContextScrubberPartialTagFalsePositives:
|
|||||||
)
|
)
|
||||||
assert out == "In that previous `<memory-context>` block, there was no matching fact."
|
assert out == "In that previous `<memory-context>` block, there was no matching fact."
|
||||||
|
|
||||||
def test_mid_sentence_memory_context_pair_is_not_scrubbed(self):
|
def test_mid_sentence_memory_context_mention_is_not_scrubbed(self):
|
||||||
"""Only block-like memory-context spans are treated as leaked context."""
|
"""Only block-like memory-context spans are treated as leaked context."""
|
||||||
s = StreamingContextScrubber()
|
s = StreamingContextScrubber()
|
||||||
out = s.feed("The <memory-context> tag name is documented here.") + s.flush()
|
out = s.feed("The <memory-context> tag name is documented here.") + s.flush()
|
||||||
assert out == "The <memory-context> tag name is documented here."
|
assert out == "The <memory-context> tag name is documented here."
|
||||||
|
|
||||||
|
def test_line_start_memory_context_mention_without_close_is_not_scrubbed(self):
|
||||||
|
"""A plain-text line that starts with the tag name must be preserved."""
|
||||||
|
s = StreamingContextScrubber()
|
||||||
|
out = (
|
||||||
|
s.feed("Visible intro\n")
|
||||||
|
+ s.feed("<memory-context> is the literal tag name mentioned here.")
|
||||||
|
+ s.flush()
|
||||||
|
)
|
||||||
|
assert out == "Visible intro\n<memory-context> is the literal tag name mentioned here."
|
||||||
|
|
||||||
|
|
||||||
class TestStreamingContextScrubberUnterminatedSpan:
|
class TestStreamingContextScrubberUnterminatedSpan:
|
||||||
def test_unterminated_span_drops_payload(self):
|
def test_unterminated_span_drops_payload(self):
|
||||||
"""Provider drops close tag — better to lose output than to leak."""
|
"""Provider drops close tag — better to lose output than to leak."""
|
||||||
s = StreamingContextScrubber()
|
s = StreamingContextScrubber()
|
||||||
out = s.feed("pre \n<memory-context>secret never closed") + s.flush()
|
out = s.feed("pre \n<memory-context>\nsecret never closed") + s.flush()
|
||||||
assert out == "pre \n"
|
assert out == "pre \n"
|
||||||
assert "secret" not in out
|
assert "secret" not in out
|
||||||
|
|
||||||
@@ -144,7 +165,7 @@ class TestStreamingContextScrubberCaseInsensitivity:
|
|||||||
def test_uppercase_tags_still_scrubbed(self):
|
def test_uppercase_tags_still_scrubbed(self):
|
||||||
s = StreamingContextScrubber()
|
s = StreamingContextScrubber()
|
||||||
out = (
|
out = (
|
||||||
s.feed("<MEMORY-CONTEXT>secret")
|
s.feed("<MEMORY-CONTEXT>\nsecret")
|
||||||
+ s.feed("</Memory-Context>visible")
|
+ s.feed("</Memory-Context>visible")
|
||||||
+ s.flush()
|
+ s.flush()
|
||||||
)
|
)
|
||||||
|
|||||||
Reference in New Issue
Block a user