From 831067c5d3d94390fd9af6b718bf4c7c28dead6b Mon Sep 17 00:00:00 2001 From: acsezen Date: Fri, 3 Apr 2026 16:25:35 +0200 Subject: [PATCH] =?UTF-8?q?perf:=20fix=20O(n=C2=B2)=20catastrophic=20backt?= =?UTF-8?q?racking=20in=20redact=20regex=20+=20reorder=20file=20read=20gua?= =?UTF-8?q?rd?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two pre-existing issues causing test_file_read_guards timeouts on CI: 1. agent/redact.py: _ENV_ASSIGN_RE used unbounded [A-Z_]* with IGNORECASE, matching any letter/underscore to end-of-string at each position → O(n²) backtracking on 100K+ char inputs. Bounded to {0,50} since env var names are never that long. 2. tools/file_tools.py: redact_sensitive_text() ran BEFORE the character-count guard, so oversized content (that would be rejected anyway) went through the expensive regex first. Reordered to check size limit before redaction. --- agent/redact.py | 2 +- tools/file_tools.py | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/agent/redact.py b/agent/redact.py index 2906d920e..8cb975851 100644 --- a/agent/redact.py +++ b/agent/redact.py @@ -53,7 +53,7 @@ _PREFIX_PATTERNS = [ # ENV assignment patterns: KEY=value where KEY contains a secret-like name _SECRET_ENV_NAMES = r"(?:API_?KEY|TOKEN|SECRET|PASSWORD|PASSWD|CREDENTIAL|AUTH)" _ENV_ASSIGN_RE = re.compile( - rf"([A-Z_]*{_SECRET_ENV_NAMES}[A-Z_]*)\s*=\s*(['\"]?)(\S+)\2", + rf"([A-Z_]{{0,50}}{_SECRET_ENV_NAMES}[A-Z_]{{0,50}})\s*=\s*(['\"]?)(\S+)\2", re.IGNORECASE, ) diff --git a/tools/file_tools.py b/tools/file_tools.py index 79a111cb7..45add116b 100644 --- a/tools/file_tools.py +++ b/tools/file_tools.py @@ -345,8 +345,6 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str = # ── Perform the read ────────────────────────────────────────── file_ops = _get_file_ops(task_id) result = file_ops.read_file(path, offset, limit) - if result.content: - result.content = redact_sensitive_text(result.content) result_dict = result.to_dict() # ── Character-count guard ───────────────────────────────────── @@ -355,6 +353,7 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str = # amount of content, reject it and tell the model to narrow down. # Note: we check the formatted content (with line-number prefixes), # not the raw file size, because that's what actually enters context. + # Check BEFORE redaction to avoid expensive regex on huge content. content_len = len(result.content or "") file_size = result_dict.get("file_size", 0) max_chars = _get_max_read_chars() @@ -372,6 +371,11 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str = "file_size": file_size, }, ensure_ascii=False) + # ── Redact secrets (after guard check to skip oversized content) ── + if result.content: + result.content = redact_sensitive_text(result.content) + result_dict["content"] = result.content + # Large-file hint: if the file is big and the caller didn't ask # for a narrow window, nudge toward targeted reads. if (file_size and file_size > _LARGE_FILE_HINT_BYTES