From 831067c5d3d94390fd9af6b718bf4c7c28dead6b Mon Sep 17 00:00:00 2001
From: acsezen <asezen@icloud.com>
Date: Fri, 3 Apr 2026 16:25:35 +0200
Subject: [PATCH] =?UTF-8?q?perf:=20fix=20O(n=C2=B2)=20catastrophic=20backt?=
 =?UTF-8?q?racking=20in=20redact=20regex=20+=20reorder=20file=20read=20gua?=
 =?UTF-8?q?rd?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two pre-existing issues causing test_file_read_guards timeouts on CI:

1. agent/redact.py: _ENV_ASSIGN_RE used unbounded [A-Z_]* with
   IGNORECASE, so at each start position it consumed the whole run of
   letters/underscores and then backtracked through it → O(n²) on
   100K+ char inputs. Each side of the secret keyword is now bounded
   to {0,50}, since env var names are never that long.

2. tools/file_tools.py: redact_sensitive_text() ran BEFORE the
   character-count guard, so oversized content (that would be rejected
   anyway) went through the expensive regex first. Reordered to check
   the size limit before redaction; since result_dict is now built
   before redaction, its "content" entry is refreshed afterwards.
---
 agent/redact.py     | 2 +-
 tools/file_tools.py | 8 ++++++--
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/agent/redact.py b/agent/redact.py
index 2906d920e..8cb975851 100644
--- a/agent/redact.py
+++ b/agent/redact.py
@@ -53,7 +53,7 @@ _PREFIX_PATTERNS = [
 # ENV assignment patterns: KEY=value where KEY contains a secret-like name
 _SECRET_ENV_NAMES = r"(?:API_?KEY|TOKEN|SECRET|PASSWORD|PASSWD|CREDENTIAL|AUTH)"
 _ENV_ASSIGN_RE = re.compile(
-    rf"([A-Z_]*{_SECRET_ENV_NAMES}[A-Z_]*)\s*=\s*(['\"]?)(\S+)\2",
+    rf"([A-Z_]{{0,50}}{_SECRET_ENV_NAMES}[A-Z_]{{0,50}})\s*=\s*(['\"]?)(\S+)\2",
     re.IGNORECASE,
 )
 
diff --git a/tools/file_tools.py b/tools/file_tools.py
index 79a111cb7..45add116b 100644
--- a/tools/file_tools.py
+++ b/tools/file_tools.py
@@ -345,8 +345,6 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str =
         # ── Perform the read ──────────────────────────────────────────
         file_ops = _get_file_ops(task_id)
         result = file_ops.read_file(path, offset, limit)
-        if result.content:
-            result.content = redact_sensitive_text(result.content)
         result_dict = result.to_dict()
 
         # ── Character-count guard ─────────────────────────────────────
@@ -355,6 +353,7 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str =
         # amount of content, reject it and tell the model to narrow down.
         # Note: we check the formatted content (with line-number prefixes),
         # not the raw file size, because that's what actually enters context.
+        # Check BEFORE redaction to avoid expensive regex on huge content.
         content_len = len(result.content or "")
         file_size = result_dict.get("file_size", 0)
         max_chars = _get_max_read_chars()
@@ -372,6 +371,11 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str =
                 "file_size": file_size,
             }, ensure_ascii=False)
 
+        # ── Redact secrets (after guard check to skip oversized content) ──
+        if result.content:
+            result.content = redact_sensitive_text(result.content)
+            result_dict["content"] = result.content
+
         # Large-file hint: if the file is big and the caller didn't ask
         # for a narrow window, nudge toward targeted reads.
         if (file_size and file_size > _LARGE_FILE_HINT_BYTES