From b3319b12522643fddb11b8e44fd5f48c0a409d36 Mon Sep 17 00:00:00 2001
From: Dat Pham <pvdat.cyan@gmail.com>
Date: Sat, 4 Apr 2026 00:38:42 +0700
Subject: [PATCH] fix(memory): Fix ByteRover plugin - run brv query
 synchronously before LLM call
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The pipeline prefetch design was firing `brv query` in a background
thread *after* each response, meaning the context injected at turn N
was from turn N-1's message — and the first turn got no BRV context
at all. Replace the async prefetch pipeline with a synchronous query
in `prefetch()` so recall runs before the first API call on every
turn. Make `queue_prefetch()` a no-op and remove the now-unused
pipeline state. Also lower `_QUERY_TIMEOUT` from 30 to 10 seconds,
which now bounds how long `prefetch()` can block.
---
 plugins/memory/byterover/__init__.py | 55 ++++++++++------------------
 1 file changed, 20 insertions(+), 35 deletions(-)

diff --git a/plugins/memory/byterover/__init__.py b/plugins/memory/byterover/__init__.py
index cf3fe84aa..ead87d0c2 100644
--- a/plugins/memory/byterover/__init__.py
+++ b/plugins/memory/byterover/__init__.py
@@ -32,7 +32,7 @@ from agent.memory_provider import MemoryProvider
 logger = logging.getLogger(__name__)
 
 # Timeouts
-_QUERY_TIMEOUT = 30   # brv query — should be fast
+_QUERY_TIMEOUT = 10   # brv query — should be fast
 _CURATE_TIMEOUT = 120  # brv curate — may involve LLM processing
 
 # Minimum lengths to filter noise
@@ -175,9 +175,6 @@ class ByteRoverMemoryProvider(MemoryProvider):
         self._cwd = ""
         self._session_id = ""
         self._turn_count = 0
-        self._prefetch_result = ""
-        self._prefetch_lock = threading.Lock()
-        self._prefetch_thread: Optional[threading.Thread] = None
         self._sync_thread: Optional[threading.Thread] = None
 
     @property
@@ -216,37 +213,26 @@ class ByteRoverMemoryProvider(MemoryProvider):
         )
 
     def prefetch(self, query: str, *, session_id: str = "") -> str:
-        if self._prefetch_thread and self._prefetch_thread.is_alive():
-            self._prefetch_thread.join(timeout=3.0)
-        with self._prefetch_lock:
-            result = self._prefetch_result
-            self._prefetch_result = ""
-        if not result:
+        """Run brv query synchronously before the agent's first LLM call.
+
+        Blocks until the query completes (up to _QUERY_TIMEOUT seconds), ensuring
+        the result is available as context before the model is called.
+        """
+        if not query or len(query.strip()) < _MIN_QUERY_LEN:
             return ""
-        return f"## ByteRover Context\n{result}"
+        result = _run_brv(
+            ["query", "--", query.strip()[:5000]],
+            timeout=_QUERY_TIMEOUT, cwd=self._cwd,
+        )
+        if result["success"] and result.get("output"):
+            output = result["output"].strip()
+            if len(output) > _MIN_OUTPUT_LEN:
+                return f"## ByteRover Context\n{output}"
+        return ""
 
     def queue_prefetch(self, query: str, *, session_id: str = "") -> None:
-        if not query or len(query.strip()) < _MIN_QUERY_LEN:
-            return
-
-        def _run():
-            try:
-                result = _run_brv(
-                    ["query", "--", query.strip()[:5000]],
-                    timeout=_QUERY_TIMEOUT, cwd=self._cwd,
-                )
-                if result["success"] and result.get("output"):
-                    output = result["output"].strip()
-                    if len(output) > _MIN_OUTPUT_LEN:
-                        with self._prefetch_lock:
-                            self._prefetch_result = output
-            except Exception as e:
-                logger.debug("ByteRover prefetch failed: %s", e)
-
-        self._prefetch_thread = threading.Thread(
-            target=_run, daemon=True, name="brv-prefetch"
-        )
-        self._prefetch_thread.start()
+        """No-op: prefetch() now runs synchronously at turn start."""
+        pass
 
     def sync_turn(self, user_content: str, assistant_content: str, *, session_id: str = "") -> None:
         """Curate the conversation turn in background (non-blocking)."""
@@ -338,9 +324,8 @@ class ByteRoverMemoryProvider(MemoryProvider):
         return json.dumps({"error": f"Unknown tool: {tool_name}"})
 
     def shutdown(self) -> None:
-        for t in (self._sync_thread, self._prefetch_thread):
-            if t and t.is_alive():
-                t.join(timeout=10.0)
+        if self._sync_thread and self._sync_thread.is_alive():
+            self._sync_thread.join(timeout=10.0)
 
     # -- Tool implementations ------------------------------------------------