From abfc1847b7bc6f90e23fb5ab9703d518185eb288 Mon Sep 17 00:00:00 2001
From: handsdiff <239876380+handsdiff@users.noreply.github.com>
Date: Sat, 18 Apr 2026 12:04:03 -0400
Subject: [PATCH] fix(terminal): rewrite `A && B &` to `A && { B & }` to
 prevent subshell leak
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

bash parses `A && B &` with `&&` tighter than `&`, so it forks a subshell
for the compound and backgrounds the subshell. Inside the subshell, B
runs foreground, so the subshell waits for B. When B is a process that
doesn't naturally exit (`python3 -m http.server`, `yes > /dev/null`, a
long-running daemon), the subshell is stuck in `wait4` forever and leaks
as an orphan reparented to init.

Observed in production: agents running `cd X && python3 -m http.server
8000 &>/dev/null & sleep 1 && curl ...` as a "start a local server, then
verify it" one-liner. Outer bash exits cleanly; the subshell never does.
Across ~3 days of use, 8 unique stuck-terminal events and 7 leaked
bash+server pairs accumulated on the fleet, with some sessions appearing
hung from the user's perspective because the subshell's open stdout pipe
kept the terminal tool's drain thread blocked.

This is distinct from the `set +m` fix in 933fbd8f (which addressed
interactive-shell job-control waiting at exit). `set +m` doesn't help
here because `bash -c` is non-interactive and job control is already
off; the problem is the subshell's own internal wait for its foreground
B, not the outer shell's job-tracking.

The fix: walk the command shell-aware (respecting quotes, parens, brace
groups, `&>`/`>&` redirects), find `A && B &` / `A || B &` at depth 0
and rewrite the tail to `A && { B & }`. Brace groups don't fork a
subshell — they run in the current shell. `B &` inside the group is a
simple background (no subshell wait). The outer `&` is absorbed into
the group, so the compound no longer needs an explicit subshell.

`&&` error-propagation is preserved exactly: if A fails, `&&`
short-circuits and B never runs.

- Skips quoted strings, comment lines, and `(…)` subshells
- Handles `&>/dev/null`, `2>&1`, `>&2` without mistaking them for `&`
- Resets chain state at `;`, `|`, and newlines
- Tracks brace depth so already-rewritten output is idempotent
- Walks using the existing `_read_shell_token` tokenizer, matching the
  pattern of `_rewrite_real_sudo_invocations`

Called once from `BaseEnvironment.execute` right after
`_prepare_command`, so it runs for every backend (local, ssh, docker,
modal, etc.) with no per-backend plumbing.

34 new tests covering rewrite cases, preservation cases, redirect
edge-cases, quoting/parens/backticks, idempotency, and empty/edge
inputs. End-to-end verified on a test VM: the exact vela-incident
command now returns in ~1.3s with no leaked bash, only the intentional
backgrounded server.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../test_terminal_compound_background.py      | 180 ++++++++++++++++++
 tools/environments/base.py                    |   7 +
 tools/terminal_tool.py                        | 165 ++++++++++++++++
 3 files changed, 352 insertions(+)
 create mode 100644 tests/tools/test_terminal_compound_background.py

diff --git a/tests/tools/test_terminal_compound_background.py b/tests/tools/test_terminal_compound_background.py
new file mode 100644
index 000000000..d8922bcf5
--- /dev/null
+++ b/tests/tools/test_terminal_compound_background.py
@@ -0,0 +1,180 @@
+"""Regression tests for _rewrite_compound_background.
+
+Context: bash parses ``A && B &`` as ``(A && B) &`` — it forks a subshell
+for the compound and backgrounds the subshell. Inside the subshell, B
+runs foreground, so the subshell waits for B. When B never exits on its
+own (HTTP servers, ``yes > /dev/null``, etc.), the subshell is stuck in
+``wait4`` forever and leaks as an orphan process. Pre-fix, we saw this
+pattern leak processes across the fleet (vela, sal, combiagent).
+
+The rewriter fixes this by wrapping the tail in a brace group —
+``A && { B & }`` — so B runs as a simple backgrounded command inside
+the current shell. No subshell fork, no wait.
+"""
+
+import pytest
+
+from tools.terminal_tool import _rewrite_compound_background as rewrite
+
+
+class TestRewrites:
+    """Commands that trigger the subshell-wait bug MUST be rewritten."""
+
+    def test_simple_and_background(self):
+        assert rewrite("A && B &") == "A && { B & }"
+
+    def test_or_background(self):
+        assert rewrite("A || B &") == "A || { B & }"
+
+    def test_chained_and(self):
+        assert rewrite("A && B && C &") == "A && B && { C & }"
+
+    def test_chained_or(self):
+        assert rewrite("A || B || C &") == "A || B || { C & }"
+
+    def test_mixed_and_or(self):
+        assert rewrite("A && B || C &") == "A && B || { C & }"
+
+    def test_realistic_server_start(self):
+        # The exact shape observed in the vela incident.
+        cmd = (
+            "cd /home/exedev && python3 -m http.server 8000 &>/dev/null &\n"
+            "sleep 1\n"
+            'curl -s -o /dev/null -w "%{http_code}" http://localhost:8000/'
+        )
+        expected = (
+            "cd /home/exedev && { python3 -m http.server 8000 &>/dev/null & }\n"
+            "sleep 1\n"
+            'curl -s -o /dev/null -w "%{http_code}" http://localhost:8000/'
+        )
+        assert rewrite(cmd) == expected
+
+    def test_newline_resets_chain_state(self):
+        # A && newline starts a new statement; B & on its own line is simple.
+        cmd = "A && B\nC &"
+        assert rewrite(cmd) == "A && B\nC &"
+
+    def test_semicolon_resets_chain_state(self):
+        cmd = "A && B; C &"
+        assert rewrite(cmd) == "A && B; C &"
+
+    def test_pipe_resets_chain_state(self):
+        cmd = "A && B | C &"
+        assert rewrite(cmd) == "A && B | C &"
+
+    def test_multiple_rewrites_in_one_script(self):
+        cmd = "A && B &\nfalse || C &"
+        assert rewrite(cmd) == "A && { B & }\nfalse || { C & }"
+
+
+class TestPreserved:
+    """Commands that DON'T have the bug MUST pass through unchanged."""
+
+    def test_simple_background(self):
+        # No compound — just background a single command. Works fine as-is.
+        assert rewrite("sleep 5 &") == "sleep 5 &"
+
+    def test_plain_server_background(self):
+        assert rewrite("python3 -m http.server 0 &") == "python3 -m http.server 0 &"
+
+    def test_semicolon_sequence(self):
+        assert rewrite("cd /tmp; start-server &") == "cd /tmp; start-server &"
+
+    def test_no_trailing_ampersand(self):
+        assert rewrite("A && B") == "A && B"
+
+    def test_no_chain_at_all(self):
+        assert rewrite("echo hello") == "echo hello"
+
+    def test_empty_string(self):
+        assert rewrite("") == ""
+
+    def test_whitespace_only(self):
+        assert rewrite("   \n\t") == "   \n\t"
+
+
+class TestRedirectsNotConfused:
+    """``&>``, ``2>&1``, ``>&2`` must not be mistaken for background ``&``."""
+
+    def test_amp_gt_redirect_alone(self):
+        assert rewrite("echo hi &>/dev/null") == "echo hi &>/dev/null"
+
+    def test_fd_to_fd_redirect(self):
+        assert rewrite("cmd 2>&1") == "cmd 2>&1"
+
+    def test_fd_redirect_with_trailing_bg(self):
+        # 2>&1 is redirect; trailing & is simple bg (no compound).
+        assert rewrite("cmd 2>&1 &") == "cmd 2>&1 &"
+
+    def test_amp_gt_inside_compound_background(self):
+        # &> should be preserved; the trailing & still needs wrapping.
+        cmd = "A && B &>/dev/null &"
+        assert rewrite(cmd) == "A && { B &>/dev/null & }"
+
+    def test_gt_amp_inside_compound(self):
+        cmd = "A && B 2>&1 &"
+        assert rewrite(cmd) == "A && { B 2>&1 & }"
+
+
+class TestQuotingAndParens:
+    """Shell metacharacters inside quotes/parens must not be parsed as operators."""
+
+    def test_and_and_inside_single_quotes(self):
+        cmd = "echo 'A && B &'"
+        assert rewrite(cmd) == "echo 'A && B &'"
+
+    def test_and_and_inside_double_quotes(self):
+        cmd = 'echo "A && B &"'
+        assert rewrite(cmd) == 'echo "A && B &"'
+
+    def test_parenthesised_subshell_left_alone(self):
+        # `(A && B) &` has the same bug class but isn't the common agent
+        # pattern. Leave for a follow-up; do not rewrite and do not
+        # misrewrite content inside the parens.
+        assert rewrite("(A && B) &") == "(A && B) &"
+
+    def test_command_substitution_not_rewritten(self):
+        # $(A && B) is command substitution; the `&&` inside is a compound
+        # expression in the subshell, unrelated to the outer `&`.
+        cmd = 'echo "$(A && B)" &'
+        assert rewrite(cmd) == 'echo "$(A && B)" &'
+
+    def test_backslash_escaped_ampersand(self):
+        # Escaped & is not a background operator.
+        cmd = r"echo A \&\& B"
+        assert rewrite(cmd) == cmd
+
+    def test_comment_line_not_rewritten(self):
+        cmd = "# A && B &\nC"
+        assert rewrite(cmd) == "# A && B &\nC"
+
+
+class TestIdempotence:
+    """Running the rewriter twice should be a no-op on its own output."""
+
+    def test_already_rewritten(self):
+        once = rewrite("A && B &")
+        twice = rewrite(once)
+        assert once == twice
+        assert twice == "A && { B & }"
+
+    def test_multiline_idempotent(self):
+        once = rewrite("cd /tmp && server &\nsleep 1")
+        assert rewrite(once) == once
+
+
+class TestEdgeCases:
+    def test_only_chain_op_no_second_command(self):
+        # Malformed input: bash would error, we shouldn't crash or rewrite.
+        cmd = "A && &"
+        # Don't assert a specific output; just don't raise.
+        rewrite(cmd)
+
+    def test_only_trailing_ampersand(self):
+        assert rewrite("&") == "&"
+
+    def test_leading_whitespace(self):
+        assert rewrite("   A && B &") == "   A && { B & }"
+
+    def test_tabs_between_tokens(self):
+        assert rewrite("A\t&&\tB\t&") == "A\t&&\t{ B\t& }"
diff --git a/tools/environments/base.py b/tools/environments/base.py
index cde78e1d4..19a637901 100644
--- a/tools/environments/base.py
+++ b/tools/environments/base.py
@@ -705,6 +705,13 @@ class BaseEnvironment(ABC):
         self._before_execute()
 
         exec_command, sudo_stdin = self._prepare_command(command)
+        # Guard against the `A && B &` subshell-wait trap: bash forks a
+        # subshell for the compound that then waits for an infinite B (a
+        # server, `yes > /dev/null`, etc.), leaking the subshell forever.
+        # Rewriting to `A && { B & }` runs B as a plain background in the
+        # current shell — no subshell wait.
+        from tools.terminal_tool import _rewrite_compound_background
+        exec_command = _rewrite_compound_background(exec_command)
         effective_timeout = timeout or self.timeout
         effective_cwd = cwd or self.cwd
 
diff --git a/tools/terminal_tool.py b/tools/terminal_tool.py
index b8b69856d..6a69a3b83 100644
--- a/tools/terminal_tool.py
+++ b/tools/terminal_tool.py
@@ -442,6 +442,171 @@ def _rewrite_real_sudo_invocations(command: str) -> tuple[str, bool]:
     return "".join(out), found
 
 
+def _rewrite_compound_background(command: str) -> str:
+    """Wrap `A && B &` (or `A || B &`) to `A && { B & }` at depth 0.
+
+    Bash parses ``A && B &`` with `&&` tighter than `&`, so it forks a
+    subshell for the whole `A && B` compound and backgrounds it. Inside
+    the subshell, `B` runs foreground, so the subshell waits for `B` to
+    finish. When `B` is a long-running process (`python3 -m http.server`,
+    `yes > /dev/null`, anything that doesn't naturally exit), the subshell
+    never exits. It leaks as a process stuck in ``wait4`` forever — and
+    on the way, its open stdout pipe can prevent the terminal tool from
+    returning promptly.
+
+    Rewriting the tail to `A && { B & }` preserves `&&`'s error semantics
+    (skip B if A fails) while replacing the subshell with a brace group.
+    The brace group runs in the current shell (no fork), backgrounds B as
+    a simple command (bash doesn't wait for it in non-interactive mode),
+    and exits immediately. B runs as a normal backgrounded child, orphaned
+    when the parent shell exits.
+
+    Handles redirects (``&>``, ``2>&1``) and skips content inside quoted
+    strings and parenthesised subshells. Leaves simple ``cmd &`` alone —
+    that construct doesn't have the subshell-wait bug.
+    """
+    n = len(command)
+    i = 0
+    paren_depth = 0
+    brace_depth = 0
+    # Position in *command* just after the most recent `&&` / `||` at depth 0
+    # in the current statement; -1 when no chain operator is active.
+    last_chain_op_end = -1
+    rewrites: list[tuple[int, int]] = []  # (chain_op_end, amp_pos)
+
+    while i < n:
+        ch = command[i]
+
+        # Newline terminates a statement at depth 0 — reset chain state.
+        # Checked before the whitespace skip so we don't miss it.
+        if ch == "\n" and paren_depth == 0 and brace_depth == 0:
+            last_chain_op_end = -1
+            i += 1
+            continue
+
+        if ch.isspace():
+            i += 1
+            continue
+
+        # Comments (only at statement start — conservative: any `#` not inside
+        # a token ends the line). `_read_shell_token` handles quoted strings
+        # below so `#` inside quotes is safe.
+        if ch == "#":
+            nl = command.find("\n", i)
+            if nl == -1:
+                break
+            i = nl
+            continue
+
+        if ch == "\\" and i + 1 < n:
+            i += 2
+            continue
+
+        # Quoted tokens — consume whole string via the shared tokenizer.
+        if ch in ("'", '"'):
+            _, next_i = _read_shell_token(command, i)
+            i = max(next_i, i + 1)
+            continue
+
+        if ch == "(":
+            paren_depth += 1
+            i += 1
+            continue
+
+        if ch == ")":
+            paren_depth = max(0, paren_depth - 1)
+            i += 1
+            continue
+
+        # Brace groups: `{ ... }` is a group (no subshell fork), and bash
+        # requires whitespace after `{`. We track depth so already-rewritten
+        # output (`A && { B & }`) is idempotent — the inner `&` is part of
+        # the group, not a new compound to rewrite. Also skip content inside
+        # the group since `A && B &` there is separately well-formed.
+        if ch == "{" and i + 1 < n and (command[i + 1].isspace() or command[i + 1] == "\n"):
+            brace_depth += 1
+            i += 1
+            continue
+        if ch == "}" and brace_depth > 0:
+            brace_depth -= 1
+            # Closing a group completes a compound statement; reset chain.
+            last_chain_op_end = -1
+            i += 1
+            continue
+
+        # Inside parens or brace groups, skip operators — they parse in their
+        # own scope. `(...)` subshells have the same bug class but are not the
+        # common agent pattern; leave for a follow-up.
+        if paren_depth > 0 or brace_depth > 0:
+            i += 1
+            continue
+
+        # Chain operators at depth 0
+        if command.startswith("&&", i) or command.startswith("||", i):
+            last_chain_op_end = i + 2
+            i += 2
+            continue
+
+        # Statement terminators reset the chain state
+        if ch == ";":
+            last_chain_op_end = -1
+            i += 1
+            continue
+
+        # Single `|` (pipe) starts a new pipeline stage; don't rewrite
+        # across it. `||` handled above.
+        if ch == "|":
+            last_chain_op_end = -1
+            i += 1
+            continue
+
+        # `&` handling: distinguish `&&`, `&>`, fd redirect (`>&`, `<&`),
+        # and a true backgrounding `&`.
+        if ch == "&":
+            # `&&` handled above; won't reach here
+            if i + 1 < n and command[i + 1] == ">":
+                # `&>` redirect — consume
+                i += 2
+                continue
+            # `>&` / `<&` fd target — look back past whitespace
+            j = i - 1
+            while j >= 0 and command[j].isspace():
+                j -= 1
+            if j >= 0 and command[j] in "<>":
+                i += 1
+                continue
+            # Real background operator
+            if last_chain_op_end >= 0:
+                rewrites.append((last_chain_op_end, i))
+            last_chain_op_end = -1
+            i += 1
+            continue
+
+        # Regular unquoted token — advance past it via the shared tokenizer
+        _, next_i = _read_shell_token(command, i)
+        i = max(next_i, i + 1)
+
+    if not rewrites:
+        return command
+
+    # Apply rewrites back-to-front so earlier indices remain valid.
+    result = command
+    for chain_end, amp_pos in reversed(rewrites):
+        # Skip whitespace right after the `&&`/`||` so the brace group
+        # opens flush against the inner command.
+        insert_pos = chain_end
+        while insert_pos < amp_pos and result[insert_pos].isspace():
+            insert_pos += 1
+        prefix = result[:insert_pos]
+        middle = result[insert_pos:amp_pos]  # inner command + trailing space
+        suffix = result[amp_pos + 1 :]
+        # `{` needs a trailing space in bash; the closing `}` needs to be
+        # preceded by `;` or `&` — we're providing `&` from the backgrounding.
+        result = prefix + "{ " + middle + "& }" + suffix
+
+    return result
+
+
 def _transform_sudo_command(command: str | None) -> tuple[str | None, str | None]:
     """
     Transform sudo commands to use -S flag if SUDO_PASSWORD is available.