fix(process_registry): use taskkill /T /F for tree-kill on Windows
The Windows branch of `_terminate_host_pid` early-returned after
`os.kill(pid, SIGTERM)` (which Python maps to `TerminateProcess` for
the target handle only), leaving descendant processes — e.g. Chromium
renderer/GPU/network helpers spawned by an `agent-browser` daemon —
running on Windows even after the preceding commit fixed POSIX.
The right Windows primitive is `taskkill /PID <pid> /T /F`:
`/T` walks the tree, `/F` force-terminates. Same approach
`gateway.status.terminate_pid(force=True)` already uses for the
gateway's own shutdown path; reuse the same shape here.
Why NOT extend the POSIX psutil tree-walk to Windows:
1. Windows doesn't maintain a Unix-style process tree. `psutil.
Process.children(recursive=True)` walks PPID links that go stale
when intermediate processes exit, so enumeration is best-effort
and silently misses orphaned descendants. The whole bug we're
fixing is orphaned descendants.
2. `psutil.Process.terminate()` on Windows is `TerminateProcess()`
for one handle — same single-PID scope as the existing
`os.kill`. The existing comment in `gateway/status.py::
terminate_pid` warns this explicitly: 'os.kill SIGTERM is not
equivalent to a tree-killing hard stop' on Windows.
3. Headless Chromium has no GUI window, so the softer
`taskkill /T` without `/F` (which sends WM_CLOSE) won't reach
it either. `/F` is required.
POSIX path is unchanged. The taskkill subprocess uses the same
`creationflags=windows_hide_flags()` pattern other Windows shellouts
in this codebase use. `FileNotFoundError` / `TimeoutExpired` /
`OSError` fall back to bare `os.kill(SIGTERM)` as cheap insurance.
Tests cover the Windows branch via the codebase's standard
`monkeypatch _IS_WINDOWS` pattern (`references/windows-native-
support.md`), plus POSIX tree-walk order, NoSuchProcess swallow,
and the OSError fallback path. 7 new tests, all green on Linux CI.
This commit is contained in:
@@ -1007,3 +1007,163 @@ def test_drain_notifications_empty_queue():
|
|||||||
|
|
||||||
results = process_registry.drain_notifications()
|
results = process_registry.drain_notifications()
|
||||||
assert results == []
|
assert results == []
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# _terminate_host_pid — cross-platform process-tree termination
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestTerminateHostPidWindows:
|
||||||
|
"""Windows branch uses ``taskkill /T /F`` — the documented MS tree-kill
|
||||||
|
primitive. We can't use psutil's ``children(recursive=True)`` /
|
||||||
|
``.terminate()`` path on Windows because (1) Windows doesn't maintain
|
||||||
|
a Unix-style process tree so the walk is unreliable, and (2)
|
||||||
|
``Process.terminate()`` on Windows is ``TerminateProcess()`` for the
|
||||||
|
target handle only, not the tree.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def test_windows_invokes_taskkill_with_tree_and_force_flags(self, monkeypatch):
|
||||||
|
"""The Windows branch must shell out to ``taskkill /PID N /T /F``."""
|
||||||
|
from tools import process_registry as pr
|
||||||
|
|
||||||
|
captured = {}
|
||||||
|
|
||||||
|
def fake_run(args, **kwargs):
|
||||||
|
captured["args"] = args
|
||||||
|
captured["kwargs"] = kwargs
|
||||||
|
return MagicMock(returncode=0, stderr="", stdout="")
|
||||||
|
|
||||||
|
monkeypatch.setattr(pr, "_IS_WINDOWS", True)
|
||||||
|
monkeypatch.setattr(pr.subprocess, "run", fake_run)
|
||||||
|
|
||||||
|
pr.ProcessRegistry._terminate_host_pid(12345)
|
||||||
|
|
||||||
|
assert captured["args"][0] == "taskkill"
|
||||||
|
assert "/PID" in captured["args"]
|
||||||
|
assert "12345" in captured["args"]
|
||||||
|
assert "/T" in captured["args"], "Tree flag required to reach descendants"
|
||||||
|
assert "/F" in captured["args"], "Force flag required for headless Chromium"
|
||||||
|
|
||||||
|
def test_windows_falls_back_to_os_kill_when_taskkill_missing(self, monkeypatch):
|
||||||
|
"""If ``taskkill.exe`` is somehow unavailable, fall back to a bare
|
||||||
|
``os.kill(pid, SIGTERM)`` so we at least try to kill the parent."""
|
||||||
|
from tools import process_registry as pr
|
||||||
|
|
||||||
|
kill_calls = []
|
||||||
|
|
||||||
|
def fake_run(*args, **kwargs):
|
||||||
|
raise FileNotFoundError("taskkill not found")
|
||||||
|
|
||||||
|
def fake_kill(pid, sig):
|
||||||
|
kill_calls.append((pid, sig))
|
||||||
|
|
||||||
|
monkeypatch.setattr(pr, "_IS_WINDOWS", True)
|
||||||
|
monkeypatch.setattr(pr.subprocess, "run", fake_run)
|
||||||
|
monkeypatch.setattr(pr.os, "kill", fake_kill)
|
||||||
|
|
||||||
|
pr.ProcessRegistry._terminate_host_pid(12345)
|
||||||
|
|
||||||
|
assert kill_calls == [(12345, signal.SIGTERM)]
|
||||||
|
|
||||||
|
def test_windows_does_not_call_psutil(self, monkeypatch):
|
||||||
|
"""The Windows branch must NOT exercise the psutil tree-walk
|
||||||
|
(it's unreliable on Windows — see the function docstring)."""
|
||||||
|
from tools import process_registry as pr
|
||||||
|
import psutil
|
||||||
|
|
||||||
|
psutil_calls = []
|
||||||
|
|
||||||
|
class _BoomProcess:
|
||||||
|
def __init__(self, pid):
|
||||||
|
psutil_calls.append(("Process", pid))
|
||||||
|
|
||||||
|
def children(self, recursive=False):
|
||||||
|
psutil_calls.append(("children", recursive))
|
||||||
|
return []
|
||||||
|
|
||||||
|
def terminate(self):
|
||||||
|
psutil_calls.append(("terminate",))
|
||||||
|
|
||||||
|
def fake_run(args, **kwargs):
|
||||||
|
return MagicMock(returncode=0, stderr="", stdout="")
|
||||||
|
|
||||||
|
monkeypatch.setattr(pr, "_IS_WINDOWS", True)
|
||||||
|
monkeypatch.setattr(pr.subprocess, "run", fake_run)
|
||||||
|
monkeypatch.setattr(psutil, "Process", _BoomProcess)
|
||||||
|
|
||||||
|
pr.ProcessRegistry._terminate_host_pid(12345)
|
||||||
|
|
||||||
|
assert psutil_calls == [], (
|
||||||
|
f"Windows branch must not touch psutil, but saw {psutil_calls!r}"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TestTerminateHostPidPosix:
|
||||||
|
"""POSIX branch walks the tree via psutil and SIGTERMs children first."""
|
||||||
|
|
||||||
|
def test_posix_walks_tree_and_terminates_children_then_parent(self, monkeypatch):
|
||||||
|
from tools import process_registry as pr
|
||||||
|
import psutil
|
||||||
|
|
||||||
|
terminate_order = []
|
||||||
|
|
||||||
|
class _FakeChild:
|
||||||
|
def __init__(self, pid):
|
||||||
|
self.pid = pid
|
||||||
|
|
||||||
|
def terminate(self):
|
||||||
|
terminate_order.append(self.pid)
|
||||||
|
|
||||||
|
class _FakeParent:
|
||||||
|
def __init__(self, pid):
|
||||||
|
self.pid = pid
|
||||||
|
|
||||||
|
def children(self, recursive=False):
|
||||||
|
assert recursive is True
|
||||||
|
return [_FakeChild(101), _FakeChild(102), _FakeChild(103)]
|
||||||
|
|
||||||
|
def terminate(self):
|
||||||
|
terminate_order.append(self.pid)
|
||||||
|
|
||||||
|
monkeypatch.setattr(pr, "_IS_WINDOWS", False)
|
||||||
|
monkeypatch.setattr(psutil, "Process", _FakeParent)
|
||||||
|
|
||||||
|
pr.ProcessRegistry._terminate_host_pid(12345)
|
||||||
|
|
||||||
|
assert terminate_order == [101, 102, 103, 12345], (
|
||||||
|
"Children must be terminated before the parent"
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_posix_no_such_process_swallowed(self, monkeypatch):
|
||||||
|
from tools import process_registry as pr
|
||||||
|
import psutil
|
||||||
|
|
||||||
|
def boom(pid):
|
||||||
|
raise psutil.NoSuchProcess(pid)
|
||||||
|
|
||||||
|
monkeypatch.setattr(pr, "_IS_WINDOWS", False)
|
||||||
|
monkeypatch.setattr(psutil, "Process", boom)
|
||||||
|
|
||||||
|
# Must not raise.
|
||||||
|
pr.ProcessRegistry._terminate_host_pid(999999999)
|
||||||
|
|
||||||
|
def test_posix_oserror_falls_back_to_os_kill(self, monkeypatch):
|
||||||
|
from tools import process_registry as pr
|
||||||
|
import psutil
|
||||||
|
|
||||||
|
def boom(pid):
|
||||||
|
raise PermissionError("can't read /proc")
|
||||||
|
|
||||||
|
kill_calls = []
|
||||||
|
|
||||||
|
def fake_kill(pid, sig):
|
||||||
|
kill_calls.append((pid, sig))
|
||||||
|
|
||||||
|
monkeypatch.setattr(pr, "_IS_WINDOWS", False)
|
||||||
|
monkeypatch.setattr(psutil, "Process", boom)
|
||||||
|
monkeypatch.setattr(pr.os, "kill", fake_kill)
|
||||||
|
|
||||||
|
pr.ProcessRegistry._terminate_host_pid(12345)
|
||||||
|
|
||||||
|
assert kill_calls == [(12345, signal.SIGTERM)]
|
||||||
|
|||||||
@@ -434,9 +434,50 @@ class ProcessRegistry:
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _terminate_host_pid(pid: int) -> None:
|
def _terminate_host_pid(pid: int) -> None:
|
||||||
"""Terminate a host-visible PID without requiring the original process handle."""
|
"""Terminate a host-visible PID and its descendants.
|
||||||
|
|
||||||
|
POSIX: walks the process tree with ``psutil`` and SIGTERMs
|
||||||
|
children before the parent so subprocess trees (e.g. Chromium
|
||||||
|
renderers/GPU helpers spawned by an ``agent-browser`` daemon)
|
||||||
|
don't get reparented to init and survive cleanup.
|
||||||
|
|
||||||
|
Windows: shells out to ``taskkill /PID <pid> /T /F``. This is
|
||||||
|
the documented Microsoft primitive for tree-kill and matches the
|
||||||
|
existing convention in ``gateway.status.terminate_pid``. We can't
|
||||||
|
reuse the POSIX psutil path on Windows because:
|
||||||
|
|
||||||
|
1. Windows doesn't maintain a Unix-style process tree —
|
||||||
|
``psutil.Process.children(recursive=True)`` walks PPID
|
||||||
|
links that go stale when intermediate processes exit, so
|
||||||
|
enumeration is best-effort and misses orphaned descendants.
|
||||||
|
2. ``psutil.Process.terminate()`` on Windows is
|
||||||
|
``TerminateProcess()`` which kills only the target handle
|
||||||
|
and is a hard kill — there is no Windows equivalent of a
|
||||||
|
SIGTERM that cascades through a process group. (See the
|
||||||
|
warning in ``gateway/status.py::terminate_pid``: "os.kill
|
||||||
|
with SIGTERM is not equivalent to a tree-killing hard stop"
|
||||||
|
on Windows.) Headless Chromium has no GUI window, so the
|
||||||
|
softer ``taskkill /T`` without ``/F`` won't reach it either.
|
||||||
|
|
||||||
|
``psutil`` is a hard dependency (see ``pyproject.toml``); the
|
||||||
|
bare-``os.kill`` fallback covers OSError / PermissionError on
|
||||||
|
POSIX and a missing ``taskkill.exe`` on Windows (effectively
|
||||||
|
unreachable on real Windows installs, but cheap insurance).
|
||||||
|
"""
|
||||||
if _IS_WINDOWS:
|
if _IS_WINDOWS:
|
||||||
os.kill(pid, signal.SIGTERM)
|
try:
|
||||||
|
subprocess.run(
|
||||||
|
["taskkill", "/PID", str(pid), "/T", "/F"],
|
||||||
|
capture_output=True,
|
||||||
|
text=True,
|
||||||
|
timeout=10,
|
||||||
|
creationflags=windows_hide_flags(),
|
||||||
|
)
|
||||||
|
except (FileNotFoundError, subprocess.TimeoutExpired, OSError):
|
||||||
|
try:
|
||||||
|
os.kill(pid, signal.SIGTERM)
|
||||||
|
except (OSError, ProcessLookupError, PermissionError):
|
||||||
|
pass
|
||||||
return
|
return
|
||||||
|
|
||||||
import psutil
|
import psutil
|
||||||
|
|||||||
Reference in New Issue
Block a user