fix(update): kill stale dashboard processes instead of warning (#17832)
`hermes update` previously just printed a warning when it detected a running `hermes dashboard` process from the previous version, telling the user to kill and restart it themselves. In practice dashboards get started and forgotten, so the warning was routinely ignored and users ended up with a silent frontend/backend mismatch (new JS bundle served against the old in-memory Python backend, e.g. new auth headers the old code doesn't recognise → every API call 401s). The dashboard has no service manager, no PID file, and we don't record the original launch args (--host, --port, --insecure, --tui, --no-open) so we can't auto-restart it. But we CAN stop it, which is what the user wants — the failure mode when the stale process is left alive is worse than the dashboard just being down. - POSIX: SIGTERM, poll for ~3s, SIGKILL any survivors. - Windows: `taskkill /PID <pid> /F`. - Print each PID's outcome plus a one-line restart hint. - Detection logic is unchanged (same ps / wmic scan, same guards against the `pgrep -f` greedy-match trap from #16872 and the #17049 wmic UnicodeDecodeError fix). Also split the old monolithic `_warn_stale_dashboard_processes` into `_find_stale_dashboard_pids` (scan) + `_kill_stale_dashboard_processes` (kill), keeping the old name as an alias so any external callers still work. E2E verified: spawned a fake `hermes dashboard` cmdline via `exec -a 'hermes dashboard …' sleep 300`, ran `_kill_stale_dashboard_processes()`, confirmed SIGTERM exit (-15) and that a post-scan returns an empty PID list.
This commit is contained in:
@@ -5335,8 +5335,8 @@ def _build_web_ui(web_dir: Path, *, fatal: bool = False) -> bool:
|
||||
return True
|
||||
|
||||
|
||||
def _warn_stale_dashboard_processes() -> None:
|
||||
"""Warn about running dashboard processes that still hold pre-update code.
|
||||
def _find_stale_dashboard_pids() -> list[int]:
|
||||
"""Return PIDs of ``hermes dashboard`` processes other than ourselves.
|
||||
|
||||
``hermes dashboard`` is a long-lived server process commonly started and
|
||||
forgotten. When ``hermes update`` replaces files on disk, the running
|
||||
@@ -5344,9 +5344,13 @@ def _warn_stale_dashboard_processes() -> None:
|
||||
disk is updated, causing a silent frontend/backend mismatch (e.g. new
|
||||
auth headers the old backend doesn't recognise → every API call 401s).
|
||||
|
||||
Unlike the gateway, the dashboard has no service manager (systemd /
|
||||
launchd), so we can only warn — we don't auto-kill user-managed
|
||||
background processes.
|
||||
The dashboard has no service manager (systemd / launchd), no PID file,
|
||||
and we can't know the original launch args — so the only sane action
|
||||
after an update is to kill the stale process and let the user restart
|
||||
it. This helper is just the detection step; see
|
||||
``_kill_stale_dashboard_processes`` for the kill.
|
||||
|
||||
Returns an empty list on any scan error (missing ps/wmic, timeout, etc.).
|
||||
"""
|
||||
patterns = [
|
||||
"hermes dashboard",
|
||||
@@ -5372,7 +5376,7 @@ def _warn_stale_dashboard_processes() -> None:
|
||||
encoding="utf-8", errors="ignore",
|
||||
)
|
||||
if result.returncode != 0 or result.stdout is None:
|
||||
return
|
||||
return []
|
||||
current_cmd = ""
|
||||
for line in result.stdout.split("\n"):
|
||||
line = line.strip()
|
||||
@@ -5414,20 +5418,110 @@ def _warn_stale_dashboard_processes() -> None:
|
||||
and pid != self_pid):
|
||||
dashboard_pids.append(pid)
|
||||
except (FileNotFoundError, subprocess.TimeoutExpired, OSError):
|
||||
return
|
||||
return []
|
||||
|
||||
if not dashboard_pids:
|
||||
return dashboard_pids
|
||||
|
||||
|
||||
def _kill_stale_dashboard_processes() -> None:
|
||||
"""Kill ``hermes dashboard`` processes left over from the previous version.
|
||||
|
||||
Called at the end of ``hermes update``. The dashboard has no service
|
||||
manager, so after an update the running process is guaranteed to be
|
||||
serving stale Python against a freshly-updated JS bundle. Leaving it
|
||||
alive produces silent frontend/backend mismatches (new auth headers the
|
||||
old backend doesn't recognise → every API call 401s).
|
||||
|
||||
POSIX: SIGTERM, wait up to ~3s for graceful exit, SIGKILL any survivors.
|
||||
Windows: ``taskkill /PID <pid> /F`` since there's no clean SIGTERM
|
||||
equivalent for background console apps.
|
||||
|
||||
The dashboard isn't auto-restarted because we don't know the original
|
||||
launch args (--host, --port, --insecure, --tui, --no-open). The user
|
||||
restarts it manually; we print a hint with the previous port(s) where
|
||||
possible.
|
||||
"""
|
||||
pids = _find_stale_dashboard_pids()
|
||||
if not pids:
|
||||
return
|
||||
|
||||
print()
|
||||
print(f"⚠ {len(dashboard_pids)} dashboard process(es) still running "
|
||||
f"with the previous version:")
|
||||
for pid in dashboard_pids:
|
||||
print(f" PID {pid}")
|
||||
print(" The running backend may not match the updated frontend,")
|
||||
print(" causing silent auth failures or empty data.")
|
||||
print(" Restart them to pick up the changes:")
|
||||
print(" kill <pid> && hermes dashboard --port <port> ...")
|
||||
print(f"⟲ Stopping {len(pids)} stale dashboard process(es) "
|
||||
f"(the running backend no longer matches the updated frontend)")
|
||||
|
||||
killed: list[int] = []
|
||||
failed: list[tuple[int, str]] = []
|
||||
|
||||
if sys.platform == "win32":
|
||||
for pid in pids:
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["taskkill", "/PID", str(pid), "/F"],
|
||||
capture_output=True, text=True, timeout=10,
|
||||
)
|
||||
if result.returncode == 0:
|
||||
killed.append(pid)
|
||||
else:
|
||||
failed.append((pid, (result.stderr or result.stdout or "").strip()))
|
||||
except (FileNotFoundError, subprocess.TimeoutExpired, OSError) as e:
|
||||
failed.append((pid, str(e)))
|
||||
else:
|
||||
import signal as _signal
|
||||
import time as _time
|
||||
|
||||
# SIGTERM first — give each process a chance to shut down cleanly
|
||||
# (uvicorn closes its socket, flushes logs, etc.).
|
||||
for pid in pids:
|
||||
try:
|
||||
os.kill(pid, _signal.SIGTERM)
|
||||
except ProcessLookupError:
|
||||
# Already gone — count as killed.
|
||||
killed.append(pid)
|
||||
except (PermissionError, OSError) as e:
|
||||
failed.append((pid, str(e)))
|
||||
|
||||
# Poll for exit up to ~3s total.
|
||||
deadline = _time.monotonic() + 3.0
|
||||
pending = [p for p in pids if p not in killed
|
||||
and p not in {f[0] for f in failed}]
|
||||
while pending and _time.monotonic() < deadline:
|
||||
_time.sleep(0.1)
|
||||
still_pending = []
|
||||
for pid in pending:
|
||||
try:
|
||||
os.kill(pid, 0) # probe
|
||||
except ProcessLookupError:
|
||||
killed.append(pid)
|
||||
except (PermissionError, OSError):
|
||||
# Can't probe — assume still there.
|
||||
still_pending.append(pid)
|
||||
else:
|
||||
still_pending.append(pid)
|
||||
pending = still_pending
|
||||
|
||||
# SIGKILL any survivors.
|
||||
for pid in pending:
|
||||
try:
|
||||
os.kill(pid, _signal.SIGKILL)
|
||||
killed.append(pid)
|
||||
except ProcessLookupError:
|
||||
killed.append(pid)
|
||||
except (PermissionError, OSError) as e:
|
||||
failed.append((pid, str(e)))
|
||||
|
||||
for pid in killed:
|
||||
print(f" ✓ stopped PID {pid}")
|
||||
for pid, reason in failed:
|
||||
print(f" ✗ failed to stop PID {pid}: {reason}")
|
||||
|
||||
if killed:
|
||||
print(" Restart the dashboard when you're ready:")
|
||||
print(" hermes dashboard --port <port>")
|
||||
|
||||
|
||||
# Back-compat alias: some tests and any external callers may import the old
|
||||
# warn-only name. The new behaviour (kill stale processes) replaces it.
|
||||
_warn_stale_dashboard_processes = _kill_stale_dashboard_processes
|
||||
|
||||
|
||||
def _update_via_zip(args):
|
||||
@@ -5564,7 +5658,7 @@ def _update_via_zip(args):
|
||||
|
||||
print()
|
||||
print("✓ Update complete!")
|
||||
_warn_stale_dashboard_processes()
|
||||
_kill_stale_dashboard_processes()
|
||||
|
||||
|
||||
def _stash_local_changes_if_needed(git_cmd: list[str], cwd: Path) -> Optional[str]:
|
||||
@@ -7381,9 +7475,12 @@ def _cmd_update_impl(args, gateway_mode: bool):
|
||||
except Exception as e:
|
||||
logger.debug("Legacy unit check during update failed: %s", e)
|
||||
|
||||
# Warn about stale dashboard processes — the dashboard has no
|
||||
# service manager, so we can only tell the user to restart them.
|
||||
_warn_stale_dashboard_processes()
|
||||
# Kill stale dashboard processes — the dashboard has no service
|
||||
# manager, so leaving it alive after a code update produces a
|
||||
# silent frontend/backend mismatch. We can't auto-restart it
|
||||
# (no saved launch args) but we can stop it, and a hint is
|
||||
# printed for the user to re-launch.
|
||||
_kill_stale_dashboard_processes()
|
||||
|
||||
print()
|
||||
print("Tip: You can now select a provider and model:")
|
||||
|
||||
@@ -1,19 +1,29 @@
|
||||
"""Tests for _warn_stale_dashboard_processes — stale dashboard detection.
|
||||
"""Tests for the stale-dashboard handling run at the end of ``hermes update``.
|
||||
|
||||
Ensures ``hermes update`` warns the user when dashboard processes from a
|
||||
previous version are still running after files on disk have been replaced.
|
||||
See #16872.
|
||||
``hermes update`` detects ``hermes dashboard`` processes left over from the
|
||||
previous version and kills them (SIGTERM + SIGKILL grace, or ``taskkill /F``
|
||||
on Windows). Without this, the running backend silently serves stale Python
|
||||
against a freshly-updated JS bundle, producing 401s / empty data.
|
||||
|
||||
History:
|
||||
- #16872 introduced the warn-only helper (``_warn_stale_dashboard_processes``).
|
||||
- #17049 fixed a Windows wmic UnicodeDecodeError crash on non-UTF-8 locales.
|
||||
- This file now also covers the kill semantics that replaced the warning.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import sys
|
||||
from unittest.mock import patch, MagicMock
|
||||
from unittest.mock import patch, MagicMock, call
|
||||
|
||||
import pytest
|
||||
|
||||
from hermes_cli.main import _warn_stale_dashboard_processes
|
||||
from hermes_cli.main import (
|
||||
_find_stale_dashboard_pids,
|
||||
_kill_stale_dashboard_processes,
|
||||
_warn_stale_dashboard_processes, # back-compat alias
|
||||
)
|
||||
|
||||
|
||||
def _ps_line(pid: int, cmd: str) -> str:
|
||||
@@ -21,11 +31,26 @@ def _ps_line(pid: int, cmd: str) -> str:
|
||||
return f"{pid:>7} {cmd}"
|
||||
|
||||
|
||||
class TestWarnStaleDashboardProcesses:
|
||||
"""Unit tests for the stale dashboard process warning."""
|
||||
def _ps_runner(stdout: str):
|
||||
"""Build a subprocess.run side_effect that only stubs ps -A calls.
|
||||
|
||||
def test_no_warning_when_no_dashboard_running(self, capsys):
|
||||
"""ps returns no matching processes — no warning should be printed."""
|
||||
Any other subprocess.run invocation (e.g. taskkill on Windows) is
|
||||
handed back as a successful no-op. This lets tests exercise the real
|
||||
scan path without having to re-stub every unrelated subprocess call
|
||||
made later in ``_kill_stale_dashboard_processes``.
|
||||
"""
|
||||
def _side_effect(args, *a, **kw):
|
||||
if isinstance(args, (list, tuple)) and args and args[0] == "ps":
|
||||
return MagicMock(returncode=0, stdout=stdout, stderr="")
|
||||
# Any other subprocess.run (e.g. taskkill) — benign success stub.
|
||||
return MagicMock(returncode=0, stdout="", stderr="")
|
||||
return _side_effect
|
||||
|
||||
|
||||
class TestFindStaleDashboardPids:
|
||||
"""Unit tests for the ps/wmic-based detection step."""
|
||||
|
||||
def test_no_matches_returns_empty(self):
|
||||
with patch("subprocess.run") as mock_run:
|
||||
mock_run.return_value = MagicMock(
|
||||
returncode=0,
|
||||
@@ -35,26 +60,18 @@ class TestWarnStaleDashboardProcesses:
|
||||
+ "\n",
|
||||
stderr="",
|
||||
)
|
||||
_warn_stale_dashboard_processes()
|
||||
output = capsys.readouterr().out
|
||||
assert "dashboard process" not in output
|
||||
assert _find_stale_dashboard_pids() == []
|
||||
|
||||
def test_warning_printed_for_running_dashboard(self, capsys):
|
||||
"""ps finds a dashboard PID — warning with PID should appear."""
|
||||
def test_matches_running_dashboard(self):
|
||||
with patch("subprocess.run") as mock_run:
|
||||
mock_run.return_value = MagicMock(
|
||||
returncode=0,
|
||||
stdout=_ps_line(12345, "python3 -m hermes_cli.main dashboard --port 9119") + "\n",
|
||||
stderr="",
|
||||
)
|
||||
_warn_stale_dashboard_processes()
|
||||
output = capsys.readouterr().out
|
||||
assert "1 dashboard process" in output
|
||||
assert "PID 12345" in output
|
||||
assert "kill <pid>" in output
|
||||
assert _find_stale_dashboard_pids() == [12345]
|
||||
|
||||
def test_multiple_dashboard_pids(self, capsys):
|
||||
"""Multiple dashboard processes — all PIDs listed."""
|
||||
def test_multiple_matches(self):
|
||||
with patch("subprocess.run") as mock_run:
|
||||
mock_run.return_value = MagicMock(
|
||||
returncode=0,
|
||||
@@ -65,15 +82,9 @@ class TestWarnStaleDashboardProcesses:
|
||||
]) + "\n",
|
||||
stderr="",
|
||||
)
|
||||
_warn_stale_dashboard_processes()
|
||||
output = capsys.readouterr().out
|
||||
assert "3 dashboard process" in output
|
||||
assert "PID 12345" in output
|
||||
assert "PID 12346" in output
|
||||
assert "PID 12347" in output
|
||||
assert sorted(_find_stale_dashboard_pids()) == [12345, 12346, 12347]
|
||||
|
||||
def test_self_pid_excluded(self, capsys):
|
||||
"""The current process PID should not be reported."""
|
||||
def test_self_pid_excluded(self):
|
||||
with patch("subprocess.run") as mock_run:
|
||||
mock_run.return_value = MagicMock(
|
||||
returncode=0,
|
||||
@@ -83,41 +94,51 @@ class TestWarnStaleDashboardProcesses:
|
||||
]) + "\n",
|
||||
stderr="",
|
||||
)
|
||||
_warn_stale_dashboard_processes()
|
||||
output = capsys.readouterr().out
|
||||
# The self PID may still appear inside an unrelated context, so anchor
|
||||
# the check to "PID <self>" which is how the warning prints.
|
||||
assert f"PID {os.getpid()}" not in output
|
||||
assert "PID 12345" in output
|
||||
pids = _find_stale_dashboard_pids()
|
||||
assert os.getpid() not in pids
|
||||
assert 12345 in pids
|
||||
|
||||
def test_ps_not_found_silently_ignored(self, capsys):
|
||||
"""If ps is missing (FileNotFoundError), no crash, no warning."""
|
||||
def test_ps_not_found_returns_empty(self):
|
||||
with patch("subprocess.run", side_effect=FileNotFoundError):
|
||||
_warn_stale_dashboard_processes()
|
||||
output = capsys.readouterr().out
|
||||
assert output == ""
|
||||
assert _find_stale_dashboard_pids() == []
|
||||
|
||||
def test_ps_timeout_silently_ignored(self, capsys):
|
||||
"""If ps times out, no crash, no warning."""
|
||||
def test_ps_timeout_returns_empty(self):
|
||||
import subprocess as sp
|
||||
|
||||
with patch("subprocess.run", side_effect=sp.TimeoutExpired("ps", 10)):
|
||||
_warn_stale_dashboard_processes()
|
||||
output = capsys.readouterr().out
|
||||
assert output == ""
|
||||
assert _find_stale_dashboard_pids() == []
|
||||
|
||||
def test_empty_ps_output_no_warning(self, capsys):
|
||||
"""ps returns 0 but empty stdout — no warning."""
|
||||
def test_unrelated_process_containing_word_dashboard_not_matched(self):
|
||||
"""Guards against greedy pgrep-style matching catching chat sessions
|
||||
or unrelated processes whose cmdline happens to contain 'dashboard'.
|
||||
"""
|
||||
with patch("subprocess.run") as mock_run:
|
||||
mock_run.return_value = MagicMock(
|
||||
returncode=0, stdout="\n", stderr=""
|
||||
returncode=0,
|
||||
stdout="\n".join([
|
||||
_ps_line(12345, "python3 -m hermes_cli.main dashboard --port 9119"),
|
||||
_ps_line(22222, "python3 -m hermes_cli.main chat -q 'rewrite my dashboard'"),
|
||||
_ps_line(33333, "node /opt/grafana/dashboard-server.js"),
|
||||
]) + "\n",
|
||||
stderr="",
|
||||
)
|
||||
_warn_stale_dashboard_processes()
|
||||
output = capsys.readouterr().out
|
||||
assert "dashboard process" not in output
|
||||
pids = _find_stale_dashboard_pids()
|
||||
assert pids == [12345]
|
||||
|
||||
def test_invalid_pid_lines_skipped(self, capsys):
|
||||
"""Malformed ps lines should be skipped gracefully."""
|
||||
def test_grep_lines_ignored(self):
|
||||
with patch("subprocess.run") as mock_run:
|
||||
mock_run.return_value = MagicMock(
|
||||
returncode=0,
|
||||
stdout="\n".join([
|
||||
_ps_line(99999, "grep hermes dashboard"),
|
||||
_ps_line(12345, "hermes dashboard --port 9119"),
|
||||
]) + "\n",
|
||||
stderr="",
|
||||
)
|
||||
pids = _find_stale_dashboard_pids()
|
||||
assert 99999 not in pids
|
||||
assert 12345 in pids
|
||||
|
||||
def test_invalid_pid_lines_skipped(self):
|
||||
with patch("subprocess.run") as mock_run:
|
||||
mock_run.return_value = MagicMock(
|
||||
returncode=0,
|
||||
@@ -128,53 +149,166 @@ class TestWarnStaleDashboardProcesses:
|
||||
]) + "\n",
|
||||
stderr="",
|
||||
)
|
||||
_warn_stale_dashboard_processes()
|
||||
output = capsys.readouterr().out
|
||||
assert "PID 12345" in output
|
||||
assert "1 dashboard process" in output
|
||||
pids = _find_stale_dashboard_pids()
|
||||
assert pids == [12345]
|
||||
|
||||
def test_unrelated_process_containing_word_dashboard_not_matched(self, capsys):
|
||||
"""A process whose cmdline contains 'dashboard' but isn't a hermes
|
||||
dashboard process must NOT be flagged. This guards against the old
|
||||
``pgrep -f "hermes.*dashboard"`` greedy regex that matched e.g. a
|
||||
chat session argv containing both words.
|
||||
"""
|
||||
with patch("subprocess.run") as mock_run:
|
||||
mock_run.return_value = MagicMock(
|
||||
returncode=0,
|
||||
stdout="\n".join([
|
||||
# Legitimate dashboard — should match.
|
||||
_ps_line(12345, "python3 -m hermes_cli.main dashboard --port 9119"),
|
||||
# hermes running something else, with "dashboard" as a
|
||||
# substring of an unrelated arg — should NOT match.
|
||||
_ps_line(22222, "python3 -m hermes_cli.main chat -q 'rewrite my dashboard'"),
|
||||
# Completely unrelated process mentioning dashboard.
|
||||
_ps_line(33333, "node /opt/grafana/dashboard-server.js"),
|
||||
]) + "\n",
|
||||
stderr="",
|
||||
)
|
||||
_warn_stale_dashboard_processes()
|
||||
output = capsys.readouterr().out
|
||||
assert "1 dashboard process" in output
|
||||
assert "PID 12345" in output
|
||||
assert "PID 22222" not in output
|
||||
assert "PID 33333" not in output
|
||||
|
||||
def test_grep_lines_ignored(self, capsys):
|
||||
"""Lines containing 'grep' (from a pipe in ps output) are ignored."""
|
||||
with patch("subprocess.run") as mock_run:
|
||||
mock_run.return_value = MagicMock(
|
||||
returncode=0,
|
||||
stdout="\n".join([
|
||||
_ps_line(99999, "grep hermes dashboard"),
|
||||
_ps_line(12345, "hermes dashboard --port 9119"),
|
||||
]) + "\n",
|
||||
stderr="",
|
||||
)
|
||||
_warn_stale_dashboard_processes()
|
||||
output = capsys.readouterr().out
|
||||
assert "PID 99999" not in output
|
||||
assert "PID 12345" in output
|
||||
@pytest.mark.skipif(sys.platform == "win32", reason="POSIX kill semantics")
|
||||
class TestKillStaleDashboardPosix:
|
||||
"""Kill path on Linux / macOS: SIGTERM then SIGKILL any survivors."""
|
||||
|
||||
def test_no_stale_processes_is_a_noop(self, capsys):
|
||||
with patch("hermes_cli.main._find_stale_dashboard_pids", return_value=[]):
|
||||
_kill_stale_dashboard_processes()
|
||||
assert capsys.readouterr().out == ""
|
||||
|
||||
def test_sigterm_graceful_exit(self, capsys):
|
||||
"""Processes that exit on SIGTERM (the probe gets ProcessLookupError)
|
||||
are reported as stopped and SIGKILL is never sent."""
|
||||
import signal as _signal
|
||||
|
||||
killed_signals: list[tuple[int, int]] = []
|
||||
|
||||
def fake_kill(pid, sig):
|
||||
killed_signals.append((pid, sig))
|
||||
if sig == 0:
|
||||
# Probe after SIGTERM → "process gone".
|
||||
raise ProcessLookupError
|
||||
# SIGTERM itself: succeed silently.
|
||||
|
||||
with patch("hermes_cli.main._find_stale_dashboard_pids",
|
||||
return_value=[12345, 12346]), \
|
||||
patch("os.kill", side_effect=fake_kill), \
|
||||
patch("time.sleep"):
|
||||
_kill_stale_dashboard_processes()
|
||||
|
||||
# Both got SIGTERM.
|
||||
sigterms = [pid for pid, sig in killed_signals if sig == _signal.SIGTERM]
|
||||
assert sorted(sigterms) == [12345, 12346]
|
||||
# No SIGKILL was needed.
|
||||
assert not any(sig == _signal.SIGKILL for _, sig in killed_signals)
|
||||
|
||||
out = capsys.readouterr().out
|
||||
assert "Stopping 2 stale dashboard" in out
|
||||
assert "✓ stopped PID 12345" in out
|
||||
assert "✓ stopped PID 12346" in out
|
||||
assert "Restart the dashboard" in out
|
||||
|
||||
def test_sigkill_fallback_for_survivors(self, capsys):
|
||||
"""If a process survives SIGTERM + the grace window, SIGKILL is sent."""
|
||||
import signal as _signal
|
||||
|
||||
sent: list[tuple[int, int]] = []
|
||||
|
||||
def fake_kill(pid, sig):
|
||||
sent.append((pid, sig))
|
||||
# Simulate stubborn process: probe (sig 0) always succeeds,
|
||||
# SIGTERM does nothing, SIGKILL is where it "dies".
|
||||
if sig in (_signal.SIGTERM, 0, _signal.SIGKILL):
|
||||
return
|
||||
# Any other signal — also fine.
|
||||
|
||||
with patch("hermes_cli.main._find_stale_dashboard_pids",
|
||||
return_value=[99999]), \
|
||||
patch("os.kill", side_effect=fake_kill), \
|
||||
patch("time.sleep"), \
|
||||
patch("time.monotonic", side_effect=[0.0] + [10.0] * 20):
|
||||
# monotonic jumps past the 3s deadline on the second read so the
|
||||
# grace loop exits immediately after one iteration.
|
||||
_kill_stale_dashboard_processes()
|
||||
|
||||
signals_sent = [sig for _, sig in sent]
|
||||
assert _signal.SIGTERM in signals_sent
|
||||
assert _signal.SIGKILL in signals_sent
|
||||
|
||||
out = capsys.readouterr().out
|
||||
assert "✓ stopped PID 99999" in out
|
||||
|
||||
def test_permission_error_is_reported_not_raised(self, capsys):
|
||||
"""os.kill raising PermissionError (e.g. another user's process)
|
||||
must not abort hermes update — it's reported as a failure and we
|
||||
move on."""
|
||||
def fake_kill(pid, sig):
|
||||
raise PermissionError("Operation not permitted")
|
||||
|
||||
with patch("hermes_cli.main._find_stale_dashboard_pids",
|
||||
return_value=[12345]), \
|
||||
patch("os.kill", side_effect=fake_kill), \
|
||||
patch("time.sleep"):
|
||||
_kill_stale_dashboard_processes() # must not raise
|
||||
|
||||
out = capsys.readouterr().out
|
||||
assert "✗ failed to stop PID 12345" in out
|
||||
assert "Operation not permitted" in out
|
||||
|
||||
def test_process_already_gone_counts_as_stopped(self, capsys):
|
||||
"""ProcessLookupError on the initial SIGTERM means the process
|
||||
already exited between detection and the kill — treat as success."""
|
||||
def fake_kill(pid, sig):
|
||||
raise ProcessLookupError
|
||||
|
||||
with patch("hermes_cli.main._find_stale_dashboard_pids",
|
||||
return_value=[12345]), \
|
||||
patch("os.kill", side_effect=fake_kill), \
|
||||
patch("time.sleep"):
|
||||
_kill_stale_dashboard_processes()
|
||||
|
||||
out = capsys.readouterr().out
|
||||
assert "✓ stopped PID 12345" in out
|
||||
assert "failed to stop" not in out
|
||||
|
||||
|
||||
class TestKillStaleDashboardWindows:
|
||||
"""Kill path on Windows: taskkill /F."""
|
||||
|
||||
def test_taskkill_invoked_for_each_pid(self, monkeypatch, capsys):
|
||||
monkeypatch.setattr(sys, "platform", "win32")
|
||||
|
||||
def fake_run(args, *a, **kw):
|
||||
# taskkill returns 0 on success
|
||||
return MagicMock(returncode=0, stdout="", stderr="")
|
||||
|
||||
with patch("hermes_cli.main._find_stale_dashboard_pids",
|
||||
return_value=[12345, 12346]), \
|
||||
patch("subprocess.run", side_effect=fake_run) as mock_run:
|
||||
_kill_stale_dashboard_processes()
|
||||
|
||||
# Each PID triggered a taskkill /PID <n> /F invocation.
|
||||
taskkill_calls = [
|
||||
c for c in mock_run.call_args_list
|
||||
if c.args and isinstance(c.args[0], list) and c.args[0][:1] == ["taskkill"]
|
||||
]
|
||||
assert len(taskkill_calls) == 2
|
||||
assert ["taskkill", "/PID", "12345", "/F"] in [c.args[0] for c in taskkill_calls]
|
||||
assert ["taskkill", "/PID", "12346", "/F"] in [c.args[0] for c in taskkill_calls]
|
||||
|
||||
out = capsys.readouterr().out
|
||||
assert "✓ stopped PID 12345" in out
|
||||
assert "✓ stopped PID 12346" in out
|
||||
|
||||
def test_taskkill_failure_is_reported(self, monkeypatch, capsys):
|
||||
monkeypatch.setattr(sys, "platform", "win32")
|
||||
|
||||
def fake_run(args, *a, **kw):
|
||||
return MagicMock(returncode=128, stdout="",
|
||||
stderr="ERROR: Access is denied.")
|
||||
|
||||
with patch("hermes_cli.main._find_stale_dashboard_pids",
|
||||
return_value=[12345]), \
|
||||
patch("subprocess.run", side_effect=fake_run):
|
||||
_kill_stale_dashboard_processes() # must not raise
|
||||
|
||||
out = capsys.readouterr().out
|
||||
assert "✗ failed to stop PID 12345" in out
|
||||
assert "Access is denied" in out
|
||||
|
||||
|
||||
class TestBackCompatAlias:
|
||||
"""``_warn_stale_dashboard_processes`` is kept as an alias for the
|
||||
new kill function so old imports don't break."""
|
||||
|
||||
def test_alias_is_the_kill_function(self):
|
||||
assert _warn_stale_dashboard_processes is _kill_stale_dashboard_processes
|
||||
|
||||
|
||||
class TestWindowsWmicEncoding:
|
||||
@@ -188,7 +322,6 @@ class TestWindowsWmicEncoding:
|
||||
UnicodeDecodeError on non-UTF-8 wmic output."""
|
||||
monkeypatch.setattr(sys, "platform", "win32")
|
||||
with patch("subprocess.run") as mock_run:
|
||||
# Provide a minimal valid wmic /FORMAT:LIST response.
|
||||
mock_run.return_value = MagicMock(
|
||||
returncode=0,
|
||||
stdout=(
|
||||
@@ -197,10 +330,12 @@ class TestWindowsWmicEncoding:
|
||||
),
|
||||
stderr="",
|
||||
)
|
||||
_warn_stale_dashboard_processes()
|
||||
_find_stale_dashboard_pids()
|
||||
|
||||
# The wmic call is the first subprocess.run invocation.
|
||||
assert mock_run.called, "subprocess.run was not invoked"
|
||||
kwargs = mock_run.call_args.kwargs
|
||||
wmic_call = mock_run.call_args_list[0]
|
||||
kwargs = wmic_call.kwargs
|
||||
assert kwargs.get("encoding") == "utf-8", (
|
||||
"encoding kwarg must be 'utf-8' so wmic output is decoded "
|
||||
"deterministically rather than via the implicit reader-thread "
|
||||
@@ -211,10 +346,10 @@ class TestWindowsWmicEncoding:
|
||||
"down the reader thread (#17049)."
|
||||
)
|
||||
|
||||
def test_wmic_returns_none_stdout_does_not_crash(self, monkeypatch, capsys):
|
||||
def test_wmic_returns_none_stdout_does_not_crash(self, monkeypatch):
|
||||
"""If subprocess.run returns successfully but stdout is None — which
|
||||
is what Python 3.11 leaves behind when the reader thread silently
|
||||
crashed on UnicodeDecodeError before this fix landed — the warning
|
||||
crashed on UnicodeDecodeError before this fix landed — detection
|
||||
must short-circuit instead of raising AttributeError on
|
||||
``None.split('\\n')`` and aborting `hermes update` (#17049)."""
|
||||
monkeypatch.setattr(sys, "platform", "win32")
|
||||
@@ -223,7 +358,4 @@ class TestWindowsWmicEncoding:
|
||||
returncode=0, stdout=None, stderr=""
|
||||
)
|
||||
# Must not raise.
|
||||
_warn_stale_dashboard_processes()
|
||||
|
||||
output = capsys.readouterr().out
|
||||
assert "dashboard process" not in output
|
||||
assert _find_stale_dashboard_pids() == []
|
||||
|
||||
Reference in New Issue
Block a user