From 437feabb74d9b57e69402ac13ff690be5be372ce Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Fri, 10 Apr 2026 03:45:34 -0700 Subject: [PATCH] fix(gateway): launchd_stop uses bootout so KeepAlive doesn't respawn (#7119) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit launchd_stop() previously used `launchctl kill SIGTERM` which only signals the process. Because the plist has KeepAlive.SuccessfulExit=false, launchd immediately respawns the gateway — making `hermes gateway stop` a no-op that prints '✓ Service stopped' while the service keeps running. Switch to `launchctl bootout` which unloads the service definition so KeepAlive can't trigger. The process exits and stays stopped until `hermes gateway start` (which already handles re-bootstrapping unloaded jobs via error codes 3/113). Also adds _wait_for_gateway_exit() after bootout to ensure the process is fully gone before returning, and tolerates 'already unloaded' errors. Fixes: .env changes not taking effect after gateway stop+restart on macOS. The root cause was that stop didn't actually stop — the respawned process loaded the old env before the user's restart command ran. --- hermes_cli/gateway.py | 14 +++++- tests/hermes_cli/test_gateway_service.py | 57 ++++++++++++++++++++++++ 2 files changed, 70 insertions(+), 1 deletion(-) diff --git a/hermes_cli/gateway.py b/hermes_cli/gateway.py index 90b89be8c..9ee1d892b 100644 --- a/hermes_cli/gateway.py +++ b/hermes_cli/gateway.py @@ -1196,7 +1196,19 @@ def launchd_start(): def launchd_stop(): label = get_launchd_label() - subprocess.run(["launchctl", "kill", "SIGTERM", f"{_launchd_domain()}/{label}"], check=True, timeout=30) + target = f"{_launchd_domain()}/{label}" + # bootout unloads the service definition so KeepAlive doesn't respawn + # the process. A plain `kill SIGTERM` only signals the process — launchd + # immediately restarts it because KeepAlive.SuccessfulExit = false. + # `hermes gateway start` re-bootstraps when it detects the job is unloaded. + try: + subprocess.run(["launchctl", "bootout", target], check=True, timeout=90) + except subprocess.CalledProcessError as e: + if e.returncode in (3, 113): + pass # Already unloaded — nothing to stop. + else: + raise + _wait_for_gateway_exit(timeout=10.0, force_after=5.0) print("✓ Service stopped") def _wait_for_gateway_exit(timeout: float = 10.0, force_after: float = 5.0): diff --git a/tests/hermes_cli/test_gateway_service.py b/tests/hermes_cli/test_gateway_service.py index 23ad21b36..3a543693e 100644 --- a/tests/hermes_cli/test_gateway_service.py +++ b/tests/hermes_cli/test_gateway_service.py @@ -234,6 +234,63 @@ class TestLaunchdServiceRecovery: ["launchctl", "kickstart", target], ] + def test_launchd_stop_uses_bootout_not_kill(self, monkeypatch): + """launchd_stop must bootout the service so KeepAlive doesn't respawn it.""" + label = gateway_cli.get_launchd_label() + domain = gateway_cli._launchd_domain() + target = f"{domain}/{label}" + + calls = [] + + def fake_run(cmd, check=False, **kwargs): + calls.append(cmd) + return SimpleNamespace(returncode=0, stdout="", stderr="") + + monkeypatch.setattr(gateway_cli.subprocess, "run", fake_run) + monkeypatch.setattr(gateway_cli, "_wait_for_gateway_exit", lambda **kw: None) + + gateway_cli.launchd_stop() + + assert calls == [["launchctl", "bootout", target]] + + def test_launchd_stop_tolerates_already_unloaded(self, monkeypatch, capsys): + """launchd_stop silently handles exit codes 3/113 (job not loaded).""" + label = gateway_cli.get_launchd_label() + domain = gateway_cli._launchd_domain() + target = f"{domain}/{label}" + + def fake_run(cmd, check=False, **kwargs): + if "bootout" in cmd: + raise gateway_cli.subprocess.CalledProcessError(3, cmd, stderr="Could not find service") + return SimpleNamespace(returncode=0, stdout="", stderr="") + + monkeypatch.setattr(gateway_cli.subprocess, "run", fake_run) + monkeypatch.setattr(gateway_cli, "_wait_for_gateway_exit", lambda **kw: None) + + # Should not raise — exit code 3 means already unloaded + gateway_cli.launchd_stop() + + output = capsys.readouterr().out + assert "stopped" in output.lower() + + def test_launchd_stop_waits_for_process_exit(self, monkeypatch): + """launchd_stop calls _wait_for_gateway_exit after bootout.""" + wait_called = [] + + def fake_run(cmd, check=False, **kwargs): + return SimpleNamespace(returncode=0, stdout="", stderr="") + + def fake_wait(**kwargs): + wait_called.append(kwargs) + + monkeypatch.setattr(gateway_cli.subprocess, "run", fake_run) + monkeypatch.setattr(gateway_cli, "_wait_for_gateway_exit", fake_wait) + + gateway_cli.launchd_stop() + + assert len(wait_called) == 1 + assert wait_called[0] == {"timeout": 10.0, "force_after": 5.0} + def test_launchd_status_reports_local_stale_plist_when_unloaded(self, tmp_path, monkeypatch, capsys): plist_path = tmp_path / "ai.hermes.gateway.plist" plist_path.write_text("old content", encoding="utf-8")