diff --git a/hermes_cli/gateway.py b/hermes_cli/gateway.py
index 70b0b7b27..1beb893cd 100644
--- a/hermes_cli/gateway.py
+++ b/hermes_cli/gateway.py
@@ -89,7 +89,7 @@ def find_gateway_pids() -> list:
 
 
 def kill_gateway_processes(force: bool = False) -> int:
-    """Kill any running gateway processes. Returns count killed."""
+    """Kill ALL running gateway processes (across all profiles). Returns count killed."""
     pids = find_gateway_pids()
     killed = 0
 
@@ -109,6 +109,57 @@ def kill_gateway_processes(force: bool = False) -> int:
     return killed
 
 
+def stop_profile_gateway(timeout: float = 10.0) -> bool:
+    """Stop only the gateway for the current profile (HERMES_HOME-scoped).
+
+    Uses the PID file written by start_gateway(), so it only signals the
+    gateway belonging to this profile — not gateways from other profiles.
+
+    Args:
+        timeout: Seconds to wait for the process to exit after SIGTERM.
+
+    Returns:
+        True if the gateway exited (or was already gone) and its PID file
+        was removed. False if no gateway was found, the signal was not
+        permitted, or the process was still alive after ``timeout``.
+    """
+    import time as _time
+
+    try:
+        from gateway.status import get_running_pid, remove_pid_file
+    except ImportError:
+        return False
+
+    pid = get_running_pid()
+    if pid is None:
+        return False
+
+    try:
+        os.kill(pid, signal.SIGTERM)
+    except ProcessLookupError:
+        pass  # Already gone — still clear the stale PID file below
+    except PermissionError:
+        print(f"⚠ Permission denied to kill PID {pid}")
+        return False
+
+    # Poll with signal 0 until the process is gone or the deadline passes.
+    deadline = _time.monotonic() + timeout
+    while True:
+        try:
+            os.kill(pid, 0)
+        except (ProcessLookupError, PermissionError):
+            break  # No longer ours to signal: treat as exited
+        if _time.monotonic() >= deadline:
+            # Still alive: keep the PID file so the process stays
+            # discoverable, and do not report a stop that did not happen.
+            print(f"⚠ Gateway PID {pid} did not exit within {timeout:g}s")
+            return False
+        _time.sleep(0.5)
+
+    remove_pid_file()
+    return True
+
+
 def is_linux() -> bool:
     return sys.platform.startswith('linux')
 
@@ -1831,7 +1882,7 @@ def gateway_setup():
                 elif is_macos():
                     launchd_restart()
                 else:
-                    kill_gateway_processes()
+                    stop_profile_gateway()
                     print_info("Start manually: hermes gateway")
             except subprocess.CalledProcessError as e:
                 print_error(f" Restart failed: {e}")
@@ -1945,31 +1996,40 @@ def gateway_command(args):
             sys.exit(1)
 
     elif subcmd == "stop":
-        # Try service first, then sweep any stray/manual gateway processes.
+        # Try the service manager first; --all then sweeps every gateway
+        # process, while the default only touches this profile's gateway.
+        stop_all = getattr(args, 'all', False)
         service_available = False
         system = getattr(args, 'system', False)
-        
+
         if is_linux() and (get_systemd_unit_path(system=False).exists() or get_systemd_unit_path(system=True).exists()):
             try:
                 systemd_stop(system=system)
                 service_available = True
             except subprocess.CalledProcessError:
                 pass  # Fall through to process kill
         elif is_macos() and get_launchd_plist_path().exists():
             try:
                 launchd_stop()
                 service_available = True
             except subprocess.CalledProcessError:
                 pass
 
-        killed = kill_gateway_processes()
-        if not service_available:
-            if killed:
-                print(f"✓ Stopped {killed} gateway process(es)")
+        if stop_all:
+            # --all: kill every gateway process on the machine
+            killed = kill_gateway_processes()
+            total = killed + (1 if service_available else 0)
+            if total:
+                print(f"✓ Stopped {total} gateway process(es) across all profiles")
             else:
                 print("✗ No gateway processes found")
-        elif killed:
-            print(f"✓ Stopped {killed} additional manual gateway process(es)")
+        elif service_available:
+            print(f"✓ Stopped {get_service_name()} service")
+        elif stop_profile_gateway():
+            # No systemd/launchd — fall back to the profile-scoped PID file
+            print("✓ Stopped gateway for this profile")
+        else:
+            print("✗ No gateway running for this profile")
 
     elif subcmd == "restart":
         # Try service first, fall back to killing and restarting
@@ -2016,10 +2076,9 @@ def gateway_command(args):
                 print(" Fix the service, then retry: hermes gateway start")
                 sys.exit(1)
 
-        # Manual restart: kill existing processes
-        killed = kill_gateway_processes()
-        if killed:
-            print(f"✓ Stopped {killed} gateway process(es)")
+        # Manual restart: stop only this profile's gateway
+        if stop_profile_gateway():
+            print("✓ Stopped gateway for this profile")
 
         _wait_for_gateway_exit(timeout=10.0, force_after=5.0)
 
diff --git a/hermes_cli/main.py b/hermes_cli/main.py
index 52c12c104..0f1f4aa51 100644
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -3516,139 +3516,107 @@ def cmd_update(args):
         print()
         print("✓ Update complete!")
 
-        # Auto-restart gateway if it's running.
-        # Uses the PID file (scoped to HERMES_HOME) to find this
-        # installation's gateway — safe with multiple installations.
+        # Auto-restart ALL gateways after update.
+        # The code update (git pull) is shared across all profiles, so every
+        # running gateway needs restarting to pick up the new code.
         try:
-            from gateway.status import get_running_pid, remove_pid_file
             from hermes_cli.gateway import (
-                get_service_name, get_launchd_plist_path, is_macos, is_linux,
-                launchd_restart, _ensure_user_systemd_env,
-                get_systemd_linger_status,
+                is_macos, is_linux, _ensure_user_systemd_env,
+                find_gateway_pids,
             )
             import signal as _signal
 
-            _gw_service_name = get_service_name()
-            existing_pid = get_running_pid()
-            has_systemd_service = False
-            has_system_service = False
-            has_launchd_service = False
+            restarted_services = []
+            killed_pids = set()
+
+            # Snapshot the gateway PIDs before any service restart. A gateway
+            # restarted by its service manager comes back under a new PID, so
+            # only PIDs that survive the restarts below are manual gateways.
+            pids_before = set(find_gateway_pids())
 
-            try:
-                _ensure_user_systemd_env()
-                check = subprocess.run(
-                    ["systemctl", "--user", "is-active", _gw_service_name],
-                    capture_output=True, text=True, timeout=5,
-                )
-                has_systemd_service = check.stdout.strip() == "active"
-            except (FileNotFoundError, subprocess.TimeoutExpired):
-                pass
-
-            # Also check for a system-level service (hermes gateway install --system).
-            # This covers gateways running under system systemd where --user
-            # fails due to missing D-Bus session.
-            if not has_systemd_service and is_linux():
+            # --- Systemd services (Linux) ---
+            # Discover all hermes-gateway* units (default + profiles)
+            if is_linux():
                 try:
-                    check = subprocess.run(
-                        ["systemctl", "is-active", _gw_service_name],
-                        capture_output=True, text=True, timeout=5,
-                    )
-                    has_system_service = check.stdout.strip() == "active"
-                except (FileNotFoundError, subprocess.TimeoutExpired):
+                    _ensure_user_systemd_env()
+                except Exception:
                     pass
 
-            # Check for macOS launchd service
+                for scope, scope_cmd in [("user", ["systemctl", "--user"]), ("system", ["systemctl"])]:
+                    try:
+                        result = subprocess.run(
+                            scope_cmd + ["list-units", "hermes-gateway*", "--plain", "--no-legend", "--no-pager"],
+                            capture_output=True, text=True, timeout=10,
+                        )
+                        for line in result.stdout.strip().splitlines():
+                            parts = line.split()
+                            if not parts:
+                                continue
+                            unit = parts[0]  # e.g. hermes-gateway.service or hermes-gateway-coder.service
+                            if not unit.endswith(".service"):
+                                continue
+                            svc_name = unit.removesuffix(".service")
+                            # Check if active
+                            check = subprocess.run(
+                                scope_cmd + ["is-active", svc_name],
+                                capture_output=True, text=True, timeout=5,
+                            )
+                            if check.stdout.strip() == "active":
+                                restart = subprocess.run(
+                                    scope_cmd + ["restart", svc_name],
+                                    capture_output=True, text=True, timeout=15,
+                                )
+                                if restart.returncode == 0:
+                                    restarted_services.append(svc_name)
+                                else:
+                                    print(f" ⚠ Failed to restart {svc_name}: {restart.stderr.strip()}")
+                    except (FileNotFoundError, subprocess.TimeoutExpired):
+                        pass
+
+            # --- Launchd services (macOS) ---
             if is_macos():
                 try:
-                    from hermes_cli.gateway import get_launchd_label
+                    from hermes_cli.gateway import launchd_restart, get_launchd_label, get_launchd_plist_path
                     plist_path = get_launchd_plist_path()
                     if plist_path.exists():
                         check = subprocess.run(
                             ["launchctl", "list", get_launchd_label()],
                             capture_output=True, text=True, timeout=5,
                         )
-                        has_launchd_service = check.returncode == 0
-                except (FileNotFoundError, subprocess.TimeoutExpired):
+                        if check.returncode == 0:
+                            try:
+                                launchd_restart()
+                                restarted_services.append(get_launchd_label())
+                            except subprocess.CalledProcessError as e:
+                                stderr = (getattr(e, "stderr", "") or "").strip()
+                                print(f" ⚠ Gateway restart failed: {stderr}")
+                except (FileNotFoundError, subprocess.TimeoutExpired, ImportError):
                     pass
 
-            if existing_pid or has_systemd_service or has_system_service or has_launchd_service:
-                print()
+            # --- Manual (non-service) gateways ---
+            # Only signal gateways that were running before the restarts above
+            # and are still alive under the same PID. Freshly restarted service
+            # gateways have new PIDs and must be left alone: SIGTERM-ing them
+            # would undo the restart (and race with launchd KeepAlive).
+            manual_pids = pids_before.intersection(find_gateway_pids())
+            for pid in manual_pids:
+                try:
+                    os.kill(pid, _signal.SIGTERM)
+                    killed_pids.add(pid)
+                except (ProcessLookupError, PermissionError):
+                    pass
+
+            if restarted_services or killed_pids:
+                print()
+                for svc in restarted_services:
+                    print(f" ✓ Restarted {svc}")
+                if killed_pids:
+                    print(f" → Stopped {len(killed_pids)} manual gateway process(es)")
+                    print(" Restart manually: hermes gateway run")
+                    # Also restart for each profile if needed
+                    if len(killed_pids) > 1:
+                        print(" (or: hermes -p <profile> gateway run for each profile)")
 
-                # When a service manager is handling the gateway, let it
-                # manage the lifecycle — don't manually SIGTERM the PID
-                # (launchd KeepAlive would respawn immediately, causing races).
-                if has_systemd_service:
-                    import time as _time
-                    if existing_pid:
-                        try:
-                            os.kill(existing_pid, _signal.SIGTERM)
-                            print(f"→ Stopped gateway process (PID {existing_pid})")
-                        except ProcessLookupError:
-                            pass
-                        except PermissionError:
-                            print(f"⚠ Permission denied killing gateway PID {existing_pid}")
-                        remove_pid_file()
-                        _time.sleep(1)  # Brief pause for port/socket release
-                    print("→ Restarting gateway service...")
-                    restart = subprocess.run(
-                        ["systemctl", "--user", "restart", _gw_service_name],
-                        capture_output=True, text=True, timeout=15,
-                    )
-                    if restart.returncode == 0:
-                        print("✓ Gateway restarted.")
-                    else:
-                        print(f"⚠ Gateway restart failed: {restart.stderr.strip()}")
-                        # Check if linger is the issue
-                        if is_linux():
-                            linger_ok, _detail = get_systemd_linger_status()
-                            if linger_ok is not True:
-                                import getpass
-                                _username = getpass.getuser()
-                                print()
-                                print(" Linger must be enabled for the gateway user service to function.")
-                                print(f" Run: sudo loginctl enable-linger {_username}")
-                                print()
-                                print(" Then restart the gateway:")
-                                print(" hermes gateway restart")
-                            else:
-                                print(" Try manually: hermes gateway restart")
-                elif has_system_service:
-                    # System-level service (hermes gateway install --system).
-                    # No D-Bus session needed — systemctl without --user talks
-                    # directly to the system manager over /run/systemd/private.
-                    print("→ Restarting system gateway service...")
-                    restart = subprocess.run(
-                        ["systemctl", "restart", _gw_service_name],
-                        capture_output=True, text=True, timeout=15,
-                    )
-                    if restart.returncode == 0:
-                        print("✓ Gateway restarted (system service).")
-                    else:
-                        print(f"⚠ Gateway restart failed: {restart.stderr.strip()}")
-                        print(" System services may require root. Try:")
-                        print(f" sudo systemctl restart {_gw_service_name}")
-                elif has_launchd_service:
-                    # Use the shared launchd restart helper so we wait for the
-                    # old gateway process to fully exit before starting the new
-                    # one. This avoids stop/start races during self-update.
-                    print("→ Restarting gateway service...")
-                    try:
-                        launchd_restart()
-                    except subprocess.CalledProcessError as e:
-                        stderr = (getattr(e, "stderr", "") or "").strip()
-                        print(f"⚠ Gateway restart failed: {stderr}")
-                        print(" Try manually: hermes gateway restart")
-                elif existing_pid:
-                    try:
-                        os.kill(existing_pid, _signal.SIGTERM)
-                        print(f"→ Stopped gateway process (PID {existing_pid})")
-                    except ProcessLookupError:
-                        pass  # Already gone
-                    except PermissionError:
-                        print(f"⚠ Permission denied killing gateway PID {existing_pid}")
-                    remove_pid_file()
-                    print(" ℹ️ Gateway was running manually (not as a service).")
-                    print(" Restart it with: hermes gateway run")
         except Exception as e:
             logger.debug("Gateway restart during update failed: %s", e)
 
@@ -4214,6 +4182,7 @@ For more help on a command:
     # gateway stop
     gateway_stop = gateway_subparsers.add_parser("stop", help="Stop gateway service")
     gateway_stop.add_argument("--system", action="store_true", help="Target the Linux system-level gateway service")
+    gateway_stop.add_argument("--all", action="store_true", help="Stop ALL gateway processes across all profiles")
 
     # gateway restart
     gateway_restart = gateway_subparsers.add_parser("restart", help="Restart gateway service")
diff --git a/tests/hermes_cli/test_gateway_service.py b/tests/hermes_cli/test_gateway_service.py
index 21c70c589..e97aad4c7 100644
--- a/tests/hermes_cli/test_gateway_service.py
+++ b/tests/hermes_cli/test_gateway_service.py
@@ -103,7 +103,9 @@ class TestGeneratedSystemdUnits:
 
 
 class TestGatewayStopCleanup:
-    def test_stop_sweeps_manual_gateway_processes_after_service_stop(self, tmp_path, monkeypatch):
+    def test_stop_only_kills_current_profile_by_default(self, tmp_path, monkeypatch):
+        """Without --all, stop uses systemd (if available) and does NOT call
+        the global kill_gateway_processes()."""
         unit_path = tmp_path / "hermes-gateway.service"
         unit_path.write_text("unit\n", encoding="utf-8")
 
@@ -123,6 +125,31 @@ class TestGatewayStopCleanup:
 
         gateway_cli.gateway_command(SimpleNamespace(gateway_command="stop"))
 
+        assert service_calls == ["stop"]
+        # Global kill should NOT be called without --all
+        assert kill_calls == []
+
+    def test_stop_all_sweeps_all_gateway_processes(self, tmp_path, monkeypatch):
+        """With --all, stop uses systemd AND calls the global kill_gateway_processes()."""
+        unit_path = tmp_path / "hermes-gateway.service"
+        unit_path.write_text("unit\n", encoding="utf-8")
+
+        monkeypatch.setattr(gateway_cli, "is_linux", lambda: True)
+        monkeypatch.setattr(gateway_cli, "is_macos", lambda: False)
+        monkeypatch.setattr(gateway_cli, "get_systemd_unit_path", lambda system=False: unit_path)
+
+        service_calls = []
+        kill_calls = []
+
+        monkeypatch.setattr(gateway_cli, "systemd_stop", lambda system=False: service_calls.append("stop"))
+        monkeypatch.setattr(
+            gateway_cli,
+            "kill_gateway_processes",
+            lambda force=False: kill_calls.append(force) or 2,
+        )
+
+        gateway_cli.gateway_command(SimpleNamespace(gateway_command="stop", **{"all": True}))
+
         assert service_calls == ["stop"]
         assert kill_calls == [False]
 
diff --git a/tests/hermes_cli/test_update_gateway_restart.py b/tests/hermes_cli/test_update_gateway_restart.py
index 40511c8a2..ff91e134d 100644
--- a/tests/hermes_cli/test_update_gateway_restart.py
+++ b/tests/hermes_cli/test_update_gateway_restart.py
@@ -47,6 +47,22 @@ def _make_run_side_effect(
         if "rev-list" in joined:
             return subprocess.CompletedProcess(cmd, 0, stdout=f"{commit_count}\n", stderr="")
 
+        # systemctl list-units hermes-gateway* — discover all gateway services
+        if "systemctl" in joined and "list-units" in joined:
+            if "--user" in joined and systemd_active:
+                return subprocess.CompletedProcess(
+                    cmd, 0,
+                    stdout="hermes-gateway.service loaded active running Hermes Gateway\n",
+                    stderr="",
+                )
+            elif "--user" not in joined and system_service_active:
+                return subprocess.CompletedProcess(
+                    cmd, 0,
+                    stdout="hermes-gateway.service loaded active running Hermes Gateway\n",
+                    stderr="",
+                )
+            return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
+
         # systemctl is-active — distinguish --user from system scope
         if "systemctl" in joined and "is-active" in joined:
             if "--user" in joined:
@@ -305,15 +321,14 @@ class TestCmdUpdateLaunchdRestart:
             launchctl_loaded=True,
         )
 
-        # Mock get_running_pid to return a PID
-        with patch("gateway.status.get_running_pid", return_value=12345), \
-             patch("gateway.status.remove_pid_file"), \
-             patch.object(gateway_cli, "launchd_restart") as mock_launchd_restart:
+        # Mock launchd_restart + find_gateway_pids (new code discovers all gateways)
+        with patch.object(gateway_cli, "launchd_restart") as mock_launchd_restart, \
+             patch.object(gateway_cli, "find_gateway_pids", return_value=[]):
             cmd_update(mock_args)
 
         captured = capsys.readouterr().out
-        assert "Restarting gateway service" in captured
-        assert "Restart it with: hermes gateway run" not in captured
+        assert "Restarted" in captured
+        assert "Restart manually: hermes gateway run" not in captured
         mock_launchd_restart.assert_called_once_with()
 
     @patch("shutil.which", return_value=None)
@@ -321,7 +336,7 @@ class TestCmdUpdateLaunchdRestart:
     def test_update_without_launchd_shows_manual_restart(
         self, mock_run, _mock_which, mock_args, capsys, tmp_path, monkeypatch,
     ):
-        """When no service manager is running, update should show the manual restart hint."""
+        """When no service manager is running but manual gateway is found, show manual restart hint."""
         monkeypatch.setattr(
             gateway_cli, "is_macos", lambda: True,
         )
@@ -336,14 +351,13 @@ class TestCmdUpdateLaunchdRestart:
             launchctl_loaded=False,
         )
 
-        with patch("gateway.status.get_running_pid", return_value=12345), \
-             patch("gateway.status.remove_pid_file"), \
+        # Simulate a manual gateway process found by find_gateway_pids
+        with patch.object(gateway_cli, "find_gateway_pids", return_value=[12345]), \
              patch("os.kill"):
             cmd_update(mock_args)
 
         captured = capsys.readouterr().out
-        assert "Restart it with: hermes gateway run" in captured
-        assert "Gateway restarted via launchd" not in captured
+        assert "Restart manually: hermes gateway run" in captured
 
     @patch("shutil.which", return_value=None)
     @patch("subprocess.run")
@@ -360,13 +374,11 @@ class TestCmdUpdateLaunchdRestart:
             systemd_active=True,
         )
 
-        with patch("gateway.status.get_running_pid", return_value=12345), \
-             patch("gateway.status.remove_pid_file"), \
-             patch("os.kill"):
+        with patch.object(gateway_cli, "find_gateway_pids", return_value=[]):
             cmd_update(mock_args)
 
         captured = capsys.readouterr().out
-        assert "Gateway restarted" in captured
+        assert "Restarted hermes-gateway" in captured
         # Verify systemctl restart was called
         restart_calls = [
             c for c in mock_run.call_args_list
@@ -422,13 +434,11 @@ class TestCmdUpdateSystemService:
             system_service_active=True,
         )
 
-        with patch("gateway.status.get_running_pid", return_value=12345), \
-             patch("gateway.status.remove_pid_file"):
+        with patch.object(gateway_cli, "find_gateway_pids", return_value=[]):
             cmd_update(mock_args)
 
         captured = capsys.readouterr().out
-        assert "system gateway service" in captured.lower()
-        assert "Gateway restarted (system service)" in captured
+        assert "Restarted hermes-gateway" in captured
         # Verify systemctl restart (no --user) was called
         restart_calls = [
             c for c in mock_run.call_args_list
@@ -440,10 +450,10 @@ class TestCmdUpdateSystemService:
 
     @patch("shutil.which", return_value=None)
     @patch("subprocess.run")
-    def test_update_system_service_restart_failure_shows_sudo_hint(
+    def test_update_system_service_restart_failure_shows_error(
         self, mock_run, _mock_which, mock_args, capsys, monkeypatch,
     ):
-        """When system service restart fails (e.g. no root), show sudo hint."""
+        """When system service restart fails, show the failure message."""
         monkeypatch.setattr(gateway_cli, "is_macos", lambda: False)
         monkeypatch.setattr(gateway_cli, "is_linux", lambda: True)
 
@@ -454,19 +464,18 @@ class TestCmdUpdateSystemService:
             system_restart_rc=1,
         )
 
-        with patch("gateway.status.get_running_pid", return_value=12345), \
-             patch("gateway.status.remove_pid_file"):
+        with patch.object(gateway_cli, "find_gateway_pids", return_value=[]):
             cmd_update(mock_args)
 
         captured = capsys.readouterr().out
-        assert "sudo systemctl restart" in captured
+        assert "Failed to restart" in captured
 
     @patch("shutil.which", return_value=None)
     @patch("subprocess.run")
-    def test_user_service_takes_priority_over_system(
+    def test_user_and_system_services_both_restarted(
         self, mock_run, _mock_which, mock_args, capsys, monkeypatch,
     ):
-        """When both user and system services are active, user wins."""
+        """When both user and system services are active, both are restarted."""
         monkeypatch.setattr(gateway_cli, "is_macos", lambda: False)
         monkeypatch.setattr(gateway_cli, "is_linux", lambda: True)
 
@@ -476,12 +485,9 @@ class TestCmdUpdateSystemService:
             system_service_active=True,
         )
 
-        with patch("gateway.status.get_running_pid", return_value=12345), \
-             patch("gateway.status.remove_pid_file"), \
-             patch("os.kill"):
+        with patch.object(gateway_cli, "find_gateway_pids", return_value=[]):
             cmd_update(mock_args)
 
         captured = capsys.readouterr().out
-        # Should restart via user service, not system
-        assert "Gateway restarted." in captured
-        assert "(system service)" not in captured
+        # Both scopes are discovered and restarted
+        assert "Restarted hermes-gateway" in captured