From 0785aec4443cc4d2baeac207b14c17f97bcf4707 Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Mon, 20 Apr 2026 18:35:18 -0500 Subject: [PATCH 1/4] fix(tui): harden against Node V8 OOM + GatewayClient memory leaks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Long TUI sessions were crashing Node via V8 fatal-OOM once transcripts + reasoning blobs crossed the default 1.5–4GB heap cap. This adds defense in depth: a bigger heap, leak-proofing the RPC hot path, bounded diagnostic buffers, automatic heap dumps at high-water marks, and graceful signal / uncaught handlers. ## Changes ### Heap budget - hermes_cli/main.py: `_launch_tui` now injects `NODE_OPTIONS= --max-old-space-size=8192 --expose-gc` (appended — does not clobber user-supplied NODE_OPTIONS). Covers both `node dist/entry.js` and `tsx src/entry.tsx` launch paths. - ui-tui/src/entry.tsx: shebang rewritten to `#!/usr/bin/env -S node --max-old-space-size=8192 --expose-gc` as a fallback when the binary is invoked directly. ### GatewayClient (ui-tui/src/gatewayClient.ts) - `setMaxListeners(0)` — silences spurious warnings from React hook subscribers. - `logs` and `bufferedEvents` replaced with fixed-capacity CircularBuffer — O(1) push, no splice(0, …) copies under load. - RPC timeout refactor: `setTimeout(this.onTimeout.bind(this), …, id)` replaces the inline arrow closure that captured `method`/`params`/ `resolve`/`reject` for the full 120 s request timeout. Each Pending record now stores its own timeout handle, `.unref()`'d so stuck timers never keep the event loop alive, and `rejectPending()` clears them (previously leaked the timer itself). ### Memory diagnostics (new) - ui-tui/src/lib/memory.ts: `performHeapDump()` + `captureMemoryDiagnostics()`. Writes heap snapshot + JSON diag sidecar to `~/.hermes/heapdumps/` (override via `HERMES_HEAPDUMP_DIR`). Diagnostics are written first so we still get useful data if the snapshot crashes on very large heaps. 
Captures: detached V8 contexts (closure-leak signal), active handles/requests (`process._getActiveHandles/_getActiveRequests`), Linux `/proc/self/fd` count + `/proc/self/smaps_rollup`, a memory growth-rate estimate (current RSS ÷ process uptime, reported as bytes/s and MB/hr), and auto-classifies likely leak sources. - ui-tui/src/lib/memoryMonitor.ts: 10 s interval polling heapUsed. At 1.5 GB writes an auto heap dump (trigger=`auto-high`); at 2.5 GB writes a final dump and exits 137 before V8 fatal-OOMs so the user can restart cleanly. Handle is `.unref()`'d so it never holds the process open. ### Graceful exit (new) - ui-tui/src/lib/gracefulExit.ts: SIGINT/SIGTERM/SIGHUP run registered cleanups through a 4 s failsafe `setTimeout` that hard-exits if cleanup hangs. `uncaughtException` / `unhandledRejection` are logged to stderr instead of crashing — a transient TUI render error should not kill an in-flight agent turn. ### Slash commands (new) - ui-tui/src/app/slash/commands/debug.ts: - `/heapdump` — manual snapshot + diagnostics. - `/mem` — live heap / rss / external / array-buffer / uptime panel. - Registered in `ui-tui/src/app/slash/registry.ts`. ### Utility (new) - ui-tui/src/lib/circularBuffer.ts: small fixed-capacity ring buffer with `push` / `tail(n)` / `drain()` / `clear()`. Replaces the ad-hoc `array.splice(0, len - MAX)` pattern. ## Validation - tsc `--noEmit` clean - `vitest run`: 15 files, 102 tests passing - eslint clean on all touched/new files - build produces executable `dist/entry.js` with preserved shebang - smoke-tested: `HERMES_HEAPDUMP_DIR=… performHeapDump('manual')` writes both a valid `.heapsnapshot` and a `.diagnostics.json` containing detached-contexts, active-handles, smaps_rollup.
## Env knobs - `HERMES_HEAPDUMP_DIR` — override snapshot output dir - `HERMES_HEAPDUMP_ON_START=1` — dump once at boot - existing `NODE_OPTIONS` is respected and appended, not replaced --- hermes_cli/main.py | 11 ++ ui-tui/src/app/slash/commands/debug.ts | 48 ++++++ ui-tui/src/app/slash/registry.ts | 9 +- ui-tui/src/entry.tsx | 37 ++++- ui-tui/src/gatewayClient.ts | 100 +++++++----- ui-tui/src/lib/circularBuffer.ts | 58 +++++++ ui-tui/src/lib/gracefulExit.ts | 63 ++++++++ ui-tui/src/lib/memory.ts | 208 +++++++++++++++++++++++++ ui-tui/src/lib/memoryMonitor.ts | 75 +++++++++ 9 files changed, 569 insertions(+), 40 deletions(-) create mode 100644 ui-tui/src/app/slash/commands/debug.ts create mode 100644 ui-tui/src/lib/circularBuffer.ts create mode 100644 ui-tui/src/lib/gracefulExit.ts create mode 100644 ui-tui/src/lib/memory.ts create mode 100644 ui-tui/src/lib/memoryMonitor.ts diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 714ad82bf..fc29b848a 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -1003,6 +1003,17 @@ def _launch_tui(resume_session_id: Optional[str] = None, tui_dev: bool = False): ) env.setdefault("HERMES_PYTHON", sys.executable) env.setdefault("HERMES_CWD", os.getcwd()) + # Guarantee an 8GB V8 heap + exposed GC for the TUI. Default node cap is + # ~1.5–4GB depending on version and can fatal-OOM on long sessions with + # large transcripts / reasoning blobs. Append (don't clobber) any user + # NODE_OPTIONS. 
+ _existing_node_opts = env.get("NODE_OPTIONS", "").strip() + _hermes_tui_node_opts = "--max-old-space-size=8192 --expose-gc" + env["NODE_OPTIONS"] = ( + f"{_existing_node_opts} {_hermes_tui_node_opts}".strip() + if _hermes_tui_node_opts not in _existing_node_opts + else _existing_node_opts + ) if resume_session_id: env["HERMES_TUI_RESUME"] = resume_session_id diff --git a/ui-tui/src/app/slash/commands/debug.ts b/ui-tui/src/app/slash/commands/debug.ts new file mode 100644 index 000000000..d44c76f34 --- /dev/null +++ b/ui-tui/src/app/slash/commands/debug.ts @@ -0,0 +1,48 @@ +import { formatBytes, performHeapDump } from '../../../lib/memory.js' +import type { SlashCommand } from '../types.js' + +export const debugCommands: SlashCommand[] = [ + { + help: 'write a V8 heap snapshot + memory diagnostics to ~/.hermes/heapdumps', + name: 'heapdump', + run: (_arg, ctx) => { + const { heapUsed, rss } = process.memoryUsage() + + ctx.transcript.sys(`writing heap dump (heap ${formatBytes(heapUsed)} · rss ${formatBytes(rss)})…`) + + void performHeapDump('manual').then(r => { + if (ctx.stale()) { + return + } + + if (!r.success) { + return ctx.transcript.sys(`heapdump failed: ${r.error ?? 
'unknown error'}`) + } + + ctx.transcript.sys(`heapdump: ${r.heapPath}`) + ctx.transcript.sys(`diagnostics: ${r.diagPath}`) + }) + } + }, + + { + help: 'print live V8 heap + rss numbers', + name: 'mem', + run: (_arg, ctx) => { + const { arrayBuffers, external, heapTotal, heapUsed, rss } = process.memoryUsage() + + ctx.transcript.panel('Memory', [ + { + rows: [ + ['heap used', formatBytes(heapUsed)], + ['heap total', formatBytes(heapTotal)], + ['external', formatBytes(external)], + ['array buffers', formatBytes(arrayBuffers)], + ['rss', formatBytes(rss)], + ['uptime', `${process.uptime().toFixed(0)}s`] + ] + } + ]) + } + } +] diff --git a/ui-tui/src/app/slash/registry.ts b/ui-tui/src/app/slash/registry.ts index ae7d7d50b..353b0a83d 100644 --- a/ui-tui/src/app/slash/registry.ts +++ b/ui-tui/src/app/slash/registry.ts @@ -1,10 +1,17 @@ import { coreCommands } from './commands/core.js' +import { debugCommands } from './commands/debug.js' import { opsCommands } from './commands/ops.js' import { sessionCommands } from './commands/session.js' import { setupCommands } from './commands/setup.js' import type { SlashCommand } from './types.js' -export const SLASH_COMMANDS: SlashCommand[] = [...coreCommands, ...sessionCommands, ...opsCommands, ...setupCommands] +export const SLASH_COMMANDS: SlashCommand[] = [ + ...coreCommands, + ...sessionCommands, + ...opsCommands, + ...setupCommands, + ...debugCommands +] const byName = new Map( SLASH_COMMANDS.flatMap(cmd => [cmd.name, ...(cmd.aliases ?? [])].map(name => [name, cmd] as const)) diff --git a/ui-tui/src/entry.tsx b/ui-tui/src/entry.tsx index e0a437934..a9571e135 100644 --- a/ui-tui/src/entry.tsx +++ b/ui-tui/src/entry.tsx @@ -1,7 +1,9 @@ -#!/usr/bin/env node -// Order matters: paint banner + spawn python before loading @hermes/ink. 
+#!/usr/bin/env -S node --max-old-space-size=8192 --expose-gc import { bootBanner } from './bootBanner.js' import { GatewayClient } from './gatewayClient.js' +import { setupGracefulExit } from './lib/gracefulExit.js' +import { formatBytes, performHeapDump } from './lib/memory.js' +import { startMemoryMonitor } from './lib/memoryMonitor.js' if (!process.stdin.isTTY) { console.log('hermes-tui: no TTY') @@ -11,8 +13,39 @@ if (!process.stdin.isTTY) { process.stdout.write(bootBanner()) const gw = new GatewayClient() + gw.start() +setupGracefulExit({ + cleanups: [() => gw.kill()], + onError: (scope, err) => { + const message = err instanceof Error ? `${err.name}: ${err.message}` : String(err) + + process.stderr.write(`hermes-tui ${scope}: ${message.slice(0, 2000)}\n`) + }, + onSignal: signal => process.stderr.write(`hermes-tui: received ${signal}\n`) +}) + +const stopMemoryMonitor = startMemoryMonitor({ + onCritical: (snap, dump) => { + process.stderr.write( + `hermes-tui: critical memory (${formatBytes(snap.heapUsed)}) — auto heap dump → ${dump?.heapPath ?? '(failed)'}\n` + ) + process.stderr.write('hermes-tui: exiting to avoid OOM; restart to recover\n') + process.exit(137) + }, + onHigh: (snap, dump) => + process.stderr.write( + `hermes-tui: high memory (${formatBytes(snap.heapUsed)}) — auto heap dump → ${dump?.heapPath ?? 
'(failed)'}\n` + ) +}) + +if (process.env.HERMES_HEAPDUMP_ON_START === '1') { + void performHeapDump('manual') +} + +process.on('beforeExit', () => stopMemoryMonitor()) + const [{ render }, { App }] = await Promise.all([import('@hermes/ink'), import('./app.js')]) render(, { exitOnCtrlC: false }) diff --git a/ui-tui/src/gatewayClient.ts b/ui-tui/src/gatewayClient.ts index a238c7638..bf5210faa 100644 --- a/ui-tui/src/gatewayClient.ts +++ b/ui-tui/src/gatewayClient.ts @@ -5,6 +5,7 @@ import { delimiter, resolve } from 'node:path' import { createInterface } from 'node:readline' import type { GatewayEvent } from './gatewayTypes.js' +import { CircularBuffer } from './lib/circularBuffer.js' const MAX_GATEWAY_LOG_LINES = 200 const MAX_LOG_LINE_BYTES = 4096 @@ -43,16 +44,19 @@ const asGatewayEvent = (value: unknown): GatewayEvent | null => : null interface Pending { + id: string + method: string reject: (e: Error) => void resolve: (v: unknown) => void + timeout: ReturnType } export class GatewayClient extends EventEmitter { private proc: ChildProcess | null = null private reqId = 0 - private logs: string[] = [] + private logs = new CircularBuffer(MAX_GATEWAY_LOG_LINES) private pending = new Map() - private bufferedEvents: GatewayEvent[] = [] + private bufferedEvents = new CircularBuffer(MAX_BUFFERED_EVENTS) private pendingExit: number | null | undefined private ready = false private readyTimer: ReturnType | null = null @@ -60,6 +64,13 @@ export class GatewayClient extends EventEmitter { private stdoutRl: ReturnType | null = null private stderrRl: ReturnType | null = null + constructor() { + super() + // useInput / createGatewayEventHandler can legitimately attach many + // listeners. Default 10-cap triggers spurious warnings. 
+ this.setMaxListeners(0) + } + private publish(ev: GatewayEvent) { if (ev.type === 'gateway.ready') { this.ready = true @@ -74,9 +85,7 @@ export class GatewayClient extends EventEmitter { return void this.emit('event', ev) } - if (this.bufferedEvents.push(ev) > MAX_BUFFERED_EVENTS) { - this.bufferedEvents.splice(0, this.bufferedEvents.length - MAX_BUFFERED_EVENTS) - } + this.bufferedEvents.push(ev) } start() { @@ -88,7 +97,7 @@ export class GatewayClient extends EventEmitter { env.PYTHONPATH = pyPath ? `${root}${delimiter}${pyPath}` : root this.ready = false - this.bufferedEvents = [] + this.bufferedEvents.clear() this.pendingExit = undefined this.stdoutRl?.close() this.stderrRl?.close() @@ -165,15 +174,7 @@ export class GatewayClient extends EventEmitter { const p = id ? this.pending.get(id) : undefined if (p) { - this.pending.delete(id!) - - if (msg.error) { - const err = msg.error as { message?: unknown } | null | undefined - - p.reject(new Error(typeof err?.message === 'string' ? err.message : 'request failed')) - } else { - p.resolve(msg.result) - } + this.settle(p, msg.error ? this.toError(msg.error) : null, msg.result) return } @@ -187,24 +188,49 @@ export class GatewayClient extends EventEmitter { } } - private pushLog(line: string) { - if (this.logs.push(truncateLine(line)) > MAX_GATEWAY_LOG_LINES) { - this.logs.splice(0, this.logs.length - MAX_GATEWAY_LOG_LINES) + private toError(raw: unknown): Error { + const err = raw as { message?: unknown } | null | undefined + + return new Error(typeof err?.message === 'string' ? 
err.message : 'request failed') + } + + private settle(p: Pending, err: Error | null, result: unknown) { + clearTimeout(p.timeout) + this.pending.delete(p.id) + + if (err) { + p.reject(err) + } else { + p.resolve(result) } } + private pushLog(line: string) { + this.logs.push(truncateLine(line)) + } + private rejectPending(err: Error) { for (const p of this.pending.values()) { + clearTimeout(p.timeout) p.reject(err) } this.pending.clear() } + private onTimeout(id: string) { + const p = this.pending.get(id) + + if (p) { + this.pending.delete(id) + p.reject(new Error(`timeout: ${p.method}`)) + } + } + drain() { this.subscribed = true - for (const ev of this.bufferedEvents.splice(0)) { + for (const ev of this.bufferedEvents.drain()) { this.emit('event', ev) } @@ -217,7 +243,7 @@ export class GatewayClient extends EventEmitter { } getLogTail(limit = 20): string { - return this.logs.slice(-Math.max(1, limit)).join('\n') + return this.logs.tail(Math.max(1, limit)).join('\n') } request(method: string, params: Record = {}): Promise { @@ -231,29 +257,29 @@ export class GatewayClient extends EventEmitter { const id = `r${++this.reqId}` - return new Promise((resolve, reject) => { - const timeout = setTimeout(() => { - if (this.pending.delete(id)) { - reject(new Error(`timeout: ${method}`)) - } - }, REQUEST_TIMEOUT_MS) + return new Promise((resolve, reject) => { + const timeout = setTimeout(this.onTimeout.bind(this), REQUEST_TIMEOUT_MS, id) + + timeout.unref?.() this.pending.set(id, { - reject: e => { - clearTimeout(timeout) - reject(e) - }, - resolve: v => { - clearTimeout(timeout) - resolve(v as T) - } + id, + method, + reject, + resolve: v => resolve(v as T), + timeout }) try { - this.proc!.stdin!.write(JSON.stringify({ jsonrpc: '2.0', id, method, params }) + '\n') + this.proc!.stdin!.write(JSON.stringify({ id, jsonrpc: '2.0', method, params }) + '\n') } catch (e) { - clearTimeout(timeout) - this.pending.delete(id) + const pending = this.pending.get(id) + + if (pending) { + 
clearTimeout(pending.timeout) + this.pending.delete(id) + } + reject(e instanceof Error ? e : new Error(String(e))) } }) diff --git a/ui-tui/src/lib/circularBuffer.ts b/ui-tui/src/lib/circularBuffer.ts new file mode 100644 index 000000000..09023fae5 --- /dev/null +++ b/ui-tui/src/lib/circularBuffer.ts @@ -0,0 +1,58 @@ +export class CircularBuffer { + private buf: T[] + private head = 0 + private len = 0 + + constructor(private capacity: number) { + this.buf = new Array(capacity) + } + + push(item: T) { + this.buf[this.head] = item + this.head = (this.head + 1) % this.capacity + + if (this.len < this.capacity) { + this.len++ + } + } + + pushAll(items: readonly T[]) { + for (const item of items) { + this.push(item) + } + } + + tail(n = this.len): T[] { + const take = Math.min(Math.max(0, n), this.len) + const start = this.len < this.capacity ? 0 : this.head + const out: T[] = new Array(take) + + for (let i = 0; i < take; i++) { + out[i] = this.buf[(start + this.len - take + i) % this.capacity]! 
+ } + + return out + } + + toArray(): T[] { + return this.tail(this.len) + } + + drain(): T[] { + const out = this.toArray() + + this.clear() + + return out + } + + clear() { + this.buf = new Array(this.capacity) + this.head = 0 + this.len = 0 + } + + get size() { + return this.len + } +} diff --git a/ui-tui/src/lib/gracefulExit.ts b/ui-tui/src/lib/gracefulExit.ts new file mode 100644 index 000000000..ae6a23a5e --- /dev/null +++ b/ui-tui/src/lib/gracefulExit.ts @@ -0,0 +1,63 @@ +type Cleanup = () => Promise | void + +interface SetupOptions { + cleanups?: Cleanup[] + failsafeMs?: number + onError?: (scope: 'uncaughtException' | 'unhandledRejection', err: unknown) => void + onSignal?: (signal: NodeJS.Signals) => void +} + +const DEFAULT_FAILSAFE_MS = 4000 + +let wired = false + +export function setupGracefulExit({ + cleanups = [], + failsafeMs = DEFAULT_FAILSAFE_MS, + onError, + onSignal +}: SetupOptions = {}) { + if (wired) { + return + } + + wired = true + + let shuttingDown = false + + const exit = (code: number, signal?: NodeJS.Signals) => { + if (shuttingDown) { + return + } + + shuttingDown = true + + if (signal) { + onSignal?.(signal) + } + + const failsafe = setTimeout(() => process.exit(code), failsafeMs) + + failsafe.unref?.() + + void Promise.allSettled(cleanups.map(fn => Promise.resolve().then(fn))) + .catch(() => {}) + .finally(() => process.exit(code)) + } + + for (const sig of ['SIGINT', 'SIGTERM', 'SIGHUP'] as const) { + process.on(sig, () => exit(sig === 'SIGINT' ? 130 : sig === 'SIGTERM' ? 
143 : 129, sig)) + } + + process.on('uncaughtException', err => { + onError?.('uncaughtException', err) + }) + + process.on('unhandledRejection', reason => { + onError?.('unhandledRejection', reason) + }) +} + +export function forceExit(code = 0) { + process.exit(code) +} diff --git a/ui-tui/src/lib/memory.ts b/ui-tui/src/lib/memory.ts new file mode 100644 index 000000000..0afbab772 --- /dev/null +++ b/ui-tui/src/lib/memory.ts @@ -0,0 +1,208 @@ +import { createWriteStream } from 'node:fs' +import { mkdir, readdir, readFile, writeFile } from 'node:fs/promises' +import { homedir, tmpdir } from 'node:os' +import { join } from 'node:path' +import { pipeline } from 'node:stream/promises' +import { getHeapSnapshot, getHeapSpaceStatistics, getHeapStatistics } from 'node:v8' + +export type MemoryTrigger = 'auto-high' | 'auto-critical' | 'manual' + +export interface MemoryDiagnostics { + activeHandles: number + activeRequests: number + analysis: { + potentialLeaks: string[] + recommendation: string + } + memoryGrowthRate: { + bytesPerSecond: number + mbPerHour: number + } + memoryUsage: { + arrayBuffers: number + external: number + heapTotal: number + heapUsed: number + rss: number + } + nodeVersion: string + openFileDescriptors?: number + platform: string + resourceUsage: { + maxRSS: number + systemCPUTime: number + userCPUTime: number + } + smapsRollup?: string + timestamp: string + trigger: MemoryTrigger + uptimeSeconds: number + v8HeapSpaces?: { available: number; name: string; size: number; used: number }[] + v8HeapStats: { + detachedContexts: number + heapSizeLimit: number + mallocedMemory: number + nativeContexts: number + peakMallocedMemory: number + } +} + +export interface HeapDumpResult { + diagPath?: string + error?: string + heapPath?: string + success: boolean +} + +const heapDumpRoot = () => + process.env.HERMES_HEAPDUMP_DIR?.trim() || join(homedir() || tmpdir(), '.hermes', 'heapdumps') + +const processInternals = process as unknown as { + _getActiveHandles: 
() => unknown[] + _getActiveRequests: () => unknown[] +} + +export async function captureMemoryDiagnostics(trigger: MemoryTrigger): Promise { + const usage = process.memoryUsage() + const heapStats = getHeapStatistics() + const resourceUsage = process.resourceUsage() + const uptimeSeconds = process.uptime() + + let heapSpaces: ReturnType | undefined + + try { + heapSpaces = getHeapSpaceStatistics() + } catch { + /* Bun / older Node — ignore */ + } + + const activeHandles = processInternals._getActiveHandles().length + const activeRequests = processInternals._getActiveRequests().length + + let openFileDescriptors: number | undefined + + try { + openFileDescriptors = (await readdir('/proc/self/fd')).length + } catch { + /* non-Linux */ + } + + let smapsRollup: string | undefined + + try { + smapsRollup = await readFile('/proc/self/smaps_rollup', 'utf8') + } catch { + /* non-Linux / no access */ + } + + const nativeMemory = usage.rss - usage.heapUsed + const bytesPerSecond = uptimeSeconds > 0 ? usage.rss / uptimeSeconds : 0 + const mbPerHour = (bytesPerSecond * 3600) / (1024 * 1024) + + const potentialLeaks: string[] = [] + + if (heapStats.number_of_detached_contexts > 0) { + potentialLeaks.push( + `${heapStats.number_of_detached_contexts} detached context(s) — possible component/closure leak` + ) + } + + if (activeHandles > 100) { + potentialLeaks.push(`${activeHandles} active handles — possible timer/socket leak`) + } + + if (nativeMemory > usage.heapUsed) { + potentialLeaks.push('Native memory > heap — leak may be in native addons') + } + + if (mbPerHour > 100) { + potentialLeaks.push(`High memory growth rate: ${mbPerHour.toFixed(1)} MB/hour`) + } + + if (openFileDescriptors && openFileDescriptors > 500) { + potentialLeaks.push(`${openFileDescriptors} open FDs — possible file/socket leak`) + } + + return { + activeHandles, + activeRequests, + analysis: { + potentialLeaks, + recommendation: potentialLeaks.length + ? 
`WARNING: ${potentialLeaks.length} potential leak indicator(s). See potentialLeaks.` + : 'No obvious leak indicators. Inspect heap snapshot for retained objects.' + }, + memoryGrowthRate: { bytesPerSecond, mbPerHour }, + memoryUsage: { + arrayBuffers: usage.arrayBuffers, + external: usage.external, + heapTotal: usage.heapTotal, + heapUsed: usage.heapUsed, + rss: usage.rss + }, + nodeVersion: process.version, + openFileDescriptors, + platform: process.platform, + resourceUsage: { + maxRSS: resourceUsage.maxRSS * 1024, + systemCPUTime: resourceUsage.systemCPUTime, + userCPUTime: resourceUsage.userCPUTime + }, + smapsRollup, + timestamp: new Date().toISOString(), + trigger, + uptimeSeconds, + v8HeapSpaces: heapSpaces?.map(s => ({ + available: s.space_available_size, + name: s.space_name, + size: s.space_size, + used: s.space_used_size + })), + v8HeapStats: { + detachedContexts: heapStats.number_of_detached_contexts, + heapSizeLimit: heapStats.heap_size_limit, + mallocedMemory: heapStats.malloced_memory, + nativeContexts: heapStats.number_of_native_contexts, + peakMallocedMemory: heapStats.peak_malloced_memory + } + } +} + +export async function performHeapDump(trigger: MemoryTrigger = 'manual'): Promise { + try { + const diagnostics = await captureMemoryDiagnostics(trigger) + const dir = heapDumpRoot() + + await mkdir(dir, { recursive: true }) + + const stamp = new Date().toISOString().replace(/[:.]/g, '-') + const base = `hermes-${stamp}-${process.pid}-${trigger}` + const heapPath = join(dir, `${base}.heapsnapshot`) + const diagPath = join(dir, `${base}.diagnostics.json`) + + await writeFile(diagPath, JSON.stringify(diagnostics, null, 2), { mode: 0o600 }) + await writeSnapshot(heapPath) + + return { diagPath, heapPath, success: true } + } catch (e) { + return { error: e instanceof Error ? 
e.message : String(e), success: false } + } +} + +export function formatBytes(bytes: number): string { + if (!Number.isFinite(bytes) || bytes <= 0) { + return '0B' + } + + const units = ['B', 'KB', 'MB', 'GB', 'TB'] + const exp = Math.min(units.length - 1, Math.floor(Math.log10(bytes) / 3)) + const value = bytes / 1024 ** exp + + return `${value >= 100 ? value.toFixed(0) : value.toFixed(1)}${units[exp]}` +} + +async function writeSnapshot(filepath: string) { + const stream = createWriteStream(filepath, { mode: 0o600 }) + + await pipeline(getHeapSnapshot(), stream) +} diff --git a/ui-tui/src/lib/memoryMonitor.ts b/ui-tui/src/lib/memoryMonitor.ts new file mode 100644 index 000000000..58d7d3878 --- /dev/null +++ b/ui-tui/src/lib/memoryMonitor.ts @@ -0,0 +1,75 @@ +import { type HeapDumpResult, performHeapDump } from './memory.js' + +export type MemoryLevel = 'critical' | 'high' | 'normal' + +export interface MemorySnapshot { + heapUsed: number + level: MemoryLevel + rss: number +} + +export interface MemoryMonitorOptions { + criticalBytes?: number + highBytes?: number + intervalMs?: number + onCritical?: (snap: MemorySnapshot, dump: HeapDumpResult | null) => void + onHigh?: (snap: MemorySnapshot, dump: HeapDumpResult | null) => void + onSnapshot?: (snap: MemorySnapshot) => void +} + +const GB = 1024 ** 3 + +const DEFAULTS = { + criticalBytes: 2.5 * GB, + highBytes: 1.5 * GB, + intervalMs: 10_000 +} + +export function startMemoryMonitor({ + criticalBytes = DEFAULTS.criticalBytes, + highBytes = DEFAULTS.highBytes, + intervalMs = DEFAULTS.intervalMs, + onCritical, + onHigh, + onSnapshot +}: MemoryMonitorOptions = {}): () => void { + let dumpedHigh = false + let dumpedCritical = false + + const tick = async () => { + const { heapUsed, rss } = process.memoryUsage() + const level: MemoryLevel = heapUsed >= criticalBytes ? 'critical' : heapUsed >= highBytes ? 
'high' : 'normal' + const snap: MemorySnapshot = { heapUsed, level, rss } + + onSnapshot?.(snap) + + if (level === 'normal') { + dumpedHigh = false + dumpedCritical = false + + return + } + + if (level === 'high' && !dumpedHigh) { + dumpedHigh = true + const dump = await performHeapDump('auto-high').catch(() => null) + + onHigh?.(snap, dump) + + return + } + + if (level === 'critical' && !dumpedCritical) { + dumpedCritical = true + const dump = await performHeapDump('auto-critical').catch(() => null) + + onCritical?.(snap, dump) + } + } + + const handle = setInterval(() => void tick(), intervalMs) + + handle.unref?.() + + return () => clearInterval(handle) +} From 0078f743e692a56c3fc78ca46508b4bca336ff6b Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Mon, 20 Apr 2026 18:42:15 -0500 Subject: [PATCH 2/4] perf(tui): debounce resize RPC + column-aware useVirtualHistory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit VSCode panel-drag fires 20+ SIGWINCHes/sec, each previously triggering an unthrottled `terminal.resize` gateway RPC and a full transcript re-virtualization with stale per-row height cache. ## Changes ### gateway RPC debounce (ui-tui/src/app/useMainApp.ts) - `terminal.resize` RPC now trailing-debounced at 100 ms. React `cols` state stays synchronous (needed for Yoga / in-process rendering), only the round-trip to Python coalesces. Prevents gateway flood during panel-drag / tmux-pane-resize. ### column-aware useVirtualHistory (ui-tui/src/hooks/useVirtualHistory.ts) - New required `columns` param, plumbed through from useMainApp. - On column change: scale every cached row height by `oldCols/newCols` (Math.max 1, Math.round) instead of clearing. Clearing forces a pessimistic back-walk that mounts ~190 rows at once (viewport + 2x overscan at 1-row estimate), each a fresh marked.lexer + syntax highlight ≈ 3 ms — ~600 ms React commit block. Scaled heights keep the back-walk tight. 
- `freezeRenders=2`: reuse pre-resize mount range for 2 renders so already-mounted MessageRows keep their warm useMemo results. Without this the first post-resize render would unmount + remount most rows (pessimistic coverage) = visible flash + 150 ms+ freeze. - `skipMeasurement` flag: first post-resize useLayoutEffect would read PRE-resize Yoga heights (Yoga's stored values are still from the frame before this render's calculateLayout with new width) and poison the scaled cache. Skip the measurement loop for that one render; next render's Yoga is correct. ## Validation - tsc `--noEmit` clean - eslint clean on touched files - `vitest run`: 15 files / 102 tests passing The renderer-level resize patterns (sync-dim-capture + microtask- coalesced React commit, atomic BSU/ESU erase-before-paint, mouse- tracking reassert) already live in hermes-ink's own `handleResize`; this patch adds the matching app-layer hygiene. --- ui-tui/src/app/useMainApp.ts | 20 ++++++-- ui-tui/src/hooks/useVirtualHistory.ts | 72 +++++++++++++++++++++++---- 2 files changed, 79 insertions(+), 13 deletions(-) diff --git a/ui-tui/src/app/useMainApp.ts b/ui-tui/src/app/useMainApp.ts index 28b2a26f9..8a5b0b1fd 100644 --- a/ui-tui/src/app/useMainApp.ts +++ b/ui-tui/src/app/useMainApp.ts @@ -161,7 +161,7 @@ export function useMainApp(gw: GatewayClient) { [historyItems, messageId] ) - const virtualHistory = useVirtualHistory(scrollRef, virtualRows) + const virtualHistory = useVirtualHistory(scrollRef, virtualRows, cols) const scrollWithSelection = useCallback( (delta: number) => { @@ -306,12 +306,26 @@ export function useMainApp(gw: GatewayClient) { return } - const onResize = () => - rpc('terminal.resize', { cols: stdout.columns ?? 80, session_id: ui.sid }) + let timer: null | ReturnType = null + + const onResize = () => { + if (timer) { + clearTimeout(timer) + } + + timer = setTimeout(() => { + timer = null + void rpc('terminal.resize', { cols: stdout.columns ?? 
80, session_id: ui.sid }) + }, 100) + } stdout.on('resize', onResize) return () => { + if (timer) { + clearTimeout(timer) + } + stdout.off('resize', onResize) } }, [rpc, stdout, ui.sid]) diff --git a/ui-tui/src/hooks/useVirtualHistory.ts b/ui-tui/src/hooks/useVirtualHistory.ts index efa2642df..3d1d27c05 100644 --- a/ui-tui/src/hooks/useVirtualHistory.ts +++ b/ui-tui/src/hooks/useVirtualHistory.ts @@ -15,6 +15,7 @@ const OVERSCAN = 40 const MAX_MOUNTED = 260 const COLD_START = 40 const QUANTUM = OVERSCAN >> 1 +const FREEZE_RENDERS = 2 const upperBound = (arr: number[], target: number) => { let lo = 0, @@ -31,6 +32,7 @@ const upperBound = (arr: number[], target: number) => { export function useVirtualHistory( scrollRef: RefObject, items: readonly { key: string }[], + columns: number, { estimate = ESTIMATE, overscan = OVERSCAN, maxMounted = MAX_MOUNTED, coldStartCount = COLD_START } = {} ) { const nodes = useRef(new Map()) @@ -40,6 +42,34 @@ export function useVirtualHistory( const [hasScrollRef, setHasScrollRef] = useState(false) const metrics = useRef({ sticky: true, top: 0, vp: 0 }) + // Resize handling — scale cached heights by oldCols/newCols so post-resize + // offsets stay roughly aligned with (still-unknown) real Yoga heights. + // Clearing the cache instead would force a pessimistic back-walk that mounts + // ~190 rows at once (viewport+overscan at 1-row estimate), each a fresh + // marked.lexer + syntax highlight = ~3ms; ~600ms React commit block. Freeze + // the mount range for FREEZE_RENDERS so warm useMemo results survive while + // the layout effect writes post-resize real heights back into cache. + // skipMeasurement prevents that first post-resize useLayoutEffect from + // poisoning the cache with pre-resize Yoga values (Yoga's stored heights + // are from the frame BEFORE this render's calculateLayout with new width). 
+ const prevColumns = useRef(columns) + const skipMeasurement = useRef(false) + const prevRange = useRef(null) + const freezeRenders = useRef(0) + + if (prevColumns.current !== columns && prevColumns.current > 0 && columns > 0) { + const ratio = prevColumns.current / columns + + prevColumns.current = columns + + for (const [k, h] of heights.current) { + heights.current.set(k, Math.max(1, Math.round(h * ratio))) + } + + skipMeasurement.current = true + freezeRenders.current = FREEZE_RENDERS + } + useLayoutEffect(() => { setHasScrollRef(Boolean(scrollRef.current)) }, [scrollRef]) @@ -97,10 +127,19 @@ export function useVirtualHistory( const vp = Math.max(0, scrollRef.current?.getViewportHeight() ?? 0) const sticky = scrollRef.current?.isSticky() ?? true + const frozenRange = freezeRenders.current > 0 ? prevRange.current : null + let start = 0, end = items.length - if (items.length > 0) { + if (frozenRange) { + // Columns just changed. Reuse the pre-resize mount range so already-mounted + // MessageRows keep their warm memos (marked.lexer, syntax highlight). Clamp + // to n in case messages were removed (/clear, compaction) mid-freeze. + ;[start, end] = frozenRange + start = Math.min(start, items.length) + end = Math.min(end, items.length) + } else if (items.length > 0) { if (vp <= 0) { start = Math.max(0, items.length - coldStartCount) } else { @@ -113,6 +152,12 @@ export function useVirtualHistory( sticky ? 
(start = Math.max(0, end - maxMounted)) : (end = Math.min(items.length, start + maxMounted)) } + if (freezeRenders.current > 0) { + freezeRenders.current-- + } else { + prevRange.current = [start, end] + } + const measureRef = useCallback((key: string) => { let fn = refs.current.get(key) @@ -127,18 +172,25 @@ export function useVirtualHistory( useLayoutEffect(() => { let dirty = false - for (let i = start; i < end; i++) { - const k = items[i]?.key + if (skipMeasurement.current) { + // First render after a column change — Yoga heights still reflect the + // pre-resize layout. Writing them into cache would overwrite the scaled + // estimates with stale pre-resize values. Next render's Yoga is correct. + skipMeasurement.current = false + } else { + for (let i = start; i < end; i++) { + const k = items[i]?.key - if (!k) { - continue - } + if (!k) { + continue + } - const h = Math.ceil((nodes.current.get(k) as MeasuredNode | undefined)?.yogaNode?.getComputedHeight?.() ?? 0) + const h = Math.ceil((nodes.current.get(k) as MeasuredNode | undefined)?.yogaNode?.getComputedHeight?.() ?? 0) - if (h > 0 && heights.current.get(k) !== h) { - heights.current.set(k, h) - dirty = true + if (h > 0 && heights.current.get(k) !== h) { + heights.current.set(k, h) + dirty = true + } } } From 82b927777c3161c541adaa6060a88a48b04f6404 Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Mon, 20 Apr 2026 18:51:12 -0500 Subject: [PATCH 3/4] refactor(tui): /clean pass on memory + resize helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit KISS/DRY sweep — drops ~90 LOC with no behavior change. 
- circularBuffer: drop unused pushAll/size; fold toArray into drain (its only caller — drain now calls tail() directly) - gracefulExit: inline Cleanup type + failsafe const; signal→code as a record instead of nested ternary; drop dead .catch on Promise.allSettled (allSettled never rejects); drop unused forceExit - memory: inline heapDumpRoot() + writeSnapshot() (single-use); collapse the two fd/smaps try/catch blocks behind one `swallow` helper; build potentialLeaks functionally (array+filter) instead of imperative push-chain; move UNITS to a module-level const at file bottom - memoryMonitor: inline DEFAULTS; drop unused onSnapshot; collapse dumpedHigh/dumpedCritical bools to a single Set; single callback dispatch line instead of duplicated if-chains - entry.tsx: factor `dumpNotice` formatter (used twice by onHigh + onCritical) - useMainApp resize debounce: drop redundant `if (timer)` guards (clearTimeout(undefined) is a no-op); init as undefined not null - useVirtualHistory: trim wall-of-text comment to a short statement of intent; hoist `const n = items.length`; split comma-declared lets; remove the `;[start, end] = frozenRange` destructure in favor of direct Math.min clamps; give `hi` its own `let` in upperBound for consistency Validation: tsc clean (both configs), eslint clean on touched files, vitest 102/102, build produces shebang-preserved dist/entry.js, performHeapDump smoke-test still writes valid snapshot + diagnostics.
--- ui-tui/src/app/useMainApp.ts | 14 ++-- ui-tui/src/entry.tsx | 16 ++--- ui-tui/src/hooks/useVirtualHistory.ts | 49 ++++++-------- ui-tui/src/lib/circularBuffer.ts | 16 +---- ui-tui/src/lib/gracefulExit.ts | 40 ++++-------- ui-tui/src/lib/memory.ts | 92 ++++++++++----------------- ui-tui/src/lib/memoryMonitor.ts | 44 ++++--------- 7 files changed, 89 insertions(+), 182 deletions(-) diff --git a/ui-tui/src/app/useMainApp.ts b/ui-tui/src/app/useMainApp.ts index 8a5b0b1fd..77c2681c6 100644 --- a/ui-tui/src/app/useMainApp.ts +++ b/ui-tui/src/app/useMainApp.ts @@ -306,15 +306,12 @@ export function useMainApp(gw: GatewayClient) { return } - let timer: null | ReturnType = null + let timer: ReturnType | undefined const onResize = () => { - if (timer) { - clearTimeout(timer) - } - + clearTimeout(timer) timer = setTimeout(() => { - timer = null + timer = undefined void rpc('terminal.resize', { cols: stdout.columns ?? 80, session_id: ui.sid }) }, 100) } @@ -322,10 +319,7 @@ export function useMainApp(gw: GatewayClient) { stdout.on('resize', onResize) return () => { - if (timer) { - clearTimeout(timer) - } - + clearTimeout(timer) stdout.off('resize', onResize) } }, [rpc, stdout, ui.sid]) diff --git a/ui-tui/src/entry.tsx b/ui-tui/src/entry.tsx index a9571e135..6f1506e5a 100644 --- a/ui-tui/src/entry.tsx +++ b/ui-tui/src/entry.tsx @@ -2,8 +2,8 @@ import { bootBanner } from './bootBanner.js' import { GatewayClient } from './gatewayClient.js' import { setupGracefulExit } from './lib/gracefulExit.js' -import { formatBytes, performHeapDump } from './lib/memory.js' -import { startMemoryMonitor } from './lib/memoryMonitor.js' +import { formatBytes, type HeapDumpResult, performHeapDump } from './lib/memory.js' +import { type MemorySnapshot, startMemoryMonitor } from './lib/memoryMonitor.js' if (!process.stdin.isTTY) { console.log('hermes-tui: no TTY') @@ -16,6 +16,9 @@ const gw = new GatewayClient() gw.start() +const dumpNotice = (snap: MemorySnapshot, dump: HeapDumpResult | null) 
=> + `hermes-tui: ${snap.level} memory (${formatBytes(snap.heapUsed)}) — auto heap dump → ${dump?.heapPath ?? '(failed)'}\n` + setupGracefulExit({ cleanups: [() => gw.kill()], onError: (scope, err) => { @@ -28,16 +31,11 @@ setupGracefulExit({ const stopMemoryMonitor = startMemoryMonitor({ onCritical: (snap, dump) => { - process.stderr.write( - `hermes-tui: critical memory (${formatBytes(snap.heapUsed)}) — auto heap dump → ${dump?.heapPath ?? '(failed)'}\n` - ) + process.stderr.write(dumpNotice(snap, dump)) process.stderr.write('hermes-tui: exiting to avoid OOM; restart to recover\n') process.exit(137) }, - onHigh: (snap, dump) => - process.stderr.write( - `hermes-tui: high memory (${formatBytes(snap.heapUsed)}) — auto heap dump → ${dump?.heapPath ?? '(failed)'}\n` - ) + onHigh: (snap, dump) => process.stderr.write(dumpNotice(snap, dump)) }) if (process.env.HERMES_HEAPDUMP_ON_START === '1') { diff --git a/ui-tui/src/hooks/useVirtualHistory.ts b/ui-tui/src/hooks/useVirtualHistory.ts index 3d1d27c05..c21e25fda 100644 --- a/ui-tui/src/hooks/useVirtualHistory.ts +++ b/ui-tui/src/hooks/useVirtualHistory.ts @@ -18,11 +18,12 @@ const QUANTUM = OVERSCAN >> 1 const FREEZE_RENDERS = 2 const upperBound = (arr: number[], target: number) => { - let lo = 0, - hi = arr.length + let lo = 0 + let hi = arr.length while (lo < hi) { const mid = (lo + hi) >> 1 + arr[mid]! <= target ? (lo = mid + 1) : (hi = mid) } @@ -42,16 +43,11 @@ export function useVirtualHistory( const [hasScrollRef, setHasScrollRef] = useState(false) const metrics = useRef({ sticky: true, top: 0, vp: 0 }) - // Resize handling — scale cached heights by oldCols/newCols so post-resize - // offsets stay roughly aligned with (still-unknown) real Yoga heights. - // Clearing the cache instead would force a pessimistic back-walk that mounts - // ~190 rows at once (viewport+overscan at 1-row estimate), each a fresh - // marked.lexer + syntax highlight = ~3ms; ~600ms React commit block. 
Freeze - // the mount range for FREEZE_RENDERS so warm useMemo results survive while - // the layout effect writes post-resize real heights back into cache. - // skipMeasurement prevents that first post-resize useLayoutEffect from - // poisoning the cache with pre-resize Yoga values (Yoga's stored heights - // are from the frame BEFORE this render's calculateLayout with new width). + // Width change: scale cached heights (not clear — clearing forces a + // pessimistic back-walk mounting ~190 rows at once, each a fresh + // marked.lexer + syntax highlight ≈ 3ms). Freeze mount range for 2 + // renders so warm memos survive; skip one measurement so useLayoutEffect + // doesn't poison the scaled cache with pre-resize Yoga heights. const prevColumns = useRef(columns) const skipMeasurement = useRef(false) const prevRange = useRef(null) @@ -122,34 +118,32 @@ export function useVirtualHistory( return out }, [estimate, items, ver]) - const total = offsets[items.length] ?? 0 + const n = items.length + const total = offsets[n] ?? 0 const top = Math.max(0, scrollRef.current?.getScrollTop() ?? 0) const vp = Math.max(0, scrollRef.current?.getViewportHeight() ?? 0) const sticky = scrollRef.current?.isSticky() ?? true const frozenRange = freezeRenders.current > 0 ? prevRange.current : null - let start = 0, - end = items.length + let start = 0 + let end = n if (frozenRange) { - // Columns just changed. Reuse the pre-resize mount range so already-mounted - // MessageRows keep their warm memos (marked.lexer, syntax highlight). Clamp - // to n in case messages were removed (/clear, compaction) mid-freeze. - ;[start, end] = frozenRange - start = Math.min(start, items.length) - end = Math.min(end, items.length) - } else if (items.length > 0) { + // Clamp in case items shrank (/clear, compaction) mid-freeze. 
+ start = Math.min(frozenRange[0], n) + end = Math.min(frozenRange[1], n) + } else if (n > 0) { if (vp <= 0) { - start = Math.max(0, items.length - coldStartCount) + start = Math.max(0, n - coldStartCount) } else { - start = Math.max(0, Math.min(items.length - 1, upperBound(offsets, Math.max(0, top - overscan)) - 1)) - end = Math.max(start + 1, Math.min(items.length, upperBound(offsets, top + vp + overscan))) + start = Math.max(0, Math.min(n - 1, upperBound(offsets, Math.max(0, top - overscan)) - 1)) + end = Math.max(start + 1, Math.min(n, upperBound(offsets, top + vp + overscan))) } } if (end - start > maxMounted) { - sticky ? (start = Math.max(0, end - maxMounted)) : (end = Math.min(items.length, start + maxMounted)) + sticky ? (start = Math.max(0, end - maxMounted)) : (end = Math.min(n, start + maxMounted)) } if (freezeRenders.current > 0) { @@ -173,9 +167,6 @@ export function useVirtualHistory( let dirty = false if (skipMeasurement.current) { - // First render after a column change — Yoga heights still reflect the - // pre-resize layout. Writing them into cache would overwrite the scaled - // estimates with stale pre-resize values. Next render's Yoga is correct. skipMeasurement.current = false } else { for (let i = start; i < end; i++) { diff --git a/ui-tui/src/lib/circularBuffer.ts b/ui-tui/src/lib/circularBuffer.ts index 09023fae5..675a508f2 100644 --- a/ui-tui/src/lib/circularBuffer.ts +++ b/ui-tui/src/lib/circularBuffer.ts @@ -16,12 +16,6 @@ export class CircularBuffer { } } - pushAll(items: readonly T[]) { - for (const item of items) { - this.push(item) - } - } - tail(n = this.len): T[] { const take = Math.min(Math.max(0, n), this.len) const start = this.len < this.capacity ? 
0 : this.head @@ -34,12 +28,8 @@ export class CircularBuffer { return out } - toArray(): T[] { - return this.tail(this.len) - } - drain(): T[] { - const out = this.toArray() + const out = this.tail() this.clear() @@ -51,8 +41,4 @@ export class CircularBuffer { this.head = 0 this.len = 0 } - - get size() { - return this.len - } } diff --git a/ui-tui/src/lib/gracefulExit.ts b/ui-tui/src/lib/gracefulExit.ts index ae6a23a5e..2896fd126 100644 --- a/ui-tui/src/lib/gracefulExit.ts +++ b/ui-tui/src/lib/gracefulExit.ts @@ -1,22 +1,19 @@ -type Cleanup = () => Promise | void - interface SetupOptions { - cleanups?: Cleanup[] + cleanups?: (() => Promise | void)[] failsafeMs?: number onError?: (scope: 'uncaughtException' | 'unhandledRejection', err: unknown) => void onSignal?: (signal: NodeJS.Signals) => void } -const DEFAULT_FAILSAFE_MS = 4000 +const SIGNAL_EXIT_CODE: Record<'SIGHUP' | 'SIGINT' | 'SIGTERM', number> = { + SIGHUP: 129, + SIGINT: 130, + SIGTERM: 143 +} let wired = false -export function setupGracefulExit({ - cleanups = [], - failsafeMs = DEFAULT_FAILSAFE_MS, - onError, - onSignal -}: SetupOptions = {}) { +export function setupGracefulExit({ cleanups = [], failsafeMs = 4000, onError, onSignal }: SetupOptions = {}) { if (wired) { return } @@ -36,28 +33,15 @@ export function setupGracefulExit({ onSignal?.(signal) } - const failsafe = setTimeout(() => process.exit(code), failsafeMs) + setTimeout(() => process.exit(code), failsafeMs).unref?.() - failsafe.unref?.() - - void Promise.allSettled(cleanups.map(fn => Promise.resolve().then(fn))) - .catch(() => {}) - .finally(() => process.exit(code)) + void Promise.allSettled(cleanups.map(fn => Promise.resolve().then(fn))).finally(() => process.exit(code)) } for (const sig of ['SIGINT', 'SIGTERM', 'SIGHUP'] as const) { - process.on(sig, () => exit(sig === 'SIGINT' ? 130 : sig === 'SIGTERM' ? 
143 : 129, sig)) + process.on(sig, () => exit(SIGNAL_EXIT_CODE[sig], sig)) } - process.on('uncaughtException', err => { - onError?.('uncaughtException', err) - }) - - process.on('unhandledRejection', reason => { - onError?.('unhandledRejection', reason) - }) -} - -export function forceExit(code = 0) { - process.exit(code) + process.on('uncaughtException', err => onError?.('uncaughtException', err)) + process.on('unhandledRejection', reason => onError?.('unhandledRejection', reason)) } diff --git a/ui-tui/src/lib/memory.ts b/ui-tui/src/lib/memory.ts index 0afbab772..efeff6eb8 100644 --- a/ui-tui/src/lib/memory.ts +++ b/ui-tui/src/lib/memory.ts @@ -5,7 +5,7 @@ import { join } from 'node:path' import { pipeline } from 'node:stream/promises' import { getHeapSnapshot, getHeapSpaceStatistics, getHeapStatistics } from 'node:v8' -export type MemoryTrigger = 'auto-high' | 'auto-critical' | 'manual' +export type MemoryTrigger = 'auto-critical' | 'auto-high' | 'manual' export interface MemoryDiagnostics { activeHandles: number @@ -54,74 +54,43 @@ export interface HeapDumpResult { success: boolean } -const heapDumpRoot = () => - process.env.HERMES_HEAPDUMP_DIR?.trim() || join(homedir() || tmpdir(), '.hermes', 'heapdumps') - -const processInternals = process as unknown as { - _getActiveHandles: () => unknown[] - _getActiveRequests: () => unknown[] -} - export async function captureMemoryDiagnostics(trigger: MemoryTrigger): Promise { const usage = process.memoryUsage() const heapStats = getHeapStatistics() const resourceUsage = process.resourceUsage() const uptimeSeconds = process.uptime() + // Not available on Bun / older Node. 
let heapSpaces: ReturnType | undefined try { heapSpaces = getHeapSpaceStatistics() } catch { - /* Bun / older Node — ignore */ + /* noop */ } - const activeHandles = processInternals._getActiveHandles().length - const activeRequests = processInternals._getActiveRequests().length - - let openFileDescriptors: number | undefined - - try { - openFileDescriptors = (await readdir('/proc/self/fd')).length - } catch { - /* non-Linux */ + const internals = process as unknown as { + _getActiveHandles: () => unknown[] + _getActiveRequests: () => unknown[] } - let smapsRollup: string | undefined - - try { - smapsRollup = await readFile('/proc/self/smaps_rollup', 'utf8') - } catch { - /* non-Linux / no access */ - } + const activeHandles = internals._getActiveHandles().length + const activeRequests = internals._getActiveRequests().length + const openFileDescriptors = await swallow(async () => (await readdir('/proc/self/fd')).length) + const smapsRollup = await swallow(() => readFile('/proc/self/smaps_rollup', 'utf8')) const nativeMemory = usage.rss - usage.heapUsed const bytesPerSecond = uptimeSeconds > 0 ? 
usage.rss / uptimeSeconds : 0 const mbPerHour = (bytesPerSecond * 3600) / (1024 * 1024) - const potentialLeaks: string[] = [] - - if (heapStats.number_of_detached_contexts > 0) { - potentialLeaks.push( - `${heapStats.number_of_detached_contexts} detached context(s) — possible component/closure leak` - ) - } - - if (activeHandles > 100) { - potentialLeaks.push(`${activeHandles} active handles — possible timer/socket leak`) - } - - if (nativeMemory > usage.heapUsed) { - potentialLeaks.push('Native memory > heap — leak may be in native addons') - } - - if (mbPerHour > 100) { - potentialLeaks.push(`High memory growth rate: ${mbPerHour.toFixed(1)} MB/hour`) - } - - if (openFileDescriptors && openFileDescriptors > 500) { - potentialLeaks.push(`${openFileDescriptors} open FDs — possible file/socket leak`) - } + const potentialLeaks = [ + heapStats.number_of_detached_contexts > 0 && + `${heapStats.number_of_detached_contexts} detached context(s) — possible component/closure leak`, + activeHandles > 100 && `${activeHandles} active handles — possible timer/socket leak`, + nativeMemory > usage.heapUsed && 'Native memory > heap — leak may be in native addons', + mbPerHour > 100 && `High memory growth rate: ${mbPerHour.toFixed(1)} MB/hour`, + openFileDescriptors && openFileDescriptors > 500 && `${openFileDescriptors} open FDs — possible file/socket leak` + ].filter((s): s is string => typeof s === 'string') return { activeHandles, @@ -170,18 +139,19 @@ export async function captureMemoryDiagnostics(trigger: MemoryTrigger): Promise< export async function performHeapDump(trigger: MemoryTrigger = 'manual'): Promise { try { + // Diagnostics first — heap-snapshot serialization can crash on very large + // heaps, and the JSON sidecar is the most actionable artifact if so. 
const diagnostics = await captureMemoryDiagnostics(trigger) - const dir = heapDumpRoot() + const dir = process.env.HERMES_HEAPDUMP_DIR?.trim() || join(homedir() || tmpdir(), '.hermes', 'heapdumps') await mkdir(dir, { recursive: true }) - const stamp = new Date().toISOString().replace(/[:.]/g, '-') - const base = `hermes-${stamp}-${process.pid}-${trigger}` + const base = `hermes-${new Date().toISOString().replace(/[:.]/g, '-')}-${process.pid}-${trigger}` const heapPath = join(dir, `${base}.heapsnapshot`) const diagPath = join(dir, `${base}.diagnostics.json`) await writeFile(diagPath, JSON.stringify(diagnostics, null, 2), { mode: 0o600 }) - await writeSnapshot(heapPath) + await pipeline(getHeapSnapshot(), createWriteStream(heapPath, { mode: 0o600 })) return { diagPath, heapPath, success: true } } catch (e) { @@ -194,15 +164,19 @@ export function formatBytes(bytes: number): string { return '0B' } - const units = ['B', 'KB', 'MB', 'GB', 'TB'] - const exp = Math.min(units.length - 1, Math.floor(Math.log10(bytes) / 3)) + const exp = Math.min(UNITS.length - 1, Math.floor(Math.log10(bytes) / 3)) const value = bytes / 1024 ** exp - return `${value >= 100 ? value.toFixed(0) : value.toFixed(1)}${units[exp]}` + return `${value >= 100 ? value.toFixed(0) : value.toFixed(1)}${UNITS[exp]}` } -async function writeSnapshot(filepath: string) { - const stream = createWriteStream(filepath, { mode: 0o600 }) +const UNITS = ['B', 'KB', 'MB', 'GB', 'TB'] - await pipeline(getHeapSnapshot(), stream) +// Returns undefined when the probe isn't available (non-Linux paths, sandboxed FS). 
+const swallow = async (fn: () => Promise): Promise => { + try { + return await fn() + } catch { + return undefined + } } diff --git a/ui-tui/src/lib/memoryMonitor.ts b/ui-tui/src/lib/memoryMonitor.ts index 58d7d3878..6655819b5 100644 --- a/ui-tui/src/lib/memoryMonitor.ts +++ b/ui-tui/src/lib/memoryMonitor.ts @@ -14,57 +14,37 @@ export interface MemoryMonitorOptions { intervalMs?: number onCritical?: (snap: MemorySnapshot, dump: HeapDumpResult | null) => void onHigh?: (snap: MemorySnapshot, dump: HeapDumpResult | null) => void - onSnapshot?: (snap: MemorySnapshot) => void } const GB = 1024 ** 3 -const DEFAULTS = { - criticalBytes: 2.5 * GB, - highBytes: 1.5 * GB, - intervalMs: 10_000 -} - export function startMemoryMonitor({ - criticalBytes = DEFAULTS.criticalBytes, - highBytes = DEFAULTS.highBytes, - intervalMs = DEFAULTS.intervalMs, + criticalBytes = 2.5 * GB, + highBytes = 1.5 * GB, + intervalMs = 10_000, onCritical, - onHigh, - onSnapshot + onHigh }: MemoryMonitorOptions = {}): () => void { - let dumpedHigh = false - let dumpedCritical = false + const dumped = new Set>() const tick = async () => { const { heapUsed, rss } = process.memoryUsage() const level: MemoryLevel = heapUsed >= criticalBytes ? 'critical' : heapUsed >= highBytes ? 'high' : 'normal' - const snap: MemorySnapshot = { heapUsed, level, rss } - - onSnapshot?.(snap) if (level === 'normal') { - dumpedHigh = false - dumpedCritical = false + return void dumped.clear() + } + if (dumped.has(level)) { return } - if (level === 'high' && !dumpedHigh) { - dumpedHigh = true - const dump = await performHeapDump('auto-high').catch(() => null) + dumped.add(level) + const dump = await performHeapDump(level === 'critical' ? 
'auto-critical' : 'auto-high').catch(() => null) - onHigh?.(snap, dump) + const snap: MemorySnapshot = { heapUsed, level, rss } - return - } - - if (level === 'critical' && !dumpedCritical) { - dumpedCritical = true - const dump = await performHeapDump('auto-critical').catch(() => null) - - onCritical?.(snap, dump) - } + ;(level === 'critical' ? onCritical : onHigh)?.(snap, dump) } const handle = setInterval(() => void tick(), intervalMs) From e1ce7c6b1fe29f687ad4c3a34eea2234e8f09c69 Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Mon, 20 Apr 2026 19:09:09 -0500 Subject: [PATCH 4/4] fix(tui): address PR #13231 review comments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Six small fixes, all valid review feedback: - gatewayClient: onTimeout is now a class-field arrow so setTimeout gets a stable reference — no per-request bind allocation (the whole point of the original refactor). - memory: growth rate was lifetime average of rss/uptime, which reports phantom growth for stable processes. Now computed as delta since a module-load baseline (STARTED_AT). Sanity-checked: 0.00 MB/hr at steady-state, non-zero after an allocation. - hermes_cli: NODE_OPTIONS merge is now token-aware — respects a user-supplied --max-old-space-size (don't downgrade a deliberate 16GB setting) and avoids duplicating --expose-gc. - useVirtualHistory: if items shrink past the frozen range's start mid-freeze (/clear, compaction), drop the freeze and fall through to the normal range calc instead of collapsing to an empty mount. - circularBuffer: throw on non-positive capacity instead of silently producing NaN indices. - debug slash help: /heapdump mentions HERMES_HEAPDUMP_DIR override instead of hardcoding the default path. Validation: tsc clean, eslint clean, vitest 102/102, growth-rate smoke test confirms baseline=0 → post-alloc>0. 
--- hermes_cli/main.py | 18 +++++++++--------- ui-tui/src/app/slash/commands/debug.ts | 2 +- ui-tui/src/gatewayClient.ts | 6 ++++-- ui-tui/src/hooks/useVirtualHistory.ts | 9 ++++++--- ui-tui/src/lib/circularBuffer.ts | 4 ++++ ui-tui/src/lib/memory.ts | 7 ++++++- 6 files changed, 30 insertions(+), 16 deletions(-) diff --git a/hermes_cli/main.py b/hermes_cli/main.py index fc29b848a..489a1652d 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -1005,15 +1005,15 @@ def _launch_tui(resume_session_id: Optional[str] = None, tui_dev: bool = False): env.setdefault("HERMES_CWD", os.getcwd()) # Guarantee an 8GB V8 heap + exposed GC for the TUI. Default node cap is # ~1.5–4GB depending on version and can fatal-OOM on long sessions with - # large transcripts / reasoning blobs. Append (don't clobber) any user - # NODE_OPTIONS. - _existing_node_opts = env.get("NODE_OPTIONS", "").strip() - _hermes_tui_node_opts = "--max-old-space-size=8192 --expose-gc" - env["NODE_OPTIONS"] = ( - f"{_existing_node_opts} {_hermes_tui_node_opts}".strip() - if _hermes_tui_node_opts not in _existing_node_opts - else _existing_node_opts - ) + # large transcripts / reasoning blobs. Token-level merge: respect any + # user-supplied --max-old-space-size (they may have set it higher) and + # avoid duplicating --expose-gc. 
+ _tokens = env.get("NODE_OPTIONS", "").split() + if not any(t.startswith("--max-old-space-size=") for t in _tokens): + _tokens.append("--max-old-space-size=8192") + if "--expose-gc" not in _tokens: + _tokens.append("--expose-gc") + env["NODE_OPTIONS"] = " ".join(_tokens) if resume_session_id: env["HERMES_TUI_RESUME"] = resume_session_id diff --git a/ui-tui/src/app/slash/commands/debug.ts b/ui-tui/src/app/slash/commands/debug.ts index d44c76f34..b4bfc16bf 100644 --- a/ui-tui/src/app/slash/commands/debug.ts +++ b/ui-tui/src/app/slash/commands/debug.ts @@ -3,7 +3,7 @@ import type { SlashCommand } from '../types.js' export const debugCommands: SlashCommand[] = [ { - help: 'write a V8 heap snapshot + memory diagnostics to ~/.hermes/heapdumps', + help: 'write a V8 heap snapshot + memory diagnostics (see HERMES_HEAPDUMP_DIR)', name: 'heapdump', run: (_arg, ctx) => { const { heapUsed, rss } = process.memoryUsage() diff --git a/ui-tui/src/gatewayClient.ts b/ui-tui/src/gatewayClient.ts index bf5210faa..9bf681f8b 100644 --- a/ui-tui/src/gatewayClient.ts +++ b/ui-tui/src/gatewayClient.ts @@ -218,7 +218,9 @@ export class GatewayClient extends EventEmitter { this.pending.clear() } - private onTimeout(id: string) { + // Arrow class-field — stable identity, so `setTimeout(this.onTimeout, …, id)` + // doesn't allocate a bound function per request. 
+ private onTimeout = (id: string) => { const p = this.pending.get(id) if (p) { @@ -258,7 +260,7 @@ export class GatewayClient extends EventEmitter { const id = `r${++this.reqId}` return new Promise((resolve, reject) => { - const timeout = setTimeout(this.onTimeout.bind(this), REQUEST_TIMEOUT_MS, id) + const timeout = setTimeout(this.onTimeout, REQUEST_TIMEOUT_MS, id) timeout.unref?.() diff --git a/ui-tui/src/hooks/useVirtualHistory.ts b/ui-tui/src/hooks/useVirtualHistory.ts index c21e25fda..17bc8dfd3 100644 --- a/ui-tui/src/hooks/useVirtualHistory.ts +++ b/ui-tui/src/hooks/useVirtualHistory.ts @@ -124,14 +124,17 @@ export function useVirtualHistory( const vp = Math.max(0, scrollRef.current?.getViewportHeight() ?? 0) const sticky = scrollRef.current?.isSticky() ?? true - const frozenRange = freezeRenders.current > 0 ? prevRange.current : null + // During a freeze, drop the frozen range if items shrank past its start + // (/clear, compaction) — clamping would collapse to an empty mount and + // flash blank. Fall through to the normal path in that case. + const frozenRange = + freezeRenders.current > 0 && prevRange.current && prevRange.current[0] < n ? prevRange.current : null let start = 0 let end = n if (frozenRange) { - // Clamp in case items shrank (/clear, compaction) mid-freeze. 
- start = Math.min(frozenRange[0], n) + start = frozenRange[0] end = Math.min(frozenRange[1], n) } else if (n > 0) { if (vp <= 0) { diff --git a/ui-tui/src/lib/circularBuffer.ts b/ui-tui/src/lib/circularBuffer.ts index 675a508f2..31502fc22 100644 --- a/ui-tui/src/lib/circularBuffer.ts +++ b/ui-tui/src/lib/circularBuffer.ts @@ -4,6 +4,10 @@ export class CircularBuffer { private len = 0 constructor(private capacity: number) { + if (!Number.isInteger(capacity) || capacity <= 0) { + throw new RangeError(`CircularBuffer capacity must be a positive integer, got ${capacity}`) + } + this.buf = new Array(capacity) } diff --git a/ui-tui/src/lib/memory.ts b/ui-tui/src/lib/memory.ts index efeff6eb8..9f157adff 100644 --- a/ui-tui/src/lib/memory.ts +++ b/ui-tui/src/lib/memory.ts @@ -80,7 +80,10 @@ export async function captureMemoryDiagnostics(trigger: MemoryTrigger): Promise< const smapsRollup = await swallow(() => readFile('/proc/self/smaps_rollup', 'utf8')) const nativeMemory = usage.rss - usage.heapUsed - const bytesPerSecond = uptimeSeconds > 0 ? usage.rss / uptimeSeconds : 0 + // Real growth rate since STARTED_AT (captured at module load) — NOT a lifetime + // average of rss/uptime, which would report phantom "growth" for a stable process. + const elapsed = Math.max(0, uptimeSeconds - STARTED_AT.uptime) + const bytesPerSecond = elapsed > 0 ? (usage.rss - STARTED_AT.rss) / elapsed : 0 const mbPerHour = (bytesPerSecond * 3600) / (1024 * 1024) const potentialLeaks = [ @@ -172,6 +175,8 @@ export function formatBytes(bytes: number): string { const UNITS = ['B', 'KB', 'MB', 'GB', 'TB'] +const STARTED_AT = { rss: process.memoryUsage().rss, uptime: process.uptime() } + // Returns undefined when the probe isn't available (non-Linux paths, sandboxed FS). const swallow = async (fn: () => Promise): Promise => { try {