From 2b6cf2c14b79607e081ac38eb8dcad68cc950118 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alejandro=20Guti=C3=A9rrez?= <35082514+alezmad@users.noreply.github.com> Date: Mon, 4 May 2026 11:17:32 +0100 Subject: [PATCH] feat(cli): self-healing daemon lifecycle every daemon-routed verb now probes the ipc socket via /v1/version (instead of trusting existsSync), cleans up stale sock/pid files left by a crashed daemon, and auto-spawns a detached `claudemesh daemon up` under a file-lock when the daemon is down. polls for liveness up to a budget (3s for ad-hoc verbs, 10s for launch) before falling through to cold path. includes a per-process result cache (script doing 50 sends pays spawn cost at most once), a 30s recently-failed marker (no thundering-herd retries on crash-loop), a spawn-lock (concurrent invocations share one attempt), and a recursion guard env var (nested cli calls inside the daemon process skip auto-spawn). fixes the stale-socket bug where launch's ensureDaemonRunning returned early on a left-over socket file from a crashed daemon, silently breaking the spawned claude session's mcp shim. deferred to 1.28.0: --strict / --no-daemon flags, lazy-loading of cold-path code, per-session ipc tokens. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- apps/cli/CHANGELOG.md | 49 ++++ apps/cli/package.json | 2 +- apps/cli/src/commands/launch.ts | 52 ++-- apps/cli/src/services/bridge/daemon-route.ts | 45 +++- apps/cli/src/services/daemon/lifecycle.ts | 243 +++++++++++++++++++ apps/cli/src/ui/warnings.ts | 54 +++++ 6 files changed, 396 insertions(+), 49 deletions(-) create mode 100644 apps/cli/src/services/daemon/lifecycle.ts create mode 100644 apps/cli/src/ui/warnings.ts diff --git a/apps/cli/CHANGELOG.md b/apps/cli/CHANGELOG.md index ff12b74..27cc394 100644 --- a/apps/cli/CHANGELOG.md +++ b/apps/cli/CHANGELOG.md @@ -1,5 +1,54 @@ # Changelog +## 1.27.3 (2026-05-04) — self-healing daemon lifecycle + +The CLI now auto-recovers from a dead daemon on every invocation +instead of silently mis-routing through a stale socket. + +### What changed + +- New `services/daemon/lifecycle.ts` — single helper that probes the + IPC socket via `/v1/version` (instead of trusting `existsSync`), + cleans up stale `daemon.sock` / `daemon.pid` files, and auto-spawns + a detached `claudemesh daemon up` under a file-lock when the daemon + is missing. +- Polls for socket liveness up to a budget (3 s for ad-hoc verbs, + 10 s for `claudemesh launch`) before falling through. +- Recently-failed marker (`~/.claudemesh/daemon/.spawn-failure`, + 30 s TTL) prevents thundering-herd retries when the daemon + crash-loops at startup. +- Spawn-lock (`~/.claudemesh/daemon/.spawn.lock`) ensures concurrent + CLI invocations share one spawn attempt instead of racing. +- Per-process result cache — a script doing 50 sends pays the spawn + cost at most once, not 50 times. +- Recursion guard via `CLAUDEMESH_INTERNAL_NO_AUTOSPAWN=1` env (set + on the spawned daemon's env) so nested CLI calls inside the daemon + process don't re-trigger spawn. + +### User-visible behavior + +- `peer list`, `send`, `state get`, etc. now restart the daemon + automatically when invoked while the daemon is down. 
+- One-line stderr info on auto-restart: + `[claudemesh] info daemon restarted automatically (took 615ms)`. +- Cold-path fallback fires only when auto-spawn fails or is + suppressed by the recently-failed marker; in those cases a `warn` + line points at the daemon log. + +### Bug fixed + +`claudemesh launch`'s `ensureDaemonRunning` previously checked only +`existsSync(SOCK_FILE)` and returned early on a stale socket left by +a crashed daemon — silently breaking new sessions. Now delegates to +the lifecycle helper which probes the socket and recovers. + +### What's not in this patch + +- `--strict` and `--no-daemon` flags (deferred to D in 1.28.0). +- Lazy-loading of cold-path code (deferred to 1.28.0). +- Per-session IPC tokens (deferred to 1.28.0 alongside D's + thin-client conversion). + ## 1.27.2 (2026-05-04) — skill: full-flag launch templates Documentation-only ship. `skills/claudemesh/SKILL.md` gains a canonical diff --git a/apps/cli/package.json b/apps/cli/package.json index f900e62..7efe254 100644 --- a/apps/cli/package.json +++ b/apps/cli/package.json @@ -1,6 +1,6 @@ { "name": "claudemesh-cli", - "version": "1.27.2", + "version": "1.27.3", "description": "Peer mesh for Claude Code sessions — CLI + MCP server.", "keywords": [ "claude-code", diff --git a/apps/cli/src/commands/launch.ts b/apps/cli/src/commands/launch.ts index 97c19fa..c36e044 100644 --- a/apps/cli/src/commands/launch.ts +++ b/apps/cli/src/commands/launch.ts @@ -49,46 +49,28 @@ export interface LaunchFlags { * * As of 1.24.0 the daemon owns the broker WS and feeds the MCP push-pipe * over IPC SSE. If the socket is absent when Claude boots its MCP shim, - * the shim bails (no fallback). So we probe for the socket here and, if - * missing, spawn `claudemesh daemon up --mesh ` in the background, - * waiting briefly for the socket to appear. 
- * - * Best-effort: if the daemon spawn fails, we surface the error and let - * the launch proceed — Claude Code will print the same "daemon not - * running" message and the user can fix it manually. + * the shim bails (no fallback). Delegates to the shared lifecycle helper + * (services/daemon/lifecycle.ts) which probes the socket properly + * (avoiding the stale-socket bug where existsSync was a false positive + * after a daemon crash), spawns under a file-lock, and polls for liveness. */ async function ensureDaemonRunning(meshSlug: string, quiet: boolean): Promise<void> { - const { DAEMON_PATHS } = await import("~/daemon/paths.js"); - if (existsSync(DAEMON_PATHS.SOCK_FILE)) return; - - if (!quiet) render.info("starting claudemesh daemon…"); - const { spawn } = await import("node:child_process"); - const argv0 = process.argv[1] ?? "claudemesh"; - let binary = argv0; - if (/\.ts$/.test(binary) || /node_modules|src\/entrypoints/.test(binary)) { - try { - const { execSync } = await import("node:child_process"); - binary = execSync("which claudemesh", { encoding: "utf8" }).trim(); - } catch { binary = "claudemesh"; } + const { ensureDaemonReady } = await import("~/services/daemon/lifecycle.js"); + if (!quiet) render.info("ensuring claudemesh daemon is running…"); + // Larger budget for `launch` — it's a one-shot flow where the user + // is actively waiting; cold node start + broker hello can take + // longer than the default 3s budget for ad-hoc verbs. + const res = await ensureDaemonReady({ budgetMs: 10_000, mesh: meshSlug }); + if (res.state === "up") { + if (!quiet) render.ok("daemon already running"); + return; } - const child = spawn(binary, ["daemon", "up", "--mesh", meshSlug], { - detached: true, - stdio: "ignore", - }); - child.unref(); - - // Wait for the socket to appear. 10 s budget — covers cold node start + - // broker hello round-trip on slow links.
- const start = Date.now(); - while (Date.now() - start < 10_000) { - if (existsSync(DAEMON_PATHS.SOCK_FILE)) { - if (!quiet) render.ok("daemon ready"); - return; - } - await new Promise((r) => setTimeout(r, 200)); + if (res.state === "started") { + if (!quiet) render.ok(`daemon ready (${res.durationMs}ms)`); + return; } render.warn( - "daemon failed to start within 10s", + `daemon ${res.state}${res.reason ? `: ${res.reason}` : ""}`, "Run `claudemesh daemon up --mesh " + meshSlug + "` manually, then re-launch.", ); } diff --git a/apps/cli/src/services/bridge/daemon-route.ts b/apps/cli/src/services/bridge/daemon-route.ts index 10a9889..671c273 100644 --- a/apps/cli/src/services/bridge/daemon-route.ts +++ b/apps/cli/src/services/bridge/daemon-route.ts @@ -1,21 +1,40 @@ // Try forwarding a send through the local daemon's IPC. Returns null if // the daemon isn't running or the daemon's mesh doesn't match the target // mesh — the caller falls back to the bridge or cold path. - -import { existsSync } from "node:fs"; +// +// Auto-recovery: when the daemon socket is missing or stale, every +// helper here calls into the lifecycle module which probes, spawns +// (under a lock), polls, and retries — so cold-path fallback only +// fires if auto-spawn failed. The lifecycle module caches its +// per-process result, so a script doing 50 sends pays the spawn cost +// at most once. import { ipc } from "~/daemon/ipc/client.js"; -import { DAEMON_PATHS } from "~/daemon/paths.js"; +import { ensureDaemonReady } from "~/services/daemon/lifecycle.js"; +import { warnDaemonState } from "~/ui/warnings.js"; function meshQuery(mesh?: string): string { return mesh ? `?mesh=${encodeURIComponent(mesh)}` : ""; } +/** Common entry: ensure the daemon is reachable, emitting a one-shot + * stderr warning describing what we did. Returns true when the daemon + * is now reachable, false when the caller should fall back.
*/ +async function daemonReachable(): Promise { + const res = await ensureDaemonReady(); + // Suppress the warning under JSON / quiet at the call site — + // helpers here can't see those flags. JSON callers should switch + // to lifecycle directly. For now we always print; --quiet at the + // top of each verb already redirects stderr where needed. + warnDaemonState(res, {}); + return res.state === "up" || res.state === "started"; +} + /** Try fetching the peer list through the daemon (~1ms warm IPC). * Returns null when the daemon socket isn't present so the caller can * fall back to bridge / cold paths. */ export async function tryListPeersViaDaemon(mesh?: string): Promise { - if (!existsSync(DAEMON_PATHS.SOCK_FILE)) return null; + if (!(await daemonReachable())) return null; try { const res = await ipc<{ peers?: unknown[] }>({ path: `/v1/peers${meshQuery(mesh)}`, timeoutMs: 3_000 }); if (res.status !== 200) return null; @@ -29,7 +48,7 @@ export async function tryListPeersViaDaemon(mesh?: string): Promise { - if (!existsSync(DAEMON_PATHS.SOCK_FILE)) return null; + if (!(await daemonReachable())) return null; try { const res = await ipc<{ skills?: unknown[] }>({ path: `/v1/skills${meshQuery(mesh)}`, timeoutMs: 3_000 }); if (res.status !== 200) return null; @@ -43,7 +62,7 @@ export async function tryListSkillsViaDaemon(mesh?: string): Promise { - if (!existsSync(DAEMON_PATHS.SOCK_FILE)) return null; + if (!(await daemonReachable())) return null; try { const res = await ipc<{ skill?: unknown }>({ path: `/v1/skills/${encodeURIComponent(name)}${meshQuery(mesh)}`, @@ -70,7 +89,7 @@ export type StateEntry = { * - undefined when the daemon ran but the key is unset (404) * - null when the daemon socket isn't present (caller falls back) */ export async function tryGetStateViaDaemon(key: string, mesh?: string): Promise { - if (!existsSync(DAEMON_PATHS.SOCK_FILE)) return null; + if (!(await daemonReachable())) return null; try { const path = 
`/v1/state?key=${encodeURIComponent(key)}${mesh ? `&mesh=${encodeURIComponent(mesh)}` : ""}`; const res = await ipc<{ state?: StateEntry; error?: string }>({ path, timeoutMs: 3_000 }); @@ -85,7 +104,7 @@ export async function tryGetStateViaDaemon(key: string, mesh?: string): Promise< } export async function tryListStateViaDaemon(mesh?: string): Promise { - if (!existsSync(DAEMON_PATHS.SOCK_FILE)) return null; + if (!(await daemonReachable())) return null; try { const res = await ipc<{ entries?: StateEntry[] }>({ path: `/v1/state${meshQuery(mesh)}`, timeoutMs: 3_000 }); if (res.status !== 200) return null; @@ -98,7 +117,7 @@ export async function tryListStateViaDaemon(mesh?: string): Promise { - if (!existsSync(DAEMON_PATHS.SOCK_FILE)) return false; + if (!(await daemonReachable())) return false; try { const res = await ipc<{ ok?: boolean; error?: string }>({ method: "POST", @@ -122,7 +141,7 @@ export type MemoryEntry = { }; export async function tryRememberViaDaemon(content: string, tags?: string[], mesh?: string): Promise<{ id: string; mesh?: string } | null> { - if (!existsSync(DAEMON_PATHS.SOCK_FILE)) return null; + if (!(await daemonReachable())) return null; try { const res = await ipc<{ id?: string; mesh?: string; error?: string }>({ method: "POST", @@ -136,7 +155,7 @@ export async function tryRememberViaDaemon(content: string, tags?: string[], mes } export async function tryRecallViaDaemon(query: string, mesh?: string): Promise { - if (!existsSync(DAEMON_PATHS.SOCK_FILE)) return null; + if (!(await daemonReachable())) return null; try { const path = `/v1/memory?q=${encodeURIComponent(query)}${mesh ? 
`&mesh=${encodeURIComponent(mesh)}` : ""}`; const res = await ipc<{ matches?: MemoryEntry[] }>({ path, timeoutMs: 5_000 }); @@ -150,7 +169,7 @@ export async function tryRecallViaDaemon(query: string, mesh?: string): Promise< } export async function tryForgetViaDaemon(id: string, mesh?: string): Promise { - if (!existsSync(DAEMON_PATHS.SOCK_FILE)) return false; + if (!(await daemonReachable())) return false; try { const path = `/v1/memory/${encodeURIComponent(id)}${meshQuery(mesh)}`; const res = await ipc<{ ok?: boolean }>({ method: "DELETE", path, timeoutMs: 3_000 }); @@ -179,7 +198,7 @@ export async function trySendViaDaemon(args: { * right mesh by either flag or single-mesh-default. */ expectedMesh?: string; }): Promise { - if (!existsSync(DAEMON_PATHS.SOCK_FILE)) return null; + if (!(await daemonReachable())) return null; try { const res = await ipc<{ diff --git a/apps/cli/src/services/daemon/lifecycle.ts b/apps/cli/src/services/daemon/lifecycle.ts new file mode 100644 index 0000000..4503cca --- /dev/null +++ b/apps/cli/src/services/daemon/lifecycle.ts @@ -0,0 +1,243 @@ +/** + * Daemon lifecycle helper — probe, auto-spawn, retry, fall-through. + * + * Every daemon-routed CLI verb passes through `ensureDaemonReady()` before + * its IPC call. The helper: + * + * 1. Probes the socket via a fast `/v1/version` IPC (~5-10 ms). + * 2. If the socket is missing OR present-but-stale, attempts a detached + * `claudemesh daemon up` spawn under a file-lock. + * 3. Polls for the new socket up to a budget (default 3s). + * 4. Returns a state describing what happened, so the caller can either + * proceed warm or fall back to the cold path with a clear warning. 
+ * + * State machine: + * - "up" daemon was already running + * - "started" daemon was down; we spawned it; it came up + * - "down" daemon was down; auto-spawn skipped (e.g., recursion guard) + * - "spawn-failed" spawn attempted but socket never appeared within budget + * - "spawn-suppressed" recently-failed marker is fresh; skipped retry + * + * Stale-socket handling: if the socket file exists but the IPC probe + * fails (ECONNREFUSED / timeout), we treat the file as stale, remove + * it, and proceed as if the daemon were down. This fixes the prior bug + * where `existsSync(SOCK_FILE)` was a false positive after a daemon + * crash. + * + * Recursion guard: when we spawn the daemon we set + * `CLAUDEMESH_INTERNAL_NO_AUTOSPAWN=1` in its env so any nested CLI + * calls inside the daemon skip the auto-spawn check and avoid a loop. + */ + +import { existsSync, readFileSync, statSync, unlinkSync, writeFileSync } from "node:fs"; +import { join } from "node:path"; + +import { ipc, IpcError } from "~/daemon/ipc/client.js"; +import { DAEMON_PATHS } from "~/daemon/paths.js"; + +export type DaemonReadyState = + | "up" + | "started" + | "down" + | "spawn-failed" + | "spawn-suppressed"; + +export interface EnsureDaemonResult { + state: DaemonReadyState; + /** Total ms spent in this call (probe ± spawn ± poll). */ + durationMs: number; + /** When state is `spawn-failed` or `spawn-suppressed`, a one-line reason. */ + reason?: string; +} + +export interface EnsureDaemonOpts { + /** Total budget for socket-appearance polling after spawn. Default 3000ms. */ + budgetMs?: number; + /** Skip auto-spawn entirely. Used by `--no-daemon` and the recursion guard. */ + noAutoSpawn?: boolean; + /** When auto-spawning a legacy single-mesh daemon, pin a slug. Omit for multi-mesh (default). 
*/ + mesh?: string; +} + +const SPAWN_LOCK_FILE = () => join(DAEMON_PATHS.DAEMON_DIR, ".spawn.lock"); +const SPAWN_FAIL_FILE = () => join(DAEMON_PATHS.DAEMON_DIR, ".spawn-failure"); +const SPAWN_FAIL_TTL_MS = 30_000; +const PROBE_TIMEOUT_MS = 800; + +let lastResultThisProcess: EnsureDaemonResult | null = null; + +/** Probe daemon and return what we know. Cached per-process so a script + * with 50 sends doesn't re-spawn 50 times. */ +export async function ensureDaemonReady(opts: EnsureDaemonOpts = {}): Promise<EnsureDaemonResult> { + if (lastResultThisProcess && (lastResultThisProcess.state === "up" || lastResultThisProcess.state === "started")) { + return lastResultThisProcess; + } + if (process.env.CLAUDEMESH_INTERNAL_NO_AUTOSPAWN === "1") { + opts = { ...opts, noAutoSpawn: true }; + } + const result = await runEnsureDaemon(opts); + lastResultThisProcess = result; + return result; +} + +/** Reset the per-process cache. Test helper. */ +export function _resetDaemonReadyCache(): void { + lastResultThisProcess = null; +} + +async function runEnsureDaemon(opts: EnsureDaemonOpts): Promise<EnsureDaemonResult> { + const t0 = Date.now(); + + // Step 1 — probe. + const probe = await probeDaemon(); + if (probe === "up") return { state: "up", durationMs: Date.now() - t0 }; + if (probe === "stale") cleanupStaleFiles(); + + // Step 2 — auto-spawn unless forbidden. + if (opts.noAutoSpawn) { + return { state: "down", durationMs: Date.now() - t0, reason: "auto-spawn disabled" }; + } + if (recentSpawnFailureFresh()) { + return { + state: "spawn-suppressed", + durationMs: Date.now() - t0, + reason: `daemon failed to start within last ${Math.round(SPAWN_FAIL_TTL_MS / 1000)}s`, + }; + } + + // Step 3 — spawn detached. + const spawnRes = await spawnDaemon(opts); + if (spawnRes.ok) { + return { state: "started", durationMs: Date.now() - t0 }; + } + + // Step 4 — record failure for backoff and report.
+ markSpawnFailure(); + return { state: "spawn-failed", durationMs: Date.now() - t0, reason: spawnRes.reason }; +} + +async function probeDaemon(): Promise<"up" | "absent" | "stale"> { + if (!existsSync(DAEMON_PATHS.SOCK_FILE)) return "absent"; + try { + const res = await ipc<{ version?: string }>({ path: "/v1/version", timeoutMs: PROBE_TIMEOUT_MS }); + if (res.status === 200) return "up"; + return "stale"; + } catch (err) { + if (err instanceof IpcError) return "stale"; + const msg = String(err); + if (/ENOENT|ECONNREFUSED|ipc_timeout|EPIPE|ECONNRESET/.test(msg)) return "stale"; + return "stale"; + } +} + +function cleanupStaleFiles(): void { + for (const p of [DAEMON_PATHS.SOCK_FILE, DAEMON_PATHS.PID_FILE]) { + try { unlinkSync(p); } catch { /* best-effort */ } + } +} + +function recentSpawnFailureFresh(): boolean { + try { + const st = statSync(SPAWN_FAIL_FILE()); + return Date.now() - st.mtimeMs < SPAWN_FAIL_TTL_MS; + } catch { + return false; + } +} + +function markSpawnFailure(): void { + try { writeFileSync(SPAWN_FAIL_FILE(), String(Date.now()), { mode: 0o600 }); } catch { /* best-effort */ } +} + +function clearSpawnFailure(): void { + try { unlinkSync(SPAWN_FAIL_FILE()); } catch { /* best-effort */ } +} + +interface SpawnResult { ok: boolean; reason?: string; } + +async function spawnDaemon(opts: EnsureDaemonOpts): Promise<SpawnResult> { + const lockResult = await acquireOrShareLock(opts); + if (lockResult === "wait-existing") { + // Another process is spawning; just wait for the socket to appear. + return await pollForSocket(opts.budgetMs ?? 3_000); + } + + try { + const { spawn } = await import("node:child_process"); + const binary = await resolveCliBinary(); + const args = ["daemon", "up"]; + if (opts.mesh) args.push("--mesh", opts.mesh); + + const child = spawn(binary, args, { + detached: true, + stdio: "ignore", + env: { ...process.env, CLAUDEMESH_INTERNAL_NO_AUTOSPAWN: "1" }, + }); + child.unref(); + + const polled = await pollForSocket(opts.budgetMs ??
3_000); + if (polled.ok) clearSpawnFailure(); + return polled; + } catch (err) { + return { ok: false, reason: err instanceof Error ? err.message : String(err) }; + } finally { + releaseLock(); + } +} + +/** Acquire spawn lock. If another process holds it AND its pid is alive, + * return "wait-existing" so we share that spawn attempt. If the pid is + * dead, take over the lock. */ +async function acquireOrShareLock(_opts: EnsureDaemonOpts): Promise<"acquired" | "wait-existing"> { + const lockPath = SPAWN_LOCK_FILE(); + if (existsSync(lockPath)) { + try { + const pidStr = readFileSync(lockPath, "utf8").trim(); + const pid = Number.parseInt(pidStr, 10); + if (Number.isFinite(pid) && pid > 0) { + try { + process.kill(pid, 0); // signal 0 = liveness probe + return "wait-existing"; + } catch { + // Holder is dead — fall through to take over. + } + } + } catch { /* unreadable lock — take over */ } + } + try { + writeFileSync(lockPath, String(process.pid), { mode: 0o600 }); + } catch { /* best-effort; lock is advisory */ } + return "acquired"; +} + +function releaseLock(): void { + try { unlinkSync(SPAWN_LOCK_FILE()); } catch { /* best-effort */ } +} + +async function pollForSocket(budgetMs: number): Promise<SpawnResult> { + const start = Date.now(); + while (Date.now() - start < budgetMs) { + if (existsSync(DAEMON_PATHS.SOCK_FILE)) { + // Don't just trust file presence — confirm it answers. + const probe = await probeDaemon(); + if (probe === "up") return { ok: true }; + } + await new Promise((r) => setTimeout(r, 150)); + } + return { ok: false, reason: `socket did not appear within ${budgetMs}ms` }; +} + +/** Resolve the absolute path to the `claudemesh` binary the user is running. + * When invoked via tsx/bun in dev, fall back to the system `claudemesh`. */ +async function resolveCliBinary(): Promise<string> { + const argv1 = process.argv[1] ??
"claudemesh"; + if (/\.ts$/.test(argv1) || /node_modules|src\/entrypoints/.test(argv1)) { + try { + const { execSync } = await import("node:child_process"); + return execSync("which claudemesh", { encoding: "utf8" }).trim() || "claudemesh"; + } catch { + return "claudemesh"; + } + } + return argv1; +} diff --git a/apps/cli/src/ui/warnings.ts b/apps/cli/src/ui/warnings.ts new file mode 100644 index 0000000..fe43d62 --- /dev/null +++ b/apps/cli/src/ui/warnings.ts @@ -0,0 +1,54 @@ +/** + * Once-per-process daemon-state warnings, routed to stderr. + * + * Suppressed under --quiet (caller responsibility — we never inspect + * argv). JSON callers should consult the result's `state` field + * directly and skip calling this helper. + */ + +import type { EnsureDaemonResult } from "~/services/daemon/lifecycle.js"; +import { dim } from "./styles.js"; + +let alreadyWarned = false; + +export interface WarnDaemonOpts { + quiet?: boolean; + /** When true, emit nothing — the caller will surface the state in JSON. */ + json?: boolean; +} + +/** Print a single, severity-appropriate line to stderr describing the + * result of `ensureDaemonReady`. Returns whether anything was printed. */ +export function warnDaemonState( + res: EnsureDaemonResult, + opts: WarnDaemonOpts = {}, +): boolean { + if (alreadyWarned) return false; + if (opts.quiet || opts.json) return false; + if (res.state === "up") return false; + + alreadyWarned = true; + const tag = (label: string) => `[claudemesh] ${label}`; + const hint = (s: string) => dim(s); + + switch (res.state) { + case "started": + process.stderr.write(`${tag("info")} daemon restarted automatically ${hint(`(took ${res.durationMs}ms)`)}\n`); + return true; + case "down": + process.stderr.write(`${tag("info")} daemon not running — using cold path ${hint("(slower; run `claudemesh daemon up` for warm path)")}\n`); + return true; + case "spawn-suppressed": + process.stderr.write(`${tag("warn")} ${res.reason ?? 
"daemon failed to start recently"} — using cold path ${hint("(run `claudemesh doctor`)")}\n`); + return true; + case "spawn-failed": + process.stderr.write(`${tag("warn")} daemon spawn failed${res.reason ? `: ${res.reason}` : ""} — using cold path ${hint("(check ~/.claudemesh/daemon/daemon.log)")}\n`); + return true; + } + return false; +} + +/** Reset the once-per-process latch. Test helper. */ +export function _resetDaemonWarningLatch(): void { + alreadyWarned = false; +}