feat(cli): self-healing daemon lifecycle

every daemon-routed verb now probes the ipc socket via /v1/version
(instead of trusting existsSync), cleans up stale sock/pid files left
by a crashed daemon, and auto-spawns a detached `claudemesh daemon up`
under a file-lock when the daemon is down. it then polls for liveness
within a time budget (3s for ad-hoc verbs, 10s for launch) before
falling back to the cold path.

includes a per-process result cache (script doing 50 sends pays spawn
cost at most once), a 30s recently-failed marker (no thundering-herd
retries on crash-loop), a spawn-lock (concurrent invocations share one
attempt), and a recursion guard env var (nested cli calls inside the
daemon process skip auto-spawn).

fixes the stale-socket bug where launch's ensureDaemonRunning returned
early on a left-over socket file from a crashed daemon, silently
breaking the spawned claude session's mcp shim.

deferred to 1.28.0: --strict / --no-daemon flags, lazy-loading of
cold-path code, per-session ipc tokens.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Alejandro Gutiérrez
2026-05-04 11:17:32 +01:00
parent 8a5469a5df
commit 2b6cf2c14b
6 changed files with 396 additions and 49 deletions

View File

@@ -0,0 +1,54 @@
/**
* Once-per-process daemon-state warnings, routed to stderr.
*
* Suppressed under --quiet only when the caller passes `quiet: true` —
* this module never inspects argv. JSON callers should consult the
* result's `state` field directly and either skip this helper or pass
* `json: true`, which also makes it emit nothing.
*/
import type { EnsureDaemonResult } from "~/services/daemon/lifecycle.js";
import { dim } from "./styles.js";
/**
* Module-level latch read and set by `warnDaemonState` so that at most
* one daemon-state line is written; cleared again by
* `_resetDaemonWarningLatch`.
*/
let alreadyWarned = false;
/**
* Options accepted by `warnDaemonState`. Setting either flag makes the
* helper return `false` without writing anything.
*/
export interface WarnDaemonOpts {
/** When true, emit nothing — the caller is running under --quiet. */
quiet?: boolean;
/** When true, emit nothing — the caller will surface the state in JSON. */
json?: boolean;
}
/**
 * Print a single, severity-appropriate line to stderr describing the
 * result of `ensureDaemonReady`.
 *
 * Nothing is printed when a line was already emitted earlier, when the
 * caller asked for `quiet` or `json` output, or when the daemon state is
 * `"up"`. The `"started"` and `"down"` states are tagged `info`;
 * `"spawn-suppressed"` and `"spawn-failed"` are tagged `warn`.
 *
 * The once-only latch is consumed only when a line is actually going to
 * be written, so a state this function does not recognise cannot
 * silently suppress a later, real warning.
 *
 * @param res - Result whose `state` selects the message; `durationMs`
 *   and `reason` are interpolated for the states that use them.
 * @param opts - Suppression flags; see `WarnDaemonOpts`.
 * @returns `true` if a line was written to stderr, `false` otherwise.
 */
export function warnDaemonState(
  res: EnsureDaemonResult,
  opts: WarnDaemonOpts = {},
): boolean {
  if (alreadyWarned) return false;
  if (opts.quiet || opts.json) return false;
  if (res.state === "up") return false;

  const tag = (label: string) => `[claudemesh] ${label}`;
  const hint = (s: string) => dim(s);

  // Compose the line first; only a state we know how to describe may
  // consume the latch below.
  let line: string;
  switch (res.state) {
    case "started":
      line = `${tag("info")} daemon restarted automatically ${hint(`(took ${res.durationMs}ms)`)}\n`;
      break;
    case "down":
      line = `${tag("info")} daemon not running — using cold path ${hint("(slower; run `claudemesh daemon up` for warm path)")}\n`;
      break;
    case "spawn-suppressed":
      line = `${tag("warn")} ${res.reason ?? "daemon failed to start recently"} — using cold path ${hint("(run `claudemesh doctor`)")}\n`;
      break;
    case "spawn-failed":
      line = `${tag("warn")} daemon spawn failed${res.reason ? `: ${res.reason}` : ""} — using cold path ${hint("(check ~/.claudemesh/daemon/daemon.log)")}\n`;
      break;
    default:
      // Unrecognised state: print nothing and leave the latch untouched.
      return false;
  }

  // Set the latch before writing, so a failing write still counts as the
  // single attempt.
  alreadyWarned = true;
  process.stderr.write(line);
  return true;
}
/**
* Reset the once-per-process latch so that a subsequent
* `warnDaemonState` call is allowed to print again. Test helper.
*/
export function _resetDaemonWarningLatch(): void {
alreadyWarned = false;
}