feat(cli): claudemesh daemon — peer mesh runtime (v0.9.0)

Long-lived process that holds a persistent WS to the broker and exposes
a local IPC surface (UDS + bearer-auth TCP loopback). Implements the
v0.9.0 spec under .artifacts/specs/.

Core:
- daemon up | status | version | down | accept-host
- daemon outbox list [--failed|--pending|--inflight|--done|--aborted]
- daemon outbox requeue <id> [--new-client-id <id>]
- daemon install-service / uninstall-service (macOS launchd, Linux systemd)

IPC routes:
- /v1/version, /v1/health
- /v1/send  (POST)  — full §4.5.1 idempotency lookup table
- /v1/inbox (GET)   — paged history
- /v1/events        — SSE stream of message/peer_join/peer_leave/broker_status
- /v1/peers         — broker passthrough
- /v1/profile       — summary/status/visible/avatar/title/bio/capabilities
- /v1/outbox + /v1/outbox/requeue — operator recovery

Storage (SQLite via node:sqlite / bun:sqlite):
- outbox.db: pending/inflight/done/dead/aborted with audit columns
- inbox.db: dedupe by client_message_id, decrypts DMs via existing crypto
- BEGIN IMMEDIATE serialization for daemon-local accept races

Identity:
- host_fingerprint.json (machine-id || first-stable-mac)
- refuse-on-mismatch policy with `daemon accept-host` recovery

CLI integration:
- claudemesh send detects the daemon and routes through /v1/send when
  present, falling back to bridge socket / cold path otherwise

Tests: 15-case coverage of the §4.5.1 IPC duplicate lookup table.

Spec arc preserved at .artifacts/specs/2026-05-03-daemon-{v1..v10}.md;
v0.9.0 implementation target locked at 2026-05-03-daemon-spec-v0.9.0.md;
deferred items at 2026-05-03-daemon-spec-broker-hardening-followups.md.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Alejandro Gutiérrez
2026-05-03 20:03:05 +01:00
parent 65e63b0b27
commit abaa4bcf87
34 changed files with 9067 additions and 0 deletions

View File

@@ -0,0 +1,324 @@
// Minimal broker WS connector for the daemon. Reuses the existing CLI
// hello-sign protocol so it speaks the wire current brokers understand.
//
// Differences from BrokerClient (services/broker/ws-client.ts):
// - Slim: no in-memory pending-sends queue, no list_peers/state/topic
// RPCs. The daemon's outbox is the source of truth.
// - Wire envelope adds `client_message_id` (broker may ignore in legacy
// mode; Sprint 7 promotes it to authoritative dedupe).
// - Reconnect with exponential backoff, signaled to the drain worker.
import WebSocket from "ws";
import type { JoinedMesh } from "~/services/config/facade.js";
import { signHello } from "~/services/broker/hello-sig.js";
export type ConnStatus = "connecting" | "open" | "closed" | "reconnecting";
export interface BrokerSendArgs {
/** Target as the broker expects it: peer name | pubkey | @group | * | topic. */
targetSpec: string;
priority: "now" | "next" | "low";
nonce: string;
ciphertext: string;
/** Daemon-issued idempotency id. Echoed back by the broker for dedupe. */
client_message_id: string;
/** Sha256-32 fingerprint of the request, hex. Forwarded for Sprint 7 dedupe. */
request_fingerprint_hex: string;
}
export type BrokerSendResult =
| { ok: true; messageId: string }
| { ok: false; error: string; permanent: boolean };
interface PendingAck {
resolve: (r: BrokerSendResult) => void;
timer: NodeJS.Timeout;
}
export interface PeerSummary {
pubkey: string;
memberPubkey?: string;
displayName: string;
status: string;
summary: string | null;
groups: Array<{ name: string; role?: string }>;
sessionId: string;
connectedAt: string;
cwd?: string;
hostname?: string;
peerType?: string;
channel?: string;
}
interface PendingPeerList {
resolve: (peers: PeerSummary[]) => void;
timer: NodeJS.Timeout;
}
const HELLO_ACK_TIMEOUT_MS = 5_000;
const SEND_ACK_TIMEOUT_MS = 15_000;
const BACKOFF_CAPS_MS = [1_000, 2_000, 4_000, 8_000, 16_000, 30_000];
export interface DaemonBrokerOptions {
displayName?: string;
onStatusChange?: (s: ConnStatus) => void;
onPush?: (msg: Record<string, unknown>) => void;
log?: (level: "info" | "warn" | "error", msg: string, meta?: Record<string, unknown>) => void;
}
export class DaemonBrokerClient {
private ws: WebSocket | null = null;
private _status: ConnStatus = "closed";
private closed = false;
private reconnectAttempt = 0;
private reconnectTimer: NodeJS.Timeout | null = null;
private helloTimer: NodeJS.Timeout | null = null;
private pendingAcks = new Map<string, PendingAck>();
private peerListResolvers = new Map<string, PendingPeerList>();
private sessionPubkey: string | null = null;
private sessionSecretKey: string | null = null;
private opens: Array<() => void> = [];
private reqCounter = 0;
constructor(private mesh: JoinedMesh, private opts: DaemonBrokerOptions = {}) {}
get status(): ConnStatus { return this._status; }
get meshSlug(): string { return this.mesh.slug; }
get meshId(): string { return this.mesh.meshId; }
private log = (level: "info" | "warn" | "error", msg: string, meta?: Record<string, unknown>) => {
(this.opts.log ?? defaultLog)(level, msg, { mesh: this.mesh.slug, ...meta });
};
private setConnStatus(s: ConnStatus) {
if (this._status === s) return;
this._status = s;
this.opts.onStatusChange?.(s);
}
/** Open the WS, run the hello handshake, resolve once the broker accepts. */
async connect(): Promise<void> {
if (this.closed) throw new Error("client_closed");
if (this._status === "connecting" || this._status === "open") return;
this.setConnStatus("connecting");
const ws = new WebSocket(this.mesh.brokerUrl);
this.ws = ws;
return new Promise<void>((resolve, reject) => {
ws.on("open", async () => {
try {
if (!this.sessionPubkey) {
const { generateKeypair } = await import("~/services/crypto/facade.js");
const kp = await generateKeypair();
this.sessionPubkey = kp.publicKey;
this.sessionSecretKey = kp.secretKey;
}
const { timestamp, signature } = await signHello(
this.mesh.meshId, this.mesh.memberId, this.mesh.pubkey, this.mesh.secretKey,
);
ws.send(JSON.stringify({
type: "hello",
meshId: this.mesh.meshId,
memberId: this.mesh.memberId,
pubkey: this.mesh.pubkey,
sessionPubkey: this.sessionPubkey,
displayName: this.opts.displayName,
sessionId: `daemon-${process.pid}`,
pid: process.pid,
cwd: process.cwd(),
hostname: require("node:os").hostname(),
peerType: "ai" as const,
channel: "claudemesh-daemon",
timestamp,
signature,
}));
this.helloTimer = setTimeout(() => {
this.log("warn", "broker_hello_ack_timeout");
try { ws.close(); } catch { /* ignore */ }
reject(new Error("hello_ack_timeout"));
}, HELLO_ACK_TIMEOUT_MS);
} catch (e) {
reject(e instanceof Error ? e : new Error(String(e)));
}
});
ws.on("message", (raw) => {
let msg: Record<string, unknown>;
try { msg = JSON.parse(raw.toString()) as Record<string, unknown>; }
catch { return; }
if (msg.type === "hello_ack") {
if (this.helloTimer) clearTimeout(this.helloTimer);
this.helloTimer = null;
this.setConnStatus("open");
this.reconnectAttempt = 0;
// Flush deferred openers (drain worker, etc.)
const queued = this.opens.slice();
this.opens.length = 0;
for (const fn of queued) { try { fn(); } catch (e) { this.log("warn", "open_handler_failed", { err: String(e) }); } }
resolve();
return;
}
if (msg.type === "ack") {
// Broker shape: { type: "ack", id, messageId, queued, error? }
const id = String(msg.id ?? "");
const ack = this.pendingAcks.get(id);
if (ack) {
this.pendingAcks.delete(id);
clearTimeout(ack.timer);
if (typeof msg.error === "string" && msg.error.length > 0) {
ack.resolve({ ok: false, error: msg.error, permanent: classifyPermanent(msg.error) });
} else {
ack.resolve({ ok: true, messageId: String(msg.messageId ?? id) });
}
}
return;
}
if (msg.type === "peers_list") {
const reqId = String(msg._reqId ?? "");
const pending = this.peerListResolvers.get(reqId);
if (pending) {
this.peerListResolvers.delete(reqId);
clearTimeout(pending.timer);
pending.resolve(Array.isArray(msg.peers) ? (msg.peers as PeerSummary[]) : []);
}
return;
}
if (msg.type === "push" || msg.type === "inbound") {
this.opts.onPush?.(msg);
return;
}
});
ws.on("close", (code, reason) => {
if (this.helloTimer) { clearTimeout(this.helloTimer); this.helloTimer = null; }
this.failPendingAcks(`broker_disconnected_${code}`);
if (this.closed) { this.setConnStatus("closed"); return; }
this.setConnStatus("reconnecting");
const wait = BACKOFF_CAPS_MS[Math.min(this.reconnectAttempt, BACKOFF_CAPS_MS.length - 1)] ?? 30_000;
this.reconnectAttempt++;
this.log("info", "broker_reconnect_scheduled", { wait_ms: wait, code, reason: reason.toString("utf8") });
this.reconnectTimer = setTimeout(() => this.connect().catch((err) => this.log("warn", "broker_reconnect_failed", { err: String(err) })), wait);
// First connection failure also rejects the original connect() promise.
if (this._status === "connecting") reject(new Error(`closed_before_hello_${code}`));
});
ws.on("error", (err) => this.log("warn", "broker_ws_error", { err: err.message }));
});
}
/** Send one outbox row. Resolves on broker ack/timeout. */
send(req: BrokerSendArgs): Promise<BrokerSendResult> {
return new Promise<BrokerSendResult>((resolve) => {
const dispatch = () => {
if (!this.ws || this.ws.readyState !== this.ws.OPEN) {
resolve({ ok: false, error: "broker_not_open", permanent: false });
return;
}
const id = req.client_message_id;
const timer = setTimeout(() => {
if (this.pendingAcks.delete(id)) {
resolve({ ok: false, error: "ack_timeout", permanent: false });
}
}, SEND_ACK_TIMEOUT_MS);
this.pendingAcks.set(id, { resolve, timer });
try {
this.ws.send(JSON.stringify({
type: "send",
id, // legacy correlation id
client_message_id: id, // forward-compat per spec §4.2
request_fingerprint: req.request_fingerprint_hex,
targetSpec: req.targetSpec,
priority: req.priority,
nonce: req.nonce,
ciphertext: req.ciphertext,
}));
} catch (e) {
this.pendingAcks.delete(id);
clearTimeout(timer);
resolve({ ok: false, error: `ws_write_failed: ${String(e)}`, permanent: false });
}
};
if (this._status === "open") dispatch();
else this.opens.push(dispatch);
});
}
/** Ask the broker for the current peer list. */
async listPeers(timeoutMs = 5_000): Promise<PeerSummary[]> {
if (this._status !== "open" || !this.ws) return [];
return new Promise<PeerSummary[]>((resolve) => {
const reqId = `pl-${++this.reqCounter}`;
const timer = setTimeout(() => {
if (this.peerListResolvers.delete(reqId)) resolve([]);
}, timeoutMs);
this.peerListResolvers.set(reqId, { resolve, timer });
try { this.ws!.send(JSON.stringify({ type: "list_peers", _reqId: reqId })); }
catch { this.peerListResolvers.delete(reqId); clearTimeout(timer); resolve([]); }
});
}
/** Set the daemon's profile (avatar/title/bio/capabilities). Fire-and-forget. */
setProfile(profile: { avatar?: string; title?: string; bio?: string; capabilities?: string[] }): void {
if (this._status !== "open" || !this.ws) return;
try { this.ws.send(JSON.stringify({ type: "set_profile", ...profile })); }
catch { /* ignore */ }
}
setSummary(summary: string): void {
if (this._status !== "open" || !this.ws) return;
try { this.ws.send(JSON.stringify({ type: "set_summary", summary })); }
catch { /* ignore */ }
}
setStatus(status: "idle" | "working" | "dnd"): void {
if (this._status !== "open" || !this.ws) return;
try { this.ws.send(JSON.stringify({ type: "set_status", status })); }
catch { /* ignore */ }
}
setVisible(visible: boolean): void {
if (this._status !== "open" || !this.ws) return;
try { this.ws.send(JSON.stringify({ type: "set_visible", visible })); }
catch { /* ignore */ }
}
async close(): Promise<void> {
this.closed = true;
if (this.reconnectTimer) { clearTimeout(this.reconnectTimer); this.reconnectTimer = null; }
if (this.helloTimer) { clearTimeout(this.helloTimer); this.helloTimer = null; }
this.failPendingAcks("daemon_shutdown");
try { this.ws?.close(); } catch { /* ignore */ }
this.setConnStatus("closed");
}
getSessionKeys(): { sessionPubkey: string; sessionSecretKey: string } | null {
if (!this.sessionPubkey || !this.sessionSecretKey) return null;
return { sessionPubkey: this.sessionPubkey, sessionSecretKey: this.sessionSecretKey };
}
private failPendingAcks(reason: string) {
for (const [id, ack] of this.pendingAcks) {
clearTimeout(ack.timer);
ack.resolve({ ok: false, error: reason, permanent: false });
this.pendingAcks.delete(id);
}
}
}
function defaultLog(level: "info" | "warn" | "error", msg: string, meta?: Record<string, unknown>) {
const line = JSON.stringify({ level, msg, ...meta, ts: new Date().toISOString() });
if (level === "info") process.stdout.write(line + "\n");
else process.stderr.write(line + "\n");
}
/** Heuristic: which broker errors are unrecoverable for this id. */
function classifyPermanent(err: string): boolean {
return /payload_too_large|forbidden|not_found|invalid|schema|auth|signature/i.test(err);
}