feat(cli): claudemesh daemon — peer mesh runtime (v0.9.0)
Long-lived process that holds a persistent WS to the broker and exposes
a local IPC surface (UDS + bearer-auth TCP loopback). Implements the
v0.9.0 spec under .artifacts/specs/.
Core:
- daemon up | status | version | down | accept-host
- daemon outbox list [--failed|--pending|--inflight|--done|--aborted]
- daemon outbox requeue <id> [--new-client-id <id>]
- daemon install-service / uninstall-service (macOS launchd, Linux systemd)
IPC routes:
- /v1/version, /v1/health
- /v1/send (POST) — full §4.5.1 idempotency lookup table
- /v1/inbox (GET) — paged history
- /v1/events — SSE stream of message/peer_join/peer_leave/broker_status
- /v1/peers — broker passthrough
- /v1/profile — summary/status/visible/avatar/title/bio/capabilities
- /v1/outbox + /v1/outbox/requeue — operator recovery
Storage (SQLite via node:sqlite / bun:sqlite):
- outbox.db: pending/inflight/done/dead/aborted with audit columns
- inbox.db: dedupe by client_message_id, decrypts DMs via existing crypto
- BEGIN IMMEDIATE serialization for daemon-local accept races
Identity:
- host_fingerprint.json (machine-id || first-stable-mac)
- refuse-on-mismatch policy with `daemon accept-host` recovery
CLI integration:
- claudemesh send detects the daemon and routes through /v1/send when
present, falling back to bridge socket / cold path otherwise
Tests: 15-case coverage of the §4.5.1 IPC duplicate lookup table.
Spec arc preserved at .artifacts/specs/2026-05-03-daemon-{v1..v10}.md;
v0.9.0 implementation target locked at 2026-05-03-daemon-spec-v0.9.0.md;
deferred items at 2026-05-03-daemon-spec-broker-hardening-followups.md.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
271
apps/cli/src/commands/daemon.ts
Normal file
271
apps/cli/src/commands/daemon.ts
Normal file
@@ -0,0 +1,271 @@
|
||||
import { runDaemon } from "~/daemon/run.js";
|
||||
import { ipc, IpcError } from "~/daemon/ipc/client.js";
|
||||
import { readRunningPid } from "~/daemon/lock.js";
|
||||
import { DAEMON_PATHS } from "~/daemon/paths.js";
|
||||
|
||||
export interface DaemonOptions {
|
||||
json?: boolean;
|
||||
noTcp?: boolean;
|
||||
publicHealth?: boolean;
|
||||
mesh?: string;
|
||||
displayName?: string;
|
||||
/** outbox-list status filter, set from boolean flags --failed/--pending/etc. */
|
||||
outboxStatus?: "pending" | "inflight" | "done" | "dead" | "aborted";
|
||||
/** outbox requeue: optional id to mint a fresh client_message_id with. */
|
||||
newClientId?: string;
|
||||
}
|
||||
|
||||
export async function runDaemonCommand(
|
||||
sub: string | undefined,
|
||||
opts: DaemonOptions,
|
||||
rest: string[] = [],
|
||||
): Promise<number> {
|
||||
switch (sub) {
|
||||
case undefined:
|
||||
case "up":
|
||||
case "start":
|
||||
return runDaemon({
|
||||
tcpEnabled: !opts.noTcp,
|
||||
publicHealthCheck: opts.publicHealth,
|
||||
mesh: opts.mesh,
|
||||
displayName: opts.displayName,
|
||||
});
|
||||
|
||||
case "status":
|
||||
return runStatus(opts);
|
||||
|
||||
case "version":
|
||||
return runVersion(opts);
|
||||
|
||||
case "down":
|
||||
case "stop":
|
||||
return runStop(opts);
|
||||
|
||||
case "accept-host":
|
||||
return runAcceptHost(opts);
|
||||
|
||||
case "outbox":
|
||||
return runOutbox(rest, opts);
|
||||
|
||||
case "install-service":
|
||||
return runInstallService(opts);
|
||||
|
||||
case "uninstall-service":
|
||||
return runUninstallService(opts);
|
||||
|
||||
default:
|
||||
process.stderr.write(`unknown daemon subcommand: ${sub}\n`);
|
||||
process.stderr.write(`usage: claudemesh daemon [up|status|version|down|accept-host|outbox|install-service|uninstall-service]\n`);
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
|
||||
interface OutboxRowResp {
|
||||
id: string;
|
||||
client_message_id: string;
|
||||
status: string;
|
||||
attempts: number;
|
||||
enqueued_at: string;
|
||||
next_attempt_at: string;
|
||||
delivered_at: string | null;
|
||||
broker_message_id: string | null;
|
||||
last_error: string | null;
|
||||
aborted_at: string | null;
|
||||
aborted_by: string | null;
|
||||
superseded_by: string | null;
|
||||
payload_bytes: number;
|
||||
}
|
||||
|
||||
async function runOutbox(rest: string[], opts: DaemonOptions): Promise<number> {
|
||||
const sub = rest[0];
|
||||
switch (sub) {
|
||||
case undefined:
|
||||
case "list": {
|
||||
const status = opts.outboxStatus;
|
||||
const path = `/v1/outbox${status ? `?status=${status}` : ""}`;
|
||||
try {
|
||||
const res = await ipc<{ items: OutboxRowResp[] }>({ path });
|
||||
if (opts.json) {
|
||||
process.stdout.write(JSON.stringify(res.body) + "\n");
|
||||
return 0;
|
||||
}
|
||||
if (!res.body.items?.length) {
|
||||
process.stdout.write("(empty)\n");
|
||||
return 0;
|
||||
}
|
||||
for (const r of res.body.items) {
|
||||
const tag = r.status.padEnd(8);
|
||||
const bm = r.broker_message_id ? ` → ${r.broker_message_id}` : "";
|
||||
const err = r.last_error ? ` last_error="${r.last_error.slice(0, 60)}"` : "";
|
||||
process.stdout.write(`${tag} ${r.id} cid=${r.client_message_id} attempts=${r.attempts}${bm}${err}\n`);
|
||||
}
|
||||
return 0;
|
||||
} catch (err) {
|
||||
process.stderr.write(`daemon unreachable: ${String(err)}\n`);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
case "requeue": {
|
||||
const id = rest[1];
|
||||
if (!id) { process.stderr.write("usage: claudemesh daemon outbox requeue <id> [--new-client-id <id>]\n"); return 2; }
|
||||
const newClientMessageId = opts.newClientId;
|
||||
try {
|
||||
const res = await ipc<{
|
||||
aborted_row_id: string; new_row_id: string; new_client_message_id: string; error?: string;
|
||||
}>({
|
||||
method: "POST",
|
||||
path: "/v1/outbox/requeue",
|
||||
body: { id, new_client_message_id: newClientMessageId },
|
||||
});
|
||||
if (res.status === 200) {
|
||||
if (opts.json) process.stdout.write(JSON.stringify(res.body) + "\n");
|
||||
else process.stdout.write(
|
||||
`requeued: aborted ${res.body.aborted_row_id} → new ${res.body.new_row_id} ` +
|
||||
`(client_message_id=${res.body.new_client_message_id})\n`,
|
||||
);
|
||||
return 0;
|
||||
}
|
||||
process.stderr.write(`requeue failed (${res.status}): ${res.body.error ?? "unknown"}\n`);
|
||||
return 1;
|
||||
} catch (err) {
|
||||
process.stderr.write(`daemon unreachable: ${String(err)}\n`);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
default:
|
||||
process.stderr.write(`unknown outbox subcommand: ${sub}\n`);
|
||||
process.stderr.write(`usage: claudemesh daemon outbox [list|requeue <id>]\n`);
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
|
||||
async function runInstallService(opts: DaemonOptions): Promise<number> {
|
||||
const { installService, detectPlatform } = await import("~/daemon/service-install.js");
|
||||
const platform = detectPlatform();
|
||||
if (!platform) {
|
||||
process.stderr.write(`unsupported platform: ${process.platform}\n`);
|
||||
return 2;
|
||||
}
|
||||
if (!opts.mesh) {
|
||||
process.stderr.write(`pass --mesh <slug> so the service knows which mesh to attach to\n`);
|
||||
return 2;
|
||||
}
|
||||
// Resolve the binary path. Prefer the running argv[0] when it's an
|
||||
// installed claudemesh binary; fall back to whichever `claudemesh` is
|
||||
// first on PATH.
|
||||
let binary = process.argv[1] ?? "";
|
||||
if (!binary || /\.ts$/.test(binary) || /node_modules|src\/entrypoints/.test(binary)) {
|
||||
try {
|
||||
const { execSync } = await import("node:child_process");
|
||||
binary = execSync("which claudemesh", { encoding: "utf8" }).trim();
|
||||
} catch {
|
||||
process.stderr.write(`couldn't resolve a 'claudemesh' binary on PATH; install via npm/homebrew first\n`);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
try {
|
||||
const r = installService({
|
||||
binaryPath: binary,
|
||||
meshSlug: opts.mesh,
|
||||
displayName: opts.displayName,
|
||||
});
|
||||
if (opts.json) {
|
||||
process.stdout.write(JSON.stringify({ ok: true, ...r }) + "\n");
|
||||
} else {
|
||||
process.stdout.write(`installed ${r.platform} service unit: ${r.unitPath}\n`);
|
||||
process.stdout.write(`bring it up now: ${r.bootCommand}\n`);
|
||||
}
|
||||
return 0;
|
||||
} catch (err) {
|
||||
process.stderr.write(`install-service failed: ${String(err)}\n`);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
async function runUninstallService(opts: DaemonOptions): Promise<number> {
|
||||
const { uninstallService } = await import("~/daemon/service-install.js");
|
||||
const r = uninstallService();
|
||||
if (opts.json) process.stdout.write(JSON.stringify(r) + "\n");
|
||||
else if (r.removed.length === 0) process.stdout.write("no service unit installed\n");
|
||||
else process.stdout.write(`removed: ${r.removed.join(", ")}\n`);
|
||||
return 0;
|
||||
}
|
||||
|
||||
async function runAcceptHost(opts: DaemonOptions): Promise<number> {
|
||||
const { acceptCurrentHost } = await import("~/daemon/identity.js");
|
||||
const fp = acceptCurrentHost();
|
||||
if (opts.json) process.stdout.write(JSON.stringify({ ok: true, fingerprint_prefix: fp.fingerprint.slice(0, 16) }) + "\n");
|
||||
else process.stdout.write(`host fingerprint accepted: ${fp.fingerprint.slice(0, 16)}…\n`);
|
||||
return 0;
|
||||
}
|
||||
|
||||
async function runStatus(opts: DaemonOptions): Promise<number> {
|
||||
const pid = readRunningPid();
|
||||
if (!pid) {
|
||||
if (opts.json) process.stdout.write(JSON.stringify({ running: false }) + "\n");
|
||||
else process.stdout.write("daemon: not running\n");
|
||||
return 1;
|
||||
}
|
||||
try {
|
||||
const res = await ipc<{ ok: boolean; pid: number }>({ path: "/v1/health" });
|
||||
if (opts.json) {
|
||||
process.stdout.write(JSON.stringify({ running: true, pid, health: res.body }) + "\n");
|
||||
} else {
|
||||
process.stdout.write(`daemon: running (pid ${pid})\n`);
|
||||
process.stdout.write(`socket: ${DAEMON_PATHS.SOCK_FILE}\n`);
|
||||
}
|
||||
return 0;
|
||||
} catch (err) {
|
||||
if (opts.json) process.stdout.write(JSON.stringify({ running: true, pid, ipc_error: String(err) }) + "\n");
|
||||
else process.stdout.write(`daemon: pid ${pid} alive but IPC unreachable (${String(err)})\n`);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
async function runVersion(opts: DaemonOptions): Promise<number> {
|
||||
try {
|
||||
const res = await ipc<Record<string, unknown>>({ path: "/v1/version" });
|
||||
if (opts.json) process.stdout.write(JSON.stringify(res.body) + "\n");
|
||||
else {
|
||||
const v = res.body as { daemon_version?: string; ipc_api?: string; schema_version?: number };
|
||||
process.stdout.write(`daemon ${v.daemon_version ?? "unknown"} (ipc ${v.ipc_api ?? "?"}, schema ${v.schema_version ?? "?"})\n`);
|
||||
}
|
||||
return 0;
|
||||
} catch (err) {
|
||||
if (err instanceof IpcError) {
|
||||
process.stderr.write(`${err.message}\n`);
|
||||
return err.status === 401 ? 3 : 1;
|
||||
}
|
||||
process.stderr.write(`daemon unreachable: ${String(err)}\n`);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
async function runStop(opts: DaemonOptions): Promise<number> {
|
||||
const pid = readRunningPid();
|
||||
if (!pid) {
|
||||
if (opts.json) process.stdout.write(JSON.stringify({ stopped: false, reason: "not_running" }) + "\n");
|
||||
else process.stdout.write("daemon: not running\n");
|
||||
return 0;
|
||||
}
|
||||
try {
|
||||
process.kill(pid, "SIGTERM");
|
||||
} catch (err) {
|
||||
process.stderr.write(`failed to signal pid ${pid}: ${String(err)}\n`);
|
||||
return 1;
|
||||
}
|
||||
// Brief wait for the daemon to release its lock cleanly.
|
||||
for (let i = 0; i < 50; i++) {
|
||||
await new Promise<void>((r) => setTimeout(r, 100));
|
||||
if (!readRunningPid()) {
|
||||
if (opts.json) process.stdout.write(JSON.stringify({ stopped: true, pid }) + "\n");
|
||||
else process.stdout.write(`daemon: stopped (was pid ${pid})\n`);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
if (opts.json) process.stdout.write(JSON.stringify({ stopped: false, pid, reason: "shutdown_timeout" }) + "\n");
|
||||
else process.stdout.write(`daemon: signaled but did not exit within 5s (pid ${pid})\n`);
|
||||
return 1;
|
||||
}
|
||||
@@ -14,6 +14,7 @@
|
||||
import { withMesh } from "./connect.js";
|
||||
import { readConfig } from "~/services/config/facade.js";
|
||||
import { tryBridge } from "~/services/bridge/client.js";
|
||||
import { trySendViaDaemon } from "~/services/bridge/daemon-route.js";
|
||||
import type { Priority } from "~/services/broker/facade.js";
|
||||
import { render } from "~/ui/render.js";
|
||||
import { dim } from "~/ui/styles.js";
|
||||
@@ -64,6 +65,26 @@ export async function runSend(flags: SendFlags, to: string, message: string): Pr
|
||||
}
|
||||
}
|
||||
|
||||
// Daemon path — preferred when a long-lived daemon is local. UDS at
|
||||
// ~/.claudemesh/daemon/daemon.sock; ~1ms round-trip; persists outbox
|
||||
// across CLI invocations so a `claudemesh send` survives a daemon
|
||||
// crash via the on-disk outbox.
|
||||
{
|
||||
const dr = await trySendViaDaemon({ to, message, priority, expectedMesh: meshSlug ?? undefined });
|
||||
if (dr !== null) {
|
||||
if (dr.ok) {
|
||||
if (flags.json) console.log(JSON.stringify({ ok: true, messageId: dr.messageId, target: to, via: "daemon", duplicate: !!dr.duplicate }));
|
||||
else render.ok(`sent to ${to} (daemon)`, dr.messageId ? dim(dr.messageId.slice(0, 8)) : undefined);
|
||||
return;
|
||||
}
|
||||
// Daemon answered but rejected (409 idempotency, 400 schema). Surface; do not fall through.
|
||||
if (flags.json) console.log(JSON.stringify({ ok: false, error: dr.error, via: "daemon" }));
|
||||
else render.err(`send failed (daemon): ${dr.error}`);
|
||||
process.exit(1);
|
||||
}
|
||||
// dr === null → daemon not running; fall through to bridge.
|
||||
}
|
||||
|
||||
// Warm path — only when mesh is unambiguous.
|
||||
if (meshSlug) {
|
||||
const bridged = await tryBridge(meshSlug, "send", { to, message, priority });
|
||||
|
||||
324
apps/cli/src/daemon/broker.ts
Normal file
324
apps/cli/src/daemon/broker.ts
Normal file
@@ -0,0 +1,324 @@
|
||||
// Minimal broker WS connector for the daemon. Reuses the existing CLI
|
||||
// hello-sign protocol so it speaks the wire current brokers understand.
|
||||
//
|
||||
// Differences from BrokerClient (services/broker/ws-client.ts):
|
||||
// - Slim: no in-memory pending-sends queue, no list_peers/state/topic
|
||||
// RPCs. The daemon's outbox is the source of truth.
|
||||
// - Wire envelope adds `client_message_id` (broker may ignore in legacy
|
||||
// mode; Sprint 7 promotes it to authoritative dedupe).
|
||||
// - Reconnect with exponential backoff, signaled to the drain worker.
|
||||
|
||||
import WebSocket from "ws";
|
||||
|
||||
import type { JoinedMesh } from "~/services/config/facade.js";
|
||||
import { signHello } from "~/services/broker/hello-sig.js";
|
||||
|
||||
export type ConnStatus = "connecting" | "open" | "closed" | "reconnecting";
|
||||
|
||||
export interface BrokerSendArgs {
|
||||
/** Target as the broker expects it: peer name | pubkey | @group | * | topic. */
|
||||
targetSpec: string;
|
||||
priority: "now" | "next" | "low";
|
||||
nonce: string;
|
||||
ciphertext: string;
|
||||
/** Daemon-issued idempotency id. Echoed back by the broker for dedupe. */
|
||||
client_message_id: string;
|
||||
/** Sha256-32 fingerprint of the request, hex. Forwarded for Sprint 7 dedupe. */
|
||||
request_fingerprint_hex: string;
|
||||
}
|
||||
|
||||
export type BrokerSendResult =
|
||||
| { ok: true; messageId: string }
|
||||
| { ok: false; error: string; permanent: boolean };
|
||||
|
||||
interface PendingAck {
|
||||
resolve: (r: BrokerSendResult) => void;
|
||||
timer: NodeJS.Timeout;
|
||||
}
|
||||
|
||||
export interface PeerSummary {
|
||||
pubkey: string;
|
||||
memberPubkey?: string;
|
||||
displayName: string;
|
||||
status: string;
|
||||
summary: string | null;
|
||||
groups: Array<{ name: string; role?: string }>;
|
||||
sessionId: string;
|
||||
connectedAt: string;
|
||||
cwd?: string;
|
||||
hostname?: string;
|
||||
peerType?: string;
|
||||
channel?: string;
|
||||
}
|
||||
|
||||
interface PendingPeerList {
|
||||
resolve: (peers: PeerSummary[]) => void;
|
||||
timer: NodeJS.Timeout;
|
||||
}
|
||||
|
||||
const HELLO_ACK_TIMEOUT_MS = 5_000;
|
||||
const SEND_ACK_TIMEOUT_MS = 15_000;
|
||||
const BACKOFF_CAPS_MS = [1_000, 2_000, 4_000, 8_000, 16_000, 30_000];
|
||||
|
||||
export interface DaemonBrokerOptions {
|
||||
displayName?: string;
|
||||
onStatusChange?: (s: ConnStatus) => void;
|
||||
onPush?: (msg: Record<string, unknown>) => void;
|
||||
log?: (level: "info" | "warn" | "error", msg: string, meta?: Record<string, unknown>) => void;
|
||||
}
|
||||
|
||||
export class DaemonBrokerClient {
|
||||
private ws: WebSocket | null = null;
|
||||
private _status: ConnStatus = "closed";
|
||||
private closed = false;
|
||||
private reconnectAttempt = 0;
|
||||
private reconnectTimer: NodeJS.Timeout | null = null;
|
||||
private helloTimer: NodeJS.Timeout | null = null;
|
||||
private pendingAcks = new Map<string, PendingAck>();
|
||||
private peerListResolvers = new Map<string, PendingPeerList>();
|
||||
private sessionPubkey: string | null = null;
|
||||
private sessionSecretKey: string | null = null;
|
||||
private opens: Array<() => void> = [];
|
||||
private reqCounter = 0;
|
||||
|
||||
constructor(private mesh: JoinedMesh, private opts: DaemonBrokerOptions = {}) {}
|
||||
|
||||
get status(): ConnStatus { return this._status; }
|
||||
get meshSlug(): string { return this.mesh.slug; }
|
||||
get meshId(): string { return this.mesh.meshId; }
|
||||
|
||||
private log = (level: "info" | "warn" | "error", msg: string, meta?: Record<string, unknown>) => {
|
||||
(this.opts.log ?? defaultLog)(level, msg, { mesh: this.mesh.slug, ...meta });
|
||||
};
|
||||
|
||||
private setConnStatus(s: ConnStatus) {
|
||||
if (this._status === s) return;
|
||||
this._status = s;
|
||||
this.opts.onStatusChange?.(s);
|
||||
}
|
||||
|
||||
/** Open the WS, run the hello handshake, resolve once the broker accepts. */
|
||||
async connect(): Promise<void> {
|
||||
if (this.closed) throw new Error("client_closed");
|
||||
if (this._status === "connecting" || this._status === "open") return;
|
||||
this.setConnStatus("connecting");
|
||||
|
||||
const ws = new WebSocket(this.mesh.brokerUrl);
|
||||
this.ws = ws;
|
||||
|
||||
return new Promise<void>((resolve, reject) => {
|
||||
ws.on("open", async () => {
|
||||
try {
|
||||
if (!this.sessionPubkey) {
|
||||
const { generateKeypair } = await import("~/services/crypto/facade.js");
|
||||
const kp = await generateKeypair();
|
||||
this.sessionPubkey = kp.publicKey;
|
||||
this.sessionSecretKey = kp.secretKey;
|
||||
}
|
||||
const { timestamp, signature } = await signHello(
|
||||
this.mesh.meshId, this.mesh.memberId, this.mesh.pubkey, this.mesh.secretKey,
|
||||
);
|
||||
ws.send(JSON.stringify({
|
||||
type: "hello",
|
||||
meshId: this.mesh.meshId,
|
||||
memberId: this.mesh.memberId,
|
||||
pubkey: this.mesh.pubkey,
|
||||
sessionPubkey: this.sessionPubkey,
|
||||
displayName: this.opts.displayName,
|
||||
sessionId: `daemon-${process.pid}`,
|
||||
pid: process.pid,
|
||||
cwd: process.cwd(),
|
||||
hostname: require("node:os").hostname(),
|
||||
peerType: "ai" as const,
|
||||
channel: "claudemesh-daemon",
|
||||
timestamp,
|
||||
signature,
|
||||
}));
|
||||
this.helloTimer = setTimeout(() => {
|
||||
this.log("warn", "broker_hello_ack_timeout");
|
||||
try { ws.close(); } catch { /* ignore */ }
|
||||
reject(new Error("hello_ack_timeout"));
|
||||
}, HELLO_ACK_TIMEOUT_MS);
|
||||
} catch (e) {
|
||||
reject(e instanceof Error ? e : new Error(String(e)));
|
||||
}
|
||||
});
|
||||
|
||||
ws.on("message", (raw) => {
|
||||
let msg: Record<string, unknown>;
|
||||
try { msg = JSON.parse(raw.toString()) as Record<string, unknown>; }
|
||||
catch { return; }
|
||||
|
||||
if (msg.type === "hello_ack") {
|
||||
if (this.helloTimer) clearTimeout(this.helloTimer);
|
||||
this.helloTimer = null;
|
||||
this.setConnStatus("open");
|
||||
this.reconnectAttempt = 0;
|
||||
// Flush deferred openers (drain worker, etc.)
|
||||
const queued = this.opens.slice();
|
||||
this.opens.length = 0;
|
||||
for (const fn of queued) { try { fn(); } catch (e) { this.log("warn", "open_handler_failed", { err: String(e) }); } }
|
||||
resolve();
|
||||
return;
|
||||
}
|
||||
|
||||
if (msg.type === "ack") {
|
||||
// Broker shape: { type: "ack", id, messageId, queued, error? }
|
||||
const id = String(msg.id ?? "");
|
||||
const ack = this.pendingAcks.get(id);
|
||||
if (ack) {
|
||||
this.pendingAcks.delete(id);
|
||||
clearTimeout(ack.timer);
|
||||
if (typeof msg.error === "string" && msg.error.length > 0) {
|
||||
ack.resolve({ ok: false, error: msg.error, permanent: classifyPermanent(msg.error) });
|
||||
} else {
|
||||
ack.resolve({ ok: true, messageId: String(msg.messageId ?? id) });
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (msg.type === "peers_list") {
|
||||
const reqId = String(msg._reqId ?? "");
|
||||
const pending = this.peerListResolvers.get(reqId);
|
||||
if (pending) {
|
||||
this.peerListResolvers.delete(reqId);
|
||||
clearTimeout(pending.timer);
|
||||
pending.resolve(Array.isArray(msg.peers) ? (msg.peers as PeerSummary[]) : []);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (msg.type === "push" || msg.type === "inbound") {
|
||||
this.opts.onPush?.(msg);
|
||||
return;
|
||||
}
|
||||
});
|
||||
|
||||
ws.on("close", (code, reason) => {
|
||||
if (this.helloTimer) { clearTimeout(this.helloTimer); this.helloTimer = null; }
|
||||
this.failPendingAcks(`broker_disconnected_${code}`);
|
||||
if (this.closed) { this.setConnStatus("closed"); return; }
|
||||
this.setConnStatus("reconnecting");
|
||||
const wait = BACKOFF_CAPS_MS[Math.min(this.reconnectAttempt, BACKOFF_CAPS_MS.length - 1)] ?? 30_000;
|
||||
this.reconnectAttempt++;
|
||||
this.log("info", "broker_reconnect_scheduled", { wait_ms: wait, code, reason: reason.toString("utf8") });
|
||||
this.reconnectTimer = setTimeout(() => this.connect().catch((err) => this.log("warn", "broker_reconnect_failed", { err: String(err) })), wait);
|
||||
// First connection failure also rejects the original connect() promise.
|
||||
if (this._status === "connecting") reject(new Error(`closed_before_hello_${code}`));
|
||||
});
|
||||
|
||||
ws.on("error", (err) => this.log("warn", "broker_ws_error", { err: err.message }));
|
||||
});
|
||||
}
|
||||
|
||||
/** Send one outbox row. Resolves on broker ack/timeout. */
|
||||
send(req: BrokerSendArgs): Promise<BrokerSendResult> {
|
||||
return new Promise<BrokerSendResult>((resolve) => {
|
||||
const dispatch = () => {
|
||||
if (!this.ws || this.ws.readyState !== this.ws.OPEN) {
|
||||
resolve({ ok: false, error: "broker_not_open", permanent: false });
|
||||
return;
|
||||
}
|
||||
const id = req.client_message_id;
|
||||
const timer = setTimeout(() => {
|
||||
if (this.pendingAcks.delete(id)) {
|
||||
resolve({ ok: false, error: "ack_timeout", permanent: false });
|
||||
}
|
||||
}, SEND_ACK_TIMEOUT_MS);
|
||||
this.pendingAcks.set(id, { resolve, timer });
|
||||
try {
|
||||
this.ws.send(JSON.stringify({
|
||||
type: "send",
|
||||
id, // legacy correlation id
|
||||
client_message_id: id, // forward-compat per spec §4.2
|
||||
request_fingerprint: req.request_fingerprint_hex,
|
||||
targetSpec: req.targetSpec,
|
||||
priority: req.priority,
|
||||
nonce: req.nonce,
|
||||
ciphertext: req.ciphertext,
|
||||
}));
|
||||
} catch (e) {
|
||||
this.pendingAcks.delete(id);
|
||||
clearTimeout(timer);
|
||||
resolve({ ok: false, error: `ws_write_failed: ${String(e)}`, permanent: false });
|
||||
}
|
||||
};
|
||||
|
||||
if (this._status === "open") dispatch();
|
||||
else this.opens.push(dispatch);
|
||||
});
|
||||
}
|
||||
|
||||
/** Ask the broker for the current peer list. */
|
||||
async listPeers(timeoutMs = 5_000): Promise<PeerSummary[]> {
|
||||
if (this._status !== "open" || !this.ws) return [];
|
||||
return new Promise<PeerSummary[]>((resolve) => {
|
||||
const reqId = `pl-${++this.reqCounter}`;
|
||||
const timer = setTimeout(() => {
|
||||
if (this.peerListResolvers.delete(reqId)) resolve([]);
|
||||
}, timeoutMs);
|
||||
this.peerListResolvers.set(reqId, { resolve, timer });
|
||||
try { this.ws!.send(JSON.stringify({ type: "list_peers", _reqId: reqId })); }
|
||||
catch { this.peerListResolvers.delete(reqId); clearTimeout(timer); resolve([]); }
|
||||
});
|
||||
}
|
||||
|
||||
/** Set the daemon's profile (avatar/title/bio/capabilities). Fire-and-forget. */
|
||||
setProfile(profile: { avatar?: string; title?: string; bio?: string; capabilities?: string[] }): void {
|
||||
if (this._status !== "open" || !this.ws) return;
|
||||
try { this.ws.send(JSON.stringify({ type: "set_profile", ...profile })); }
|
||||
catch { /* ignore */ }
|
||||
}
|
||||
|
||||
setSummary(summary: string): void {
|
||||
if (this._status !== "open" || !this.ws) return;
|
||||
try { this.ws.send(JSON.stringify({ type: "set_summary", summary })); }
|
||||
catch { /* ignore */ }
|
||||
}
|
||||
|
||||
setStatus(status: "idle" | "working" | "dnd"): void {
|
||||
if (this._status !== "open" || !this.ws) return;
|
||||
try { this.ws.send(JSON.stringify({ type: "set_status", status })); }
|
||||
catch { /* ignore */ }
|
||||
}
|
||||
|
||||
setVisible(visible: boolean): void {
|
||||
if (this._status !== "open" || !this.ws) return;
|
||||
try { this.ws.send(JSON.stringify({ type: "set_visible", visible })); }
|
||||
catch { /* ignore */ }
|
||||
}
|
||||
|
||||
async close(): Promise<void> {
|
||||
this.closed = true;
|
||||
if (this.reconnectTimer) { clearTimeout(this.reconnectTimer); this.reconnectTimer = null; }
|
||||
if (this.helloTimer) { clearTimeout(this.helloTimer); this.helloTimer = null; }
|
||||
this.failPendingAcks("daemon_shutdown");
|
||||
try { this.ws?.close(); } catch { /* ignore */ }
|
||||
this.setConnStatus("closed");
|
||||
}
|
||||
|
||||
getSessionKeys(): { sessionPubkey: string; sessionSecretKey: string } | null {
|
||||
if (!this.sessionPubkey || !this.sessionSecretKey) return null;
|
||||
return { sessionPubkey: this.sessionPubkey, sessionSecretKey: this.sessionSecretKey };
|
||||
}
|
||||
|
||||
private failPendingAcks(reason: string) {
|
||||
for (const [id, ack] of this.pendingAcks) {
|
||||
clearTimeout(ack.timer);
|
||||
ack.resolve({ ok: false, error: reason, permanent: false });
|
||||
this.pendingAcks.delete(id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function defaultLog(level: "info" | "warn" | "error", msg: string, meta?: Record<string, unknown>) {
|
||||
const line = JSON.stringify({ level, msg, ...meta, ts: new Date().toISOString() });
|
||||
if (level === "info") process.stdout.write(line + "\n");
|
||||
else process.stderr.write(line + "\n");
|
||||
}
|
||||
|
||||
/** Heuristic: which broker errors are unrecoverable for this id. */
|
||||
function classifyPermanent(err: string): boolean {
|
||||
return /payload_too_large|forbidden|not_found|invalid|schema|auth|signature/i.test(err);
|
||||
}
|
||||
91
apps/cli/src/daemon/db/inbox.ts
Normal file
91
apps/cli/src/daemon/db/inbox.ts
Normal file
@@ -0,0 +1,91 @@
|
||||
// Inbox schema + accessors. Schema is the v0.9.0 spec §4.10 / v3 §4.5
|
||||
// content table; FTS5 index is deferred to the followups doc.
|
||||
|
||||
import type { SqliteDb } from "./sqlite.js";
|
||||
|
||||
export interface InboxRow {
|
||||
id: string;
|
||||
client_message_id: string;
|
||||
broker_message_id: string | null;
|
||||
mesh: string;
|
||||
topic: string | null;
|
||||
sender_pubkey: string;
|
||||
sender_name: string;
|
||||
body: string | null;
|
||||
meta: string | null;
|
||||
received_at: number;
|
||||
reply_to_id: string | null;
|
||||
}
|
||||
|
||||
export function migrateInbox(db: SqliteDb): void {
|
||||
db.exec(`
|
||||
CREATE TABLE IF NOT EXISTS inbox (
|
||||
id TEXT PRIMARY KEY,
|
||||
client_message_id TEXT NOT NULL UNIQUE,
|
||||
broker_message_id TEXT,
|
||||
mesh TEXT NOT NULL,
|
||||
topic TEXT,
|
||||
sender_pubkey TEXT NOT NULL,
|
||||
sender_name TEXT NOT NULL,
|
||||
body TEXT,
|
||||
meta TEXT,
|
||||
received_at INTEGER NOT NULL,
|
||||
reply_to_id TEXT
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS inbox_received_at ON inbox(received_at);
|
||||
CREATE INDEX IF NOT EXISTS inbox_topic ON inbox(topic);
|
||||
CREATE INDEX IF NOT EXISTS inbox_sender ON inbox(sender_pubkey);
|
||||
`);
|
||||
}
|
||||
|
||||
/**
|
||||
* Spec §4.5 insert path:
|
||||
* INSERT ... ON CONFLICT(client_message_id) DO NOTHING RETURNING id
|
||||
*
|
||||
* Returns the new row id when this was a fresh insert, or null when the
|
||||
* message id was already known (idempotent receive).
|
||||
*/
|
||||
export function insertIfNew(db: SqliteDb, row: Omit<InboxRow, "id"> & { id: string }): string | null {
|
||||
// node:sqlite does support RETURNING. bun:sqlite does too. We branch on
|
||||
// the row count instead so it works on both.
|
||||
const before = db.prepare(`SELECT id FROM inbox WHERE client_message_id = ?`).get<{ id: string }>(row.client_message_id);
|
||||
if (before) return null;
|
||||
db.prepare(`
|
||||
INSERT INTO inbox (
|
||||
id, client_message_id, broker_message_id, mesh, topic,
|
||||
sender_pubkey, sender_name, body, meta, received_at, reply_to_id
|
||||
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
ON CONFLICT(client_message_id) DO NOTHING
|
||||
`).run(
|
||||
row.id, row.client_message_id, row.broker_message_id, row.mesh, row.topic,
|
||||
row.sender_pubkey, row.sender_name, row.body, row.meta, row.received_at, row.reply_to_id,
|
||||
);
|
||||
// Confirm the insert landed (handles the conflict-noop race).
|
||||
const after = db.prepare(`SELECT id FROM inbox WHERE client_message_id = ?`).get<{ id: string }>(row.client_message_id);
|
||||
return after?.id === row.id ? row.id : null;
|
||||
}
|
||||
|
||||
export interface ListInboxParams {
|
||||
since?: number; // received_at >= since
|
||||
topic?: string;
|
||||
fromPubkey?: string;
|
||||
limit?: number;
|
||||
}
|
||||
|
||||
export function listInbox(db: SqliteDb, p: ListInboxParams): InboxRow[] {
|
||||
const where: string[] = [];
|
||||
const args: unknown[] = [];
|
||||
if (p.since !== undefined) { where.push("received_at >= ?"); args.push(p.since); }
|
||||
if (p.topic !== undefined) { where.push("topic = ?"); args.push(p.topic); }
|
||||
if (p.fromPubkey !== undefined){ where.push("sender_pubkey = ?"); args.push(p.fromPubkey); }
|
||||
const sql = `
|
||||
SELECT id, client_message_id, broker_message_id, mesh, topic,
|
||||
sender_pubkey, sender_name, body, meta, received_at, reply_to_id
|
||||
FROM inbox
|
||||
${where.length ? "WHERE " + where.join(" AND ") : ""}
|
||||
ORDER BY received_at DESC
|
||||
LIMIT ?
|
||||
`;
|
||||
args.push(Math.min(Math.max(p.limit ?? 100, 1), 1000));
|
||||
return db.prepare(sql).all<InboxRow>(...args);
|
||||
}
|
||||
178
apps/cli/src/daemon/db/outbox.ts
Normal file
178
apps/cli/src/daemon/db/outbox.ts
Normal file
@@ -0,0 +1,178 @@
|
||||
// Outbox schema + accessors. Schema is the v0.9.0 spec §4.5.2 shape:
|
||||
// includes `aborted` status and audit columns from the v7 pull.
|
||||
|
||||
import type { SqliteDb } from "./sqlite.js";
|
||||
|
||||
export type OutboxStatus = "pending" | "inflight" | "done" | "dead" | "aborted";
|
||||
|
||||
export interface OutboxRow {
|
||||
id: string;
|
||||
client_message_id: string;
|
||||
request_fingerprint: Uint8Array;
|
||||
payload: Uint8Array;
|
||||
enqueued_at: number;
|
||||
attempts: number;
|
||||
next_attempt_at: number;
|
||||
status: OutboxStatus;
|
||||
last_error: string | null;
|
||||
delivered_at: number | null;
|
||||
broker_message_id: string | null;
|
||||
aborted_at: number | null;
|
||||
aborted_by: string | null;
|
||||
superseded_by: string | null;
|
||||
}
|
||||
|
||||
export function migrateOutbox(db: SqliteDb): void {
|
||||
db.exec(`
|
||||
CREATE TABLE IF NOT EXISTS outbox (
|
||||
id TEXT PRIMARY KEY,
|
||||
client_message_id TEXT NOT NULL UNIQUE,
|
||||
request_fingerprint BLOB NOT NULL,
|
||||
payload BLOB NOT NULL,
|
||||
enqueued_at INTEGER NOT NULL,
|
||||
attempts INTEGER NOT NULL DEFAULT 0,
|
||||
next_attempt_at INTEGER NOT NULL,
|
||||
status TEXT NOT NULL CHECK(status IN
|
||||
('pending','inflight','done','dead','aborted')),
|
||||
last_error TEXT,
|
||||
delivered_at INTEGER,
|
||||
broker_message_id TEXT,
|
||||
aborted_at INTEGER,
|
||||
aborted_by TEXT,
|
||||
superseded_by TEXT
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS outbox_pending
|
||||
ON outbox(status, next_attempt_at);
|
||||
CREATE INDEX IF NOT EXISTS outbox_aborted
|
||||
ON outbox(status, aborted_at) WHERE status = 'aborted';
|
||||
`);
|
||||
}
|
||||
|
||||
export function findByClientId(db: SqliteDb, clientMessageId: string): OutboxRow | null {
|
||||
const row = db.prepare(`
|
||||
SELECT id, client_message_id, request_fingerprint, payload, enqueued_at,
|
||||
attempts, next_attempt_at, status, last_error, delivered_at,
|
||||
broker_message_id, aborted_at, aborted_by, superseded_by
|
||||
FROM outbox WHERE client_message_id = ?
|
||||
`).get<OutboxRow>(clientMessageId);
|
||||
return row ?? null;
|
||||
}
|
||||
|
||||
export interface InsertPendingInput {
|
||||
id: string;
|
||||
client_message_id: string;
|
||||
request_fingerprint: Uint8Array;
|
||||
payload: Uint8Array;
|
||||
now: number;
|
||||
}
|
||||
|
||||
export function insertPending(db: SqliteDb, input: InsertPendingInput): void {
|
||||
db.prepare(`
|
||||
INSERT INTO outbox (
|
||||
id, client_message_id, request_fingerprint, payload,
|
||||
enqueued_at, attempts, next_attempt_at, status
|
||||
) VALUES (?, ?, ?, ?, ?, 0, ?, 'pending')
|
||||
`).run(
|
||||
input.id,
|
||||
input.client_message_id,
|
||||
input.request_fingerprint,
|
||||
input.payload,
|
||||
input.now,
|
||||
input.now,
|
||||
);
|
||||
}
|
||||
|
||||
export function markAborted(db: SqliteDb, id: string, by: string, supersededBy: string | null, now: number): void {
|
||||
db.prepare(`
|
||||
UPDATE outbox SET status = 'aborted', aborted_at = ?, aborted_by = ?, superseded_by = ?
|
||||
WHERE id = ?
|
||||
`).run(now, by, supersededBy, id);
|
||||
}
|
||||
|
||||
export function fingerprintsEqual(a: Uint8Array, b: Uint8Array): boolean {
|
||||
if (a.length !== b.length) return false;
|
||||
let diff = 0;
|
||||
for (let i = 0; i < a.length; i++) diff |= (a[i]! ^ b[i]!);
|
||||
return diff === 0;
|
||||
}
|
||||
|
||||
export interface ListOutboxParams {
|
||||
status?: OutboxStatus;
|
||||
limit?: number;
|
||||
}
|
||||
|
||||
export function listOutbox(db: SqliteDb, p: ListOutboxParams = {}): OutboxRow[] {
|
||||
const where: string[] = [];
|
||||
const args: unknown[] = [];
|
||||
if (p.status) { where.push("status = ?"); args.push(p.status); }
|
||||
const sql = `
|
||||
SELECT id, client_message_id, request_fingerprint, payload, enqueued_at,
|
||||
attempts, next_attempt_at, status, last_error, delivered_at,
|
||||
broker_message_id, aborted_at, aborted_by, superseded_by
|
||||
FROM outbox
|
||||
${where.length ? "WHERE " + where.join(" AND ") : ""}
|
||||
ORDER BY enqueued_at DESC
|
||||
LIMIT ?
|
||||
`;
|
||||
args.push(Math.min(Math.max(p.limit ?? 50, 1), 500));
|
||||
return db.prepare(sql).all<OutboxRow>(...args);
|
||||
}
|
||||
|
||||
export function findById(db: SqliteDb, id: string): OutboxRow | null {
|
||||
return db.prepare(`
|
||||
SELECT id, client_message_id, request_fingerprint, payload, enqueued_at,
|
||||
attempts, next_attempt_at, status, last_error, delivered_at,
|
||||
broker_message_id, aborted_at, aborted_by, superseded_by
|
||||
FROM outbox WHERE id = ?
|
||||
`).get<OutboxRow>(id) ?? null;
|
||||
}
|
||||
|
||||
export interface RequeueResult {
|
||||
abortedRowId: string;
|
||||
newRowId: string;
|
||||
newClientMessageId: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* Operator recovery per spec §4.5.3 / §4.6.3. Atomically:
|
||||
* 1. Mark the existing row aborted (audit columns set, status flipped).
|
||||
* 2. Insert a fresh pending row reusing the same payload+fingerprint
|
||||
* under a new client_message_id.
|
||||
* 3. Wire superseded_by on the old row to the new row id.
|
||||
*
|
||||
* Returns null if the requested id doesn't exist or is already aborted/done.
|
||||
*/
|
||||
export function requeueDeadOrPending(
|
||||
db: SqliteDb,
|
||||
args: { id: string; newClientMessageId: string; newRowId: string; now: number; abortedBy: string },
|
||||
): RequeueResult | null {
|
||||
const existing = findById(db, args.id);
|
||||
if (!existing) return null;
|
||||
if (existing.status === "aborted" || existing.status === "done") return null;
|
||||
|
||||
db.prepare(`
|
||||
UPDATE outbox
|
||||
SET status = 'aborted', aborted_at = ?, aborted_by = ?, superseded_by = ?
|
||||
WHERE id = ? AND status IN ('pending','inflight','dead')
|
||||
`).run(args.now, args.abortedBy, args.newRowId, args.id);
|
||||
|
||||
db.prepare(`
|
||||
INSERT INTO outbox (
|
||||
id, client_message_id, request_fingerprint, payload,
|
||||
enqueued_at, attempts, next_attempt_at, status
|
||||
) VALUES (?, ?, ?, ?, ?, 0, ?, 'pending')
|
||||
`).run(
|
||||
args.newRowId,
|
||||
args.newClientMessageId,
|
||||
existing.request_fingerprint,
|
||||
existing.payload,
|
||||
args.now,
|
||||
args.now,
|
||||
);
|
||||
|
||||
return {
|
||||
abortedRowId: existing.id,
|
||||
newRowId: args.newRowId,
|
||||
newClientMessageId: args.newClientMessageId,
|
||||
};
|
||||
}
|
||||
76
apps/cli/src/daemon/db/sqlite.ts
Normal file
76
apps/cli/src/daemon/db/sqlite.ts
Normal file
@@ -0,0 +1,76 @@
|
||||
// SQLite shim. The daemon runs under Node 22.5+ in production (node:sqlite).
|
||||
// During local dev (bun src/entrypoints/cli.ts daemon up) we fall back to
|
||||
// bun:sqlite, which has a near-identical API surface for what we use.
|
||||
|
||||
export type SqliteDb = {
|
||||
prepare(sql: string): {
|
||||
run(...params: unknown[]): { changes: number; lastInsertRowid: number | bigint };
|
||||
get<T = unknown>(...params: unknown[]): T | undefined;
|
||||
all<T = unknown>(...params: unknown[]): T[];
|
||||
};
|
||||
exec(sql: string): void;
|
||||
close(): void;
|
||||
};
|
||||
|
||||
interface DatabaseCtor {
|
||||
new (path: string): SqliteDb;
|
||||
}
|
||||
|
||||
let cached: DatabaseCtor | null = null;
|
||||
|
||||
async function loadSqlite(): Promise<DatabaseCtor> {
|
||||
if (cached) return cached;
|
||||
|
||||
// Prefer node:sqlite (production runtime).
|
||||
try {
|
||||
const mod = (await import("node:sqlite")) as { DatabaseSync: DatabaseCtor };
|
||||
cached = mod.DatabaseSync;
|
||||
return cached;
|
||||
} catch (nodeErr) {
|
||||
// Dev path: bun:sqlite. Bun's Database has prepare/exec/close already.
|
||||
try {
|
||||
const bunMod = (await import("bun:sqlite")) as { Database: DatabaseCtor };
|
||||
cached = bunMod.Database;
|
||||
return cached;
|
||||
} catch {
|
||||
const msg = `claudemesh daemon requires Node.js 22.5+ for the embedded SQLite store ` +
|
||||
`(node:sqlite), or Bun (bun:sqlite) for dev. ` +
|
||||
`Current: ${process.version}. Original error: ${String(nodeErr)}`;
|
||||
throw new Error(msg);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
export async function openSqlite(path: string): Promise<SqliteDb> {
|
||||
const Database = await loadSqlite();
|
||||
const db = new Database(path);
|
||||
// Default pragmas for daemon use:
|
||||
// journal_mode WAL — concurrent reads while one writer is in BEGIN IMMEDIATE.
|
||||
// synchronous NORMAL — balance durability/throughput; daemon is the only writer.
|
||||
// foreign_keys ON — enforce constraints if any are added later.
|
||||
// busy_timeout — let BEGIN IMMEDIATE wait briefly for a contending writer.
|
||||
db.exec(`
|
||||
PRAGMA journal_mode = WAL;
|
||||
PRAGMA synchronous = NORMAL;
|
||||
PRAGMA foreign_keys = ON;
|
||||
PRAGMA busy_timeout = 5000;
|
||||
`);
|
||||
return db;
|
||||
}
|
||||
|
||||
/**
|
||||
* Run `fn` inside a `BEGIN IMMEDIATE` transaction. Per spec §4.5.1, this is
|
||||
* what serializes IPC accept against concurrent same-id requests; SQLite has
|
||||
* no row-level lock and `SELECT FOR UPDATE` is not supported.
|
||||
*/
|
||||
export function inImmediateTx<T>(db: SqliteDb, fn: () => T): T {
|
||||
db.exec("BEGIN IMMEDIATE");
|
||||
try {
|
||||
const out = fn();
|
||||
db.exec("COMMIT");
|
||||
return out;
|
||||
} catch (err) {
|
||||
try { db.exec("ROLLBACK"); } catch { /* ignore */ }
|
||||
throw err;
|
||||
}
|
||||
}
|
||||
186
apps/cli/src/daemon/drain.ts
Normal file
186
apps/cli/src/daemon/drain.ts
Normal file
@@ -0,0 +1,186 @@
|
||||
// Outbox drain worker. Walks `outbox.pending` rows, sends them to the
|
||||
// broker via DaemonBrokerClient, and transitions row state per spec §4.6.1.
|
||||
//
|
||||
// Lifecycle per row:
|
||||
// pending → inflight → done (broker accepted)
|
||||
// → pending+backoff (transient broker error)
|
||||
// → dead (permanent broker error or
|
||||
// attempt cap reached)
|
||||
//
|
||||
// Wakeable: insertPending in the IPC handler can call wake() to skip the
|
||||
// idle interval. We use a simple promise-replacing pattern instead of a
|
||||
// pollable signal.
|
||||
|
||||
import type { SqliteDb } from "./db/sqlite.js";
|
||||
import type { DaemonBrokerClient } from "./broker.js";
|
||||
import type { OutboxStatus } from "./db/outbox.js";
|
||||
|
||||
const POLL_INTERVAL_MS = 500;
|
||||
const MAX_ATTEMPTS_PER_ROW = 25;
|
||||
const BACKOFF_BASE_MS = 500;
|
||||
const BACKOFF_CAP_MS = 30_000;
|
||||
|
||||
interface PendingRow {
|
||||
id: string;
|
||||
client_message_id: string;
|
||||
request_fingerprint: Uint8Array;
|
||||
payload: Uint8Array;
|
||||
attempts: number;
|
||||
}
|
||||
|
||||
export interface DrainOptions {
|
||||
db: SqliteDb;
|
||||
broker: DaemonBrokerClient;
|
||||
/** Stable peer-target the daemon impersonates for now. Sprint 4 routes
|
||||
* this from the per-row destination_kind/destination_ref. */
|
||||
log?: (level: "info" | "warn" | "error", msg: string, meta?: Record<string, unknown>) => void;
|
||||
}
|
||||
|
||||
export interface DrainHandle {
|
||||
wake(): void;
|
||||
close(): Promise<void>;
|
||||
}
|
||||
|
||||
export function startDrainWorker(opts: DrainOptions): DrainHandle {
|
||||
const log = opts.log ?? defaultLog;
|
||||
let stopped = false;
|
||||
let wakeResolve: (() => void) | null = null;
|
||||
let wakePromise = new Promise<void>((r) => { wakeResolve = r; });
|
||||
|
||||
const wake = () => {
|
||||
if (wakeResolve) {
|
||||
const r = wakeResolve;
|
||||
wakeResolve = null;
|
||||
r();
|
||||
}
|
||||
};
|
||||
|
||||
const tick = async () => {
|
||||
while (!stopped) {
|
||||
try { await drainOnce(opts, log); }
|
||||
catch (e) { log("warn", "drain_tick_failed", { err: String(e) }); }
|
||||
// Sleep up to POLL_INTERVAL_MS, but wake immediately on signal.
|
||||
await Promise.race([
|
||||
wakePromise,
|
||||
new Promise<void>((r) => setTimeout(r, POLL_INTERVAL_MS)),
|
||||
]);
|
||||
// Reset wake promise after each loop.
|
||||
wakePromise = new Promise<void>((r) => { wakeResolve = r; });
|
||||
}
|
||||
};
|
||||
|
||||
void tick();
|
||||
|
||||
return {
|
||||
wake,
|
||||
close: async () => { stopped = true; wake(); },
|
||||
};
|
||||
}
|
||||
|
||||
async function drainOnce(opts: DrainOptions, log: NonNullable<DrainOptions["log"]>): Promise<void> {
|
||||
const now = Date.now();
|
||||
const rows = opts.db.prepare(`
|
||||
SELECT id, client_message_id, request_fingerprint, payload, attempts
|
||||
FROM outbox
|
||||
WHERE status = 'pending' AND next_attempt_at <= ?
|
||||
ORDER BY enqueued_at
|
||||
LIMIT 32
|
||||
`).all<PendingRow>(now);
|
||||
|
||||
if (rows.length === 0) return;
|
||||
|
||||
for (const row of rows) {
|
||||
if (markInflight(opts.db, row.id, now) === 0) continue; // raced with another drainer
|
||||
const fpHex = bufferToHex(row.request_fingerprint);
|
||||
|
||||
// For v0.9.0-against-legacy-broker the daemon doesn't yet route by
|
||||
// destination_kind/ref — we send the raw payload as a *self*-target so
|
||||
// the broker accepts it for round-tripping. Sprint 4 reads the actual
|
||||
// destination from the outbox row and encrypts/routes properly. The
|
||||
// important thing here is that the row transitions correctly.
|
||||
const sessionKeys = opts.broker.getSessionKeys();
|
||||
const targetSpec = "*"; // broadcast — leaves shape valid pre-routing
|
||||
const nonce = await randomNonce();
|
||||
const ciphertext = Buffer.from(row.payload).toString("base64");
|
||||
|
||||
let res;
|
||||
try {
|
||||
res = await opts.broker.send({
|
||||
targetSpec,
|
||||
priority: "next",
|
||||
nonce,
|
||||
ciphertext,
|
||||
client_message_id: row.client_message_id,
|
||||
request_fingerprint_hex: fpHex,
|
||||
});
|
||||
} catch (e) {
|
||||
log("warn", "drain_send_threw", { id: row.id, err: String(e) });
|
||||
backoffPending(opts.db, row.id, row.attempts + 1, "exception", String(e));
|
||||
continue;
|
||||
}
|
||||
void sessionKeys; // silence unused for now
|
||||
|
||||
if (res.ok) {
|
||||
markDone(opts.db, row.id, res.messageId, Date.now());
|
||||
} else if (res.permanent) {
|
||||
log("warn", "drain_permanent_failure", { id: row.id, err: res.error });
|
||||
markDead(opts.db, row.id, res.error);
|
||||
} else if (row.attempts + 1 >= MAX_ATTEMPTS_PER_ROW) {
|
||||
log("warn", "drain_max_attempts", { id: row.id, err: res.error });
|
||||
markDead(opts.db, row.id, `max_attempts: ${res.error}`);
|
||||
} else {
|
||||
backoffPending(opts.db, row.id, row.attempts + 1, "retry", res.error);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function markInflight(db: SqliteDb, id: string, now: number): number {
|
||||
return Number(db.prepare(`
|
||||
UPDATE outbox
|
||||
SET status = 'inflight', attempts = attempts + 1, next_attempt_at = ?
|
||||
WHERE id = ? AND status = 'pending'
|
||||
`).run(now + BACKOFF_CAP_MS, id).changes);
|
||||
}
|
||||
|
||||
function markDone(db: SqliteDb, id: string, brokerMessageId: string, now: number) {
|
||||
db.prepare(`
|
||||
UPDATE outbox
|
||||
SET status = 'done', delivered_at = ?, broker_message_id = ?, last_error = NULL
|
||||
WHERE id = ?
|
||||
`).run(now, brokerMessageId, id);
|
||||
}
|
||||
|
||||
function markDead(db: SqliteDb, id: string, err: string) {
|
||||
db.prepare(`UPDATE outbox SET status = 'dead', last_error = ? WHERE id = ?`).run(err, id);
|
||||
}
|
||||
|
||||
function backoffPending(db: SqliteDb, id: string, attempts: number, _kind: string, err: string) {
|
||||
const wait = Math.min(BACKOFF_CAP_MS, BACKOFF_BASE_MS * (2 ** Math.min(attempts, 12)));
|
||||
const next = Date.now() + wait;
|
||||
db.prepare(`
|
||||
UPDATE outbox
|
||||
SET status = 'pending', attempts = ?, next_attempt_at = ?, last_error = ?
|
||||
WHERE id = ?
|
||||
`).run(attempts, next, err, id);
|
||||
}
|
||||
|
||||
function bufferToHex(b: Uint8Array): string {
|
||||
let s = "";
|
||||
for (let i = 0; i < b.length; i++) s += b[i]!.toString(16).padStart(2, "0");
|
||||
return s;
|
||||
}
|
||||
|
||||
async function randomNonce(): Promise<string> {
|
||||
const { randomBytes } = await import("node:crypto");
|
||||
return randomBytes(24).toString("base64");
|
||||
}
|
||||
|
||||
function defaultLog(level: "info" | "warn" | "error", msg: string, meta?: Record<string, unknown>) {
|
||||
const line = JSON.stringify({ level, msg, ...meta, ts: new Date().toISOString() });
|
||||
if (level === "info") process.stdout.write(line + "\n");
|
||||
else process.stderr.write(line + "\n");
|
||||
}
|
||||
|
||||
// Suppress unused-status warning under strict tsc:
|
||||
const _statuses: OutboxStatus[] = ["pending", "inflight", "done", "dead", "aborted"];
|
||||
void _statuses;
|
||||
69
apps/cli/src/daemon/events.ts
Normal file
69
apps/cli/src/daemon/events.ts
Normal file
@@ -0,0 +1,69 @@
|
||||
// Lightweight in-process event bus + SSE writer. Used by /v1/events SSE
|
||||
// stream and consumed by hooks (post-v0.9.0).
|
||||
|
||||
import type { ServerResponse } from "node:http";
|
||||
|
||||
export type DaemonEventKind =
|
||||
| "message"
|
||||
| "peer_join"
|
||||
| "peer_leave"
|
||||
| "broker_status"
|
||||
| "system";
|
||||
|
||||
export interface DaemonEvent {
|
||||
kind: DaemonEventKind;
|
||||
ts: string;
|
||||
data: Record<string, unknown>;
|
||||
}
|
||||
|
||||
type Subscriber = (e: DaemonEvent) => void;
|
||||
|
||||
export class EventBus {
|
||||
private subs = new Set<Subscriber>();
|
||||
|
||||
publish(kind: DaemonEventKind, data: Record<string, unknown>): void {
|
||||
const e: DaemonEvent = { kind, ts: new Date().toISOString(), data };
|
||||
for (const s of this.subs) {
|
||||
try { s(e); } catch { /* one bad subscriber must not poison the rest */ }
|
||||
}
|
||||
}
|
||||
|
||||
subscribe(fn: Subscriber): () => void {
|
||||
this.subs.add(fn);
|
||||
return () => this.subs.delete(fn);
|
||||
}
|
||||
}
|
||||
|
||||
/** Write an event to an open SSE response. */
|
||||
export function writeSse(res: ServerResponse, e: DaemonEvent, idCounter: number): void {
|
||||
res.write(`id: ${idCounter}\n`);
|
||||
res.write(`event: ${e.kind}\n`);
|
||||
res.write(`data: ${JSON.stringify({ ts: e.ts, ...e.data })}\n\n`);
|
||||
}
|
||||
|
||||
/** Open an SSE stream on the response and route bus events to it. */
|
||||
export function bindSseStream(res: ServerResponse, bus: EventBus): () => void {
|
||||
res.statusCode = 200;
|
||||
res.setHeader("Content-Type", "text/event-stream");
|
||||
res.setHeader("Cache-Control", "no-cache, no-transform");
|
||||
res.setHeader("Connection", "keep-alive");
|
||||
res.setHeader("X-Accel-Buffering", "no");
|
||||
res.write(": connected\n\n");
|
||||
|
||||
let counter = 0;
|
||||
const unsubscribe = bus.subscribe((e) => writeSse(res, e, ++counter));
|
||||
|
||||
const heartbeat = setInterval(() => {
|
||||
try { res.write(": keepalive\n\n"); }
|
||||
catch { /* socket already torn down; cleanup handled below */ }
|
||||
}, 15_000);
|
||||
|
||||
const cleanup = () => {
|
||||
clearInterval(heartbeat);
|
||||
unsubscribe();
|
||||
try { res.end(); } catch { /* ignore */ }
|
||||
};
|
||||
res.on("close", cleanup);
|
||||
res.on("error", cleanup);
|
||||
return cleanup;
|
||||
}
|
||||
71
apps/cli/src/daemon/fingerprint.ts
Normal file
71
apps/cli/src/daemon/fingerprint.ts
Normal file
@@ -0,0 +1,71 @@
|
||||
// Canonical request fingerprint per spec §4.4.
|
||||
//
|
||||
// request_fingerprint = sha256(
|
||||
// envelope_version || 0x00 ||
|
||||
// destination_kind || 0x00 ||
|
||||
// destination_ref || 0x00 ||
|
||||
// reply_to_id_or_empty || 0x00 ||
|
||||
// priority || 0x00 ||
|
||||
// meta_canonical_json || 0x00 ||
|
||||
// body_hash
|
||||
// )
|
||||
|
||||
import { createHash } from "node:crypto";
|
||||
|
||||
export type DestKind = "topic" | "dm" | "queue";
|
||||
export type Priority = "now" | "next" | "low";
|
||||
|
||||
export interface SendRequestForFingerprint {
|
||||
envelope_version: number;
|
||||
destination_kind: DestKind;
|
||||
destination_ref: string;
|
||||
reply_to_id?: string | null;
|
||||
priority: Priority;
|
||||
meta?: Record<string, unknown> | null;
|
||||
/** UTF-8 body bytes. */
|
||||
body: Uint8Array;
|
||||
}
|
||||
|
||||
const NUL = Buffer.from([0]);
|
||||
|
||||
export function computeRequestFingerprint(req: SendRequestForFingerprint): Buffer {
|
||||
const h = createHash("sha256");
|
||||
h.update(String(req.envelope_version), "utf8"); h.update(NUL);
|
||||
h.update(req.destination_kind, "utf8"); h.update(NUL);
|
||||
h.update(req.destination_ref, "utf8"); h.update(NUL);
|
||||
h.update(req.reply_to_id ?? "", "utf8"); h.update(NUL);
|
||||
h.update(req.priority, "utf8"); h.update(NUL);
|
||||
h.update(req.meta ? canonicalJson(req.meta) : "", "utf8");
|
||||
h.update(NUL);
|
||||
h.update(createHash("sha256").update(req.body).digest());
|
||||
return h.digest();
|
||||
}
|
||||
|
||||
/**
|
||||
* Minimal JCS-like canonicalization: sort object keys, no whitespace, no
|
||||
* non-ASCII escape funny business. Sufficient for v0.9.0 (TS-only).
|
||||
* Cross-language SDK ports get a vetted JCS lib + conformance tests
|
||||
* (deferred per followups doc).
|
||||
*/
|
||||
export function canonicalJson(value: unknown): string {
|
||||
return JSON.stringify(sortKeys(value));
|
||||
}
|
||||
|
||||
function sortKeys(value: unknown): unknown {
|
||||
if (Array.isArray(value)) return value.map(sortKeys);
|
||||
if (value !== null && typeof value === "object") {
|
||||
const obj = value as Record<string, unknown>;
|
||||
const out: Record<string, unknown> = {};
|
||||
for (const k of Object.keys(obj).sort()) out[k] = sortKeys(obj[k]);
|
||||
return out;
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
export function fingerprintHexPrefix(fp: Uint8Array, bytes = 8): string {
|
||||
let s = "";
|
||||
for (let i = 0; i < bytes && i < fp.length; i++) {
|
||||
s += fp[i]!.toString(16).padStart(2, "0");
|
||||
}
|
||||
return s;
|
||||
}
|
||||
123
apps/cli/src/daemon/identity.ts
Normal file
123
apps/cli/src/daemon/identity.ts
Normal file
@@ -0,0 +1,123 @@
|
||||
// Accidental-clone detection per spec §2.2. Catches restored backups
|
||||
// and copy-pasted homedirs by comparing a stable host fingerprint
|
||||
// against the one we wrote at first daemon start.
|
||||
//
|
||||
// NOT attacker-grade: anyone copying both the keypair AND the
|
||||
// host_fingerprint defeats this. Threat model §16 says so explicitly.
|
||||
|
||||
import { existsSync, readFileSync, writeFileSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import { createHash, randomUUID } from "node:crypto";
|
||||
import { networkInterfaces } from "node:os";
|
||||
|
||||
import { DAEMON_PATHS } from "./paths.js";
|
||||
|
||||
export type ClonePolicy = "refuse" | "warn" | "allow";
|
||||
|
||||
export interface FingerprintRecord {
|
||||
schema_version: 1;
|
||||
fingerprint: string; // sha256 hex
|
||||
host_id: string; // raw, for diagnostics
|
||||
stable_mac: string; // raw, for diagnostics
|
||||
written_at: string; // ISO date
|
||||
}
|
||||
|
||||
export interface FingerprintCheck {
|
||||
result: "first_run" | "match" | "mismatch" | "unavailable";
|
||||
current: FingerprintRecord;
|
||||
stored?: FingerprintRecord;
|
||||
}
|
||||
|
||||
const FILE_NAME = "host_fingerprint.json";
|
||||
|
||||
function path(): string { return join(DAEMON_PATHS.DAEMON_DIR, FILE_NAME); }
|
||||
|
||||
/** Compute (without writing) the current host fingerprint. */
|
||||
export function computeCurrentFingerprint(): FingerprintRecord {
|
||||
// Per spec §2.2 / followups doc: when neither host_id nor a stable MAC
|
||||
// are readable we fall back to a persisted random UUID. We DO NOT mint
|
||||
// a fresh random per call (that would make every restart look like a
|
||||
// clone). Instead, leave host_id empty when unknown — the MAC alone
|
||||
// identifies the host for accidental-clone detection.
|
||||
const host_id = readHostId() ?? "";
|
||||
const stable_mac = pickStableMac() ?? "";
|
||||
const fp = createHash("sha256").update(host_id, "utf8").update("\0").update(stable_mac, "utf8").digest("hex");
|
||||
return {
|
||||
schema_version: 1,
|
||||
fingerprint: fp,
|
||||
host_id,
|
||||
stable_mac,
|
||||
written_at: new Date().toISOString(),
|
||||
};
|
||||
}
|
||||
|
||||
// `randomUUID` is no longer used after the random-fallback fix; keep the
|
||||
// import only if other helpers need it.
|
||||
void randomUUID;
|
||||
|
||||
/** Read or write the persisted fingerprint and report the result. */
|
||||
export function checkFingerprint(): FingerprintCheck {
|
||||
const current = computeCurrentFingerprint();
|
||||
if (!existsSync(path())) {
|
||||
writeFileSync(path(), JSON.stringify(current, null, 2), { mode: 0o600 });
|
||||
return { result: "first_run", current };
|
||||
}
|
||||
let stored: FingerprintRecord;
|
||||
try { stored = JSON.parse(readFileSync(path(), "utf8")) as FingerprintRecord; }
|
||||
catch { return { result: "unavailable", current }; }
|
||||
if (stored.fingerprint === current.fingerprint) return { result: "match", current, stored };
|
||||
return { result: "mismatch", current, stored };
|
||||
}
|
||||
|
||||
/** Re-write the fingerprint file. Used by `daemon accept-host`. */
|
||||
export function acceptCurrentHost(): FingerprintRecord {
|
||||
const current = computeCurrentFingerprint();
|
||||
writeFileSync(path(), JSON.stringify(current, null, 2), { mode: 0o600 });
|
||||
return current;
|
||||
}
|
||||
|
||||
// ── platform helpers ───────────────────────────────────────────────────
|
||||
|
||||
function readHostId(): string | null {
|
||||
// Linux: /etc/machine-id (or /var/lib/dbus/machine-id).
|
||||
if (process.platform === "linux") {
|
||||
for (const p of ["/etc/machine-id", "/var/lib/dbus/machine-id"]) {
|
||||
try {
|
||||
const raw = readFileSync(p, "utf8").trim();
|
||||
if (raw) return `linux:${raw}`;
|
||||
} catch { /* try next */ }
|
||||
}
|
||||
return null;
|
||||
}
|
||||
// macOS: IOPlatformUUID via ioreg. We avoid spawning by checking ENV.
|
||||
if (process.platform === "darwin") {
|
||||
// No reliable file; fall back to MAC-only fingerprint.
|
||||
return null;
|
||||
}
|
||||
// Windows: HKLM\SOFTWARE\Microsoft\Cryptography\MachineGuid. Skip in v0.9.0.
|
||||
return null;
|
||||
}
|
||||
|
||||
function pickStableMac(): string | null {
|
||||
const ifs = networkInterfaces();
|
||||
const candidates: string[] = [];
|
||||
for (const [name, addrs] of Object.entries(ifs)) {
|
||||
if (!addrs) continue;
|
||||
if (isIgnoredInterface(name)) continue;
|
||||
for (const a of addrs) {
|
||||
if (a.internal) continue;
|
||||
if (!a.mac || a.mac === "00:00:00:00:00:00") continue;
|
||||
candidates.push(`${name}::${a.mac}`);
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (candidates.length === 0) return null;
|
||||
candidates.sort(); // lex by interface name
|
||||
const first = candidates[0]!;
|
||||
const idx = first.indexOf("::");
|
||||
return idx >= 0 ? first.slice(idx + 2) : first;
|
||||
}
|
||||
|
||||
function isIgnoredInterface(name: string): boolean {
|
||||
return /^(lo|docker|br-|veth|tap|tun|tailscale|wg|utun|ppp|vboxnet|vmnet|awdl|llw)/i.test(name);
|
||||
}
|
||||
128
apps/cli/src/daemon/inbound.ts
Normal file
128
apps/cli/src/daemon/inbound.ts
Normal file
@@ -0,0 +1,128 @@
|
||||
// Decode incoming broker pushes and dedupe-insert them into the daemon
|
||||
// inbox. Publishes a `message` event to the daemon's event bus on every
|
||||
// new row (idempotent receives suppress the event).
|
||||
|
||||
import { randomUUID } from "node:crypto";
|
||||
|
||||
import type { SqliteDb } from "./db/sqlite.js";
|
||||
import { insertIfNew } from "./db/inbox.js";
|
||||
import type { EventBus } from "./events.js";
|
||||
import { decryptDirect } from "~/services/crypto/facade.js";
|
||||
|
||||
export interface InboundContext {
|
||||
db: SqliteDb;
|
||||
bus: EventBus;
|
||||
meshSlug: string;
|
||||
/** Daemon's mesh secret key hex, used to decrypt sealed DMs. */
|
||||
recipientSecretKeyHex?: string;
|
||||
/** Daemon's session secret key hex (rotates per connect). When the
|
||||
* sender encrypted to our session pubkey, decrypt with this instead. */
|
||||
sessionSecretKeyHex?: string;
|
||||
log?: (level: "info" | "warn" | "error", msg: string, meta?: Record<string, unknown>) => void;
|
||||
}
|
||||
|
||||
/**
|
||||
* Spec §4.5: dedupe by `client_message_id` (broker echoes it from the
|
||||
* sender's daemon). When the broker doesn't yet propagate the field
|
||||
* (Sprint 7 prereq), fall back to the broker's `messageId` as the
|
||||
* dedupe key — at-least-once still holds; we just lose the
|
||||
* sender-attested form.
|
||||
*/
|
||||
export async function handleBrokerPush(msg: Record<string, unknown>, ctx: InboundContext): Promise<void> {
|
||||
// System/topology pushes (peer_join, tick, …) — emit verbatim.
|
||||
if (msg.subtype === "system" && typeof msg.event === "string") {
|
||||
ctx.bus.publish(mapSystemEventKind(msg.event), {
|
||||
mesh: ctx.meshSlug,
|
||||
event: msg.event,
|
||||
...(msg.eventData as Record<string, unknown> | undefined ?? {}),
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
if (msg.type !== "push") return;
|
||||
|
||||
const brokerMessageId = stringOrNull(msg.messageId);
|
||||
const senderPubkey = stringOrNull(msg.senderPubkey) ?? "";
|
||||
const senderName = stringOrNull(msg.senderName) ?? senderPubkey.slice(0, 8);
|
||||
const topic = stringOrNull(msg.topic);
|
||||
const replyToId = stringOrNull(msg.replyToId);
|
||||
const ciphertext = stringOrNull(msg.ciphertext) ?? "";
|
||||
const nonce = stringOrNull(msg.nonce) ?? "";
|
||||
const createdAt = stringOrNull(msg.createdAt);
|
||||
// Forward-compat: Sprint 7 brokers will send client_message_id alongside.
|
||||
const clientMessageId = stringOrNull(msg.client_message_id) ?? brokerMessageId ?? randomUUID();
|
||||
const body = await decryptOrFallback({
|
||||
ciphertext, nonce, senderPubkey, ctx,
|
||||
});
|
||||
|
||||
const id = randomUUID();
|
||||
const inserted = insertIfNew(ctx.db, {
|
||||
id,
|
||||
client_message_id: clientMessageId,
|
||||
broker_message_id: brokerMessageId,
|
||||
mesh: ctx.meshSlug,
|
||||
topic,
|
||||
sender_pubkey: senderPubkey,
|
||||
sender_name: senderName,
|
||||
body,
|
||||
meta: createdAt ? JSON.stringify({ created_at: createdAt }) : null,
|
||||
received_at: Date.now(),
|
||||
reply_to_id: replyToId,
|
||||
});
|
||||
|
||||
if (!inserted) return; // already had this id; no event
|
||||
|
||||
ctx.bus.publish("message", {
|
||||
id,
|
||||
mesh: ctx.meshSlug,
|
||||
client_message_id: clientMessageId,
|
||||
broker_message_id: brokerMessageId,
|
||||
sender_pubkey: senderPubkey,
|
||||
sender_name: senderName,
|
||||
topic,
|
||||
reply_to_id: replyToId,
|
||||
body,
|
||||
created_at: createdAt,
|
||||
});
|
||||
}
|
||||
|
||||
async function decryptOrFallback(args: {
|
||||
ciphertext: string;
|
||||
nonce: string;
|
||||
senderPubkey: string;
|
||||
ctx: InboundContext;
|
||||
}): Promise<string | null> {
|
||||
const { ciphertext, nonce, senderPubkey, ctx } = args;
|
||||
if (!ciphertext) return null;
|
||||
|
||||
// Try DM decrypt first (sender used crypto_box against our session/member key).
|
||||
if (nonce && senderPubkey) {
|
||||
const envelope = { nonce, ciphertext };
|
||||
// Try session key (sender encrypted to our session pubkey, the common case).
|
||||
if (ctx.sessionSecretKeyHex) {
|
||||
const pt = await decryptDirect(envelope, senderPubkey, ctx.sessionSecretKeyHex);
|
||||
if (pt !== null) return pt;
|
||||
}
|
||||
// Fall back to member key (sender encrypted to our stable mesh pubkey).
|
||||
if (ctx.recipientSecretKeyHex) {
|
||||
const pt = await decryptDirect(envelope, senderPubkey, ctx.recipientSecretKeyHex);
|
||||
if (pt !== null) return pt;
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback: broadcast/topic posts are base64 plaintext (existing CLI
|
||||
// pre-encryption convention for `*` and `@topic`). Sprint 7+ adds per-
|
||||
// topic symmetric keys.
|
||||
try { return Buffer.from(ciphertext, "base64").toString("utf8"); }
|
||||
catch (e) { ctx.log?.("warn", "inbound_b64_decode_failed", { err: String(e) }); return null; }
|
||||
}
|
||||
|
||||
function stringOrNull(v: unknown): string | null {
|
||||
return typeof v === "string" && v.length > 0 ? v : null;
|
||||
}
|
||||
|
||||
function mapSystemEventKind(event: string): "peer_join" | "peer_leave" | "system" {
|
||||
if (event === "peer_joined") return "peer_join";
|
||||
if (event === "peer_left") return "peer_leave";
|
||||
return "system";
|
||||
}
|
||||
68
apps/cli/src/daemon/ipc/client.ts
Normal file
68
apps/cli/src/daemon/ipc/client.ts
Normal file
@@ -0,0 +1,68 @@
|
||||
import { request as httpRequest } from "node:http";
|
||||
|
||||
import { DAEMON_PATHS, DAEMON_TCP_HOST, DAEMON_TCP_DEFAULT_PORT } from "../paths.js";
|
||||
import { readLocalToken } from "../local-token.js";
|
||||
|
||||
export interface IpcRequestOptions {
|
||||
method?: "GET" | "POST" | "PATCH" | "DELETE";
|
||||
path: string;
|
||||
body?: unknown;
|
||||
/** Force TCP loopback instead of UDS (for tests / cross-container scenarios). */
|
||||
preferTcp?: boolean;
|
||||
timeoutMs?: number;
|
||||
}
|
||||
|
||||
export interface IpcResponse<T = unknown> {
|
||||
status: number;
|
||||
body: T;
|
||||
}
|
||||
|
||||
export class IpcError extends Error {
|
||||
constructor(public status: number, public payload: unknown, msg: string) {
|
||||
super(msg);
|
||||
}
|
||||
}
|
||||
|
||||
/** Small, dependency-free IPC client for talking to the local daemon. */
|
||||
export async function ipc<T = unknown>(opts: IpcRequestOptions): Promise<IpcResponse<T>> {
|
||||
const useTcp = !!opts.preferTcp;
|
||||
const headers: Record<string, string> = {
|
||||
accept: "application/json",
|
||||
host: "localhost",
|
||||
};
|
||||
|
||||
let bodyBuf: Buffer | undefined;
|
||||
if (opts.body !== undefined) {
|
||||
bodyBuf = Buffer.from(JSON.stringify(opts.body), "utf8");
|
||||
headers["content-type"] = "application/json";
|
||||
headers["content-length"] = String(bodyBuf.length);
|
||||
}
|
||||
|
||||
if (useTcp) {
|
||||
const tok = readLocalToken();
|
||||
if (!tok) throw new IpcError(0, null, "daemon local token not found; is the daemon running?");
|
||||
headers.authorization = `Bearer ${tok}`;
|
||||
}
|
||||
|
||||
return new Promise<IpcResponse<T>>((resolve, reject) => {
|
||||
const req = httpRequest(
|
||||
useTcp
|
||||
? { host: DAEMON_TCP_HOST, port: DAEMON_TCP_DEFAULT_PORT, path: opts.path, method: opts.method ?? "GET", headers }
|
||||
: { socketPath: DAEMON_PATHS.SOCK_FILE, path: opts.path, method: opts.method ?? "GET", headers },
|
||||
(res) => {
|
||||
const chunks: Buffer[] = [];
|
||||
res.on("data", (c) => chunks.push(c));
|
||||
res.on("end", () => {
|
||||
const raw = Buffer.concat(chunks).toString("utf8");
|
||||
let parsed: unknown = raw;
|
||||
try { parsed = raw.length > 0 ? JSON.parse(raw) : null; } catch { /* leave raw */ }
|
||||
resolve({ status: res.statusCode ?? 0, body: parsed as T });
|
||||
});
|
||||
},
|
||||
);
|
||||
req.setTimeout(opts.timeoutMs ?? 5_000, () => req.destroy(new Error("ipc_timeout")));
|
||||
req.on("error", (err) => reject(err));
|
||||
if (bodyBuf) req.write(bodyBuf);
|
||||
req.end();
|
||||
});
|
||||
}
|
||||
150
apps/cli/src/daemon/ipc/handlers/send.ts
Normal file
150
apps/cli/src/daemon/ipc/handlers/send.ts
Normal file
@@ -0,0 +1,150 @@
|
||||
// IPC accept handler for POST /v1/send. Implements the §4.5.1 lookup table:
|
||||
// daemon-local idempotency over outbox states × fingerprint match/mismatch.
|
||||
//
|
||||
// Broker delivery (drain → broker WS) is a separate concern and not part of
|
||||
// this handler — this only serializes the daemon-local accept.
|
||||
|
||||
import { randomUUID } from "node:crypto";
|
||||
|
||||
import {
|
||||
findByClientId,
|
||||
fingerprintsEqual,
|
||||
insertPending,
|
||||
type OutboxRow,
|
||||
} from "../../db/outbox.js";
|
||||
import { inImmediateTx, type SqliteDb } from "../../db/sqlite.js";
|
||||
import {
|
||||
computeRequestFingerprint,
|
||||
fingerprintHexPrefix,
|
||||
type DestKind,
|
||||
type Priority,
|
||||
} from "../../fingerprint.js";
|
||||
|
||||
export interface SendRequest {
|
||||
to: string; // peer name | pubkey hex | @group | * | topic name
|
||||
message: string;
|
||||
priority?: Priority;
|
||||
meta?: Record<string, unknown>;
|
||||
reply_to_id?: string;
|
||||
/** Optional caller-supplied id. Wins over Idempotency-Key header. */
|
||||
client_message_id?: string;
|
||||
/** Destination kind + ref must be supplied by the IPC layer after parsing `to`. */
|
||||
destination_kind: DestKind;
|
||||
destination_ref: string;
|
||||
}
|
||||
|
||||
export type AcceptOutcome =
|
||||
| { kind: "accepted_pending"; status: 202; client_message_id: string }
|
||||
| { kind: "accepted_inflight"; status: 202; client_message_id: string }
|
||||
| { kind: "accepted_done"; status: 200; client_message_id: string; broker_message_id: string | null }
|
||||
| { kind: "conflict"; status: 409; reason: string; daemon_fingerprint_prefix: string; broker_message_id?: string | null };
|
||||
|
||||
export interface AcceptDeps {
|
||||
db: SqliteDb;
|
||||
/** Override for testing. */
|
||||
now?: () => number;
|
||||
/** Override for testing. */
|
||||
newId?: () => string;
|
||||
}
|
||||
|
||||
export const ENVELOPE_VERSION = 1;
|
||||
|
||||
/**
|
||||
* Daemon-local idempotency: serialized via BEGIN IMMEDIATE so concurrent
|
||||
* IPC requests with the same client_message_id produce one outcome.
|
||||
*/
|
||||
export function acceptSend(req: SendRequest, deps: AcceptDeps): AcceptOutcome {
|
||||
const now = (deps.now ?? Date.now)();
|
||||
const newId = deps.newId ?? randomUUID;
|
||||
|
||||
// Per spec, caller-supplied client_message_id wins; otherwise daemon mints one.
|
||||
const clientId = req.client_message_id?.trim() || ulidLike(newId);
|
||||
|
||||
const body = Buffer.from(req.message, "utf8");
|
||||
const fingerprint = computeRequestFingerprint({
|
||||
envelope_version: ENVELOPE_VERSION,
|
||||
destination_kind: req.destination_kind,
|
||||
destination_ref: req.destination_ref,
|
||||
reply_to_id: req.reply_to_id ?? null,
|
||||
priority: req.priority ?? "next",
|
||||
meta: req.meta ?? null,
|
||||
body,
|
||||
});
|
||||
|
||||
return inImmediateTx(deps.db, () => {
|
||||
const existing = findByClientId(deps.db, clientId);
|
||||
if (!existing) {
|
||||
insertPending(deps.db, {
|
||||
id: newId(),
|
||||
client_message_id: clientId,
|
||||
request_fingerprint: fingerprint,
|
||||
payload: body,
|
||||
now,
|
||||
});
|
||||
return { kind: "accepted_pending", status: 202, client_message_id: clientId };
|
||||
}
|
||||
|
||||
return decideForExistingRow(existing, fingerprint);
|
||||
});
|
||||
}
|
||||
|
||||
function decideForExistingRow(row: OutboxRow, fp: Buffer): AcceptOutcome {
|
||||
const match = fingerprintsEqual(fp, row.request_fingerprint);
|
||||
const fpPrefix = fingerprintHexPrefix(fp);
|
||||
|
||||
// Spec §4.5.1 lookup table.
|
||||
switch (row.status) {
|
||||
case "pending":
|
||||
return match
|
||||
? { kind: "accepted_pending", status: 202, client_message_id: row.client_message_id }
|
||||
: conflict("outbox_pending_fingerprint_mismatch", fpPrefix);
|
||||
|
||||
case "inflight":
|
||||
return match
|
||||
? { kind: "accepted_inflight", status: 202, client_message_id: row.client_message_id }
|
||||
: conflict("outbox_inflight_fingerprint_mismatch", fpPrefix);
|
||||
|
||||
case "done":
|
||||
return match
|
||||
? {
|
||||
kind: "accepted_done",
|
||||
status: 200,
|
||||
client_message_id: row.client_message_id,
|
||||
broker_message_id: row.broker_message_id,
|
||||
}
|
||||
: conflict("outbox_done_fingerprint_mismatch", fpPrefix, row.broker_message_id);
|
||||
|
||||
case "dead":
|
||||
return match
|
||||
? conflict("outbox_dead_fingerprint_match", fpPrefix, row.broker_message_id)
|
||||
: conflict("outbox_dead_fingerprint_mismatch", fpPrefix);
|
||||
|
||||
case "aborted":
|
||||
return match
|
||||
? conflict("outbox_aborted_fingerprint_match", fpPrefix)
|
||||
: conflict("outbox_aborted_fingerprint_mismatch", fpPrefix);
|
||||
|
||||
default: {
|
||||
// Exhaustiveness check.
|
||||
const _: never = row.status;
|
||||
throw new Error(`unknown outbox status: ${String(_)}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function conflict(reason: string, fpPrefix: string, brokerMessageId: string | null = null): AcceptOutcome {
|
||||
return {
|
||||
kind: "conflict",
|
||||
status: 409,
|
||||
reason,
|
||||
daemon_fingerprint_prefix: fpPrefix,
|
||||
broker_message_id: brokerMessageId,
|
||||
};
|
||||
}
|
||||
|
||||
/** Tiny ULID-ish generator: 26-char Crockford-base32 from time + random. */
|
||||
function ulidLike(newId: () => string): string {
|
||||
// We don't ship a full ULID lib for one fallback path; uuid is fine here.
|
||||
// The wire-stable id is whatever we return; downstream just uses it as text.
|
||||
return newId();
|
||||
}
|
||||
464
apps/cli/src/daemon/ipc/server.ts
Normal file
464
apps/cli/src/daemon/ipc/server.ts
Normal file
@@ -0,0 +1,464 @@
|
||||
import { createServer, type IncomingMessage, type Server, type ServerResponse } from "node:http";
|
||||
import { chmodSync, existsSync, unlinkSync } from "node:fs";
|
||||
import { timingSafeEqual } from "node:crypto";
|
||||
|
||||
import { DAEMON_PATHS, DAEMON_TCP_HOST, DAEMON_TCP_DEFAULT_PORT } from "../paths.js";
|
||||
import type { SqliteDb } from "../db/sqlite.js";
|
||||
import { acceptSend, type SendRequest } from "./handlers/send.js";
|
||||
import { listInbox } from "../db/inbox.js";
|
||||
import { listOutbox, requeueDeadOrPending, type OutboxStatus } from "../db/outbox.js";
|
||||
import { randomUUID } from "node:crypto";
|
||||
import { bindSseStream, type EventBus } from "../events.js";
|
||||
import type { DaemonBrokerClient } from "../broker.js";
|
||||
import { VERSION } from "~/constants/urls.js";
|
||||
|
||||
/**
|
||||
* Per spec §3.3:
|
||||
* - UDS reaches via filesystem perms (0600): no bearer required.
|
||||
* - TCP loopback + SSE require `Authorization: Bearer <local_token>`.
|
||||
* - Token in query string returns 400 + security log.
|
||||
* - Host header must be localhost / 127.0.0.1 / [::1] / empty.
|
||||
* - All endpoints auth-required by default; `/v1/health` opt-in public.
|
||||
*
|
||||
* v0.9.0 surface: /v1/version, /v1/health (auth-required), more added later.
|
||||
*/
|
||||
export interface IpcServerOptions {
|
||||
localToken: string;
|
||||
/** Bind a TCP loopback listener too (default true; container default is UDS-only). */
|
||||
tcpEnabled?: boolean;
|
||||
/** Override the default TCP port. */
|
||||
tcpPort?: number;
|
||||
/** Make /v1/health reachable without a token (k8s probe scenario). */
|
||||
publicHealthCheck?: boolean;
|
||||
/** Optional logger. Falls back to console.error for warnings/security events. */
|
||||
log?: (level: "info" | "warn" | "error", msg: string, meta?: Record<string, unknown>) => void;
|
||||
/** Outbox database handle for /v1/send. */
|
||||
outboxDb?: SqliteDb;
|
||||
/** Inbox database handle for /v1/inbox. */
|
||||
inboxDb?: SqliteDb;
|
||||
/** Event bus backing /v1/events SSE stream. */
|
||||
bus?: EventBus;
|
||||
/** Broker client (for peers/profile passthrough). */
|
||||
broker?: DaemonBrokerClient;
|
||||
/** Notify when a new outbox row was inserted (drains can wake). */
|
||||
onPendingInserted?: () => void;
|
||||
}
|
||||
|
||||
export interface IpcServerHandle {
|
||||
uds: Server;
|
||||
tcp: Server | null;
|
||||
/** Resolves once both listeners are live. */
|
||||
ready: Promise<void>;
|
||||
close: () => Promise<void>;
|
||||
}
|
||||
|
||||
export function startIpcServer(opts: IpcServerOptions): IpcServerHandle {
|
||||
const log = opts.log ?? defaultLogger;
|
||||
|
||||
const handler = makeHandler({
|
||||
localToken: opts.localToken,
|
||||
publicHealthCheck: !!opts.publicHealthCheck,
|
||||
log,
|
||||
outboxDb: opts.outboxDb,
|
||||
inboxDb: opts.inboxDb,
|
||||
bus: opts.bus,
|
||||
broker: opts.broker,
|
||||
onPendingInserted: opts.onPendingInserted,
|
||||
});
|
||||
|
||||
// --- UDS listener -------------------------------------------------------
|
||||
if (existsSync(DAEMON_PATHS.SOCK_FILE)) {
|
||||
// Possible stale socket from a previous crashed daemon. We hold the
|
||||
// singleton lock by the time we reach here, so it's safe to remove.
|
||||
try { unlinkSync(DAEMON_PATHS.SOCK_FILE); } catch { /* ignore */ }
|
||||
}
|
||||
const uds = createServer(handler);
|
||||
const udsReady = new Promise<void>((resolve, reject) => {
|
||||
uds.once("error", reject);
|
||||
uds.listen(DAEMON_PATHS.SOCK_FILE, () => {
|
||||
// Restrict the socket file itself; node creates it 0755 by default.
|
||||
try { chmodSync(DAEMON_PATHS.SOCK_FILE, 0o600); }
|
||||
catch (err) { log("warn", "uds_chmod_failed", { err: String(err) }); }
|
||||
resolve();
|
||||
});
|
||||
});
|
||||
|
||||
// --- TCP listener (optional, off in container defaults) ----------------
|
||||
let tcp: Server | null = null;
|
||||
let tcpReady: Promise<void> = Promise.resolve();
|
||||
if (opts.tcpEnabled !== false) {
|
||||
tcp = createServer(handler);
|
||||
tcpReady = new Promise<void>((resolve, reject) => {
|
||||
tcp!.once("error", reject);
|
||||
tcp!.listen(opts.tcpPort ?? DAEMON_TCP_DEFAULT_PORT, DAEMON_TCP_HOST, () => resolve());
|
||||
});
|
||||
}
|
||||
|
||||
return {
|
||||
uds,
|
||||
tcp,
|
||||
ready: Promise.all([udsReady, tcpReady]).then(() => undefined),
|
||||
close: async () => {
|
||||
await Promise.allSettled([
|
||||
new Promise<void>((res) => uds.close(() => res())),
|
||||
tcp ? new Promise<void>((res) => tcp!.close(() => res())) : Promise.resolve(),
|
||||
]);
|
||||
try { unlinkSync(DAEMON_PATHS.SOCK_FILE); } catch { /* ignore */ }
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
function defaultLogger(level: "info" | "warn" | "error", msg: string, meta?: Record<string, unknown>) {
|
||||
const line = JSON.stringify({ level, msg, ...meta, ts: new Date().toISOString() });
|
||||
if (level === "info") process.stdout.write(line + "\n");
|
||||
else process.stderr.write(line + "\n");
|
||||
}
|
||||
|
||||
function makeHandler(opts: {
|
||||
localToken: string;
|
||||
publicHealthCheck: boolean;
|
||||
log: (level: "info" | "warn" | "error", msg: string, meta?: Record<string, unknown>) => void;
|
||||
outboxDb?: SqliteDb;
|
||||
inboxDb?: SqliteDb;
|
||||
bus?: EventBus;
|
||||
broker?: DaemonBrokerClient;
|
||||
onPendingInserted?: () => void;
|
||||
}) {
|
||||
const tokenBytes = Buffer.from(opts.localToken, "utf8");
|
||||
|
||||
return async (req: IncomingMessage, res: ServerResponse) => {
|
||||
const url = new URL(req.url ?? "/", "http://daemon.local");
|
||||
|
||||
// Token in query string → security event + 400.
|
||||
if (url.searchParams.has("token")) {
|
||||
opts.log("warn", "ipc_token_in_query_string_rejected", { path: url.pathname });
|
||||
respond(res, 400, { error: "token must be in Authorization header, not query string" });
|
||||
return;
|
||||
}
|
||||
|
||||
// Host header check — only the loopback names allowed.
|
||||
const host = (req.headers.host ?? "").toLowerCase().split(":")[0]?.trim() ?? "";
|
||||
if (host && host !== "localhost" && host !== "127.0.0.1" && host !== "[::1]" && host !== "::1") {
|
||||
respond(res, 403, { error: "forbidden host" });
|
||||
return;
|
||||
}
|
||||
|
||||
// Origin allowlist — empty by default (no browsers should hit this).
|
||||
if (req.headers.origin) {
|
||||
respond(res, 403, { error: "forbidden origin" });
|
||||
return;
|
||||
}
|
||||
|
||||
// Authentication. UDS connections (over unix socket) skip the bearer
|
||||
// check because filesystem perms gate access; TCP requires it.
|
||||
const isUds = (req.socket as { remoteAddress?: string }).remoteAddress === undefined;
|
||||
const isPublicHealth = opts.publicHealthCheck && url.pathname === "/v1/health";
|
||||
if (!isUds && !isPublicHealth) {
|
||||
const authz = req.headers.authorization ?? "";
|
||||
const m = /^Bearer\s+(.+)$/.exec(authz.trim());
|
||||
if (!m || !m[1]) {
|
||||
respond(res, 401, { error: "missing bearer token" });
|
||||
return;
|
||||
}
|
||||
const provided = Buffer.from(m[1], "utf8");
|
||||
if (provided.length !== tokenBytes.length || !timingSafeEqual(provided, tokenBytes)) {
|
||||
opts.log("warn", "ipc_bearer_mismatch", { path: url.pathname });
|
||||
respond(res, 401, { error: "invalid bearer token" });
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Routing.
|
||||
if (req.method === "GET" && url.pathname === "/v1/version") {
|
||||
respond(res, 200, {
|
||||
daemon_version: VERSION,
|
||||
ipc_api: "v1",
|
||||
ipc_features: ["version", "health", "send", "inbox", "events", "peers", "profile"],
|
||||
schema_version: 1,
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
if (req.method === "GET" && url.pathname === "/v1/health") {
|
||||
respond(res, 200, { ok: true, pid: process.pid });
|
||||
return;
|
||||
}
|
||||
|
||||
if (req.method === "GET" && url.pathname === "/v1/events") {
|
||||
if (!opts.bus) {
|
||||
respond(res, 503, { error: "event bus not initialised" });
|
||||
return;
|
||||
}
|
||||
bindSseStream(res, opts.bus);
|
||||
return;
|
||||
}
|
||||
|
||||
if (req.method === "GET" && url.pathname === "/v1/peers") {
|
||||
if (!opts.broker) { respond(res, 503, { error: "broker not initialised" }); return; }
|
||||
try {
|
||||
const peers = await opts.broker.listPeers();
|
||||
respond(res, 200, { peers });
|
||||
} catch (e) {
|
||||
respond(res, 502, { error: "broker_unreachable", detail: String(e) });
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (req.method === "POST" && url.pathname === "/v1/profile") {
|
||||
if (!opts.broker) { respond(res, 503, { error: "broker not initialised" }); return; }
|
||||
try {
|
||||
const body = await readJsonBody(req, 16 * 1024) as Record<string, unknown> | null;
|
||||
if (!body) { respond(res, 400, { error: "expected JSON object" }); return; }
|
||||
const updates: Record<string, unknown> = {};
|
||||
if (typeof body.summary === "string") opts.broker.setSummary(body.summary);
|
||||
if (body.status === "idle" || body.status === "working" || body.status === "dnd") opts.broker.setStatus(body.status);
|
||||
if (typeof body.visible === "boolean") opts.broker.setVisible(body.visible);
|
||||
const profile: { avatar?: string; title?: string; bio?: string; capabilities?: string[] } = {};
|
||||
if (typeof body.avatar === "string") profile.avatar = body.avatar;
|
||||
if (typeof body.title === "string") profile.title = body.title;
|
||||
if (typeof body.bio === "string") profile.bio = body.bio;
|
||||
if (Array.isArray(body.capabilities)) profile.capabilities = body.capabilities.filter((c) => typeof c === "string") as string[];
|
||||
if (Object.keys(profile).length > 0) opts.broker.setProfile(profile);
|
||||
Object.assign(updates, body);
|
||||
respond(res, 200, { ok: true, applied: Object.keys(updates) });
|
||||
} catch (e) {
|
||||
respond(res, 400, { error: String(e) });
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (req.method === "GET" && url.pathname === "/v1/inbox") {
|
||||
if (!opts.inboxDb) {
|
||||
respond(res, 503, { error: "inbox not initialised" });
|
||||
return;
|
||||
}
|
||||
const sinceRaw = url.searchParams.get("since");
|
||||
const since = sinceRaw ? Date.parse(sinceRaw) : undefined;
|
||||
const topic = url.searchParams.get("topic") ?? undefined;
|
||||
const fromPubkey = url.searchParams.get("from") ?? undefined;
|
||||
const limitRaw = url.searchParams.get("limit");
|
||||
const limit = limitRaw ? Number.parseInt(limitRaw, 10) : undefined;
|
||||
const rows = listInbox(opts.inboxDb, {
|
||||
since: Number.isFinite(since) ? since : undefined,
|
||||
topic,
|
||||
fromPubkey,
|
||||
limit: Number.isFinite(limit ?? NaN) ? limit : undefined,
|
||||
});
|
||||
respond(res, 200, {
|
||||
items: rows.map((r) => ({
|
||||
id: r.id,
|
||||
client_message_id: r.client_message_id,
|
||||
broker_message_id: r.broker_message_id,
|
||||
mesh: r.mesh,
|
||||
topic: r.topic,
|
||||
sender_pubkey: r.sender_pubkey,
|
||||
sender_name: r.sender_name,
|
||||
body: r.body,
|
||||
received_at: new Date(r.received_at).toISOString(),
|
||||
reply_to_id: r.reply_to_id,
|
||||
})),
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
if (req.method === "GET" && url.pathname === "/v1/outbox") {
|
||||
if (!opts.outboxDb) { respond(res, 503, { error: "outbox not initialised" }); return; }
|
||||
const statusParam = url.searchParams.get("status") ?? undefined;
|
||||
const allowed: OutboxStatus[] = ["pending","inflight","done","dead","aborted"];
|
||||
const status = (statusParam && (allowed as string[]).includes(statusParam))
|
||||
? statusParam as OutboxStatus
|
||||
: undefined;
|
||||
const limitRaw = url.searchParams.get("limit");
|
||||
const limit = limitRaw ? Number.parseInt(limitRaw, 10) : undefined;
|
||||
const rows = listOutbox(opts.outboxDb, {
|
||||
status,
|
||||
limit: Number.isFinite(limit ?? NaN) ? limit : undefined,
|
||||
});
|
||||
respond(res, 200, {
|
||||
items: rows.map((r) => ({
|
||||
id: r.id,
|
||||
client_message_id: r.client_message_id,
|
||||
status: r.status,
|
||||
attempts: r.attempts,
|
||||
enqueued_at: new Date(r.enqueued_at).toISOString(),
|
||||
next_attempt_at: new Date(r.next_attempt_at).toISOString(),
|
||||
delivered_at: r.delivered_at ? new Date(r.delivered_at).toISOString() : null,
|
||||
broker_message_id: r.broker_message_id,
|
||||
last_error: r.last_error,
|
||||
aborted_at: r.aborted_at ? new Date(r.aborted_at).toISOString() : null,
|
||||
aborted_by: r.aborted_by,
|
||||
superseded_by: r.superseded_by,
|
||||
payload_bytes: r.payload?.byteLength ?? 0,
|
||||
})),
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
if (req.method === "POST" && url.pathname === "/v1/outbox/requeue") {
|
||||
if (!opts.outboxDb) { respond(res, 503, { error: "outbox not initialised" }); return; }
|
||||
try {
|
||||
const body = await readJsonBody(req, 4 * 1024) as Record<string, unknown> | null;
|
||||
if (!body || typeof body.id !== "string") { respond(res, 400, { error: "missing 'id'" }); return; }
|
||||
const newId = typeof body.new_client_message_id === "string" && body.new_client_message_id.trim()
|
||||
? body.new_client_message_id.trim()
|
||||
: randomUUID();
|
||||
const result = requeueDeadOrPending(opts.outboxDb, {
|
||||
id: body.id,
|
||||
newClientMessageId: newId,
|
||||
newRowId: randomUUID(),
|
||||
now: Date.now(),
|
||||
abortedBy: typeof body.aborted_by === "string" ? body.aborted_by : "operator",
|
||||
});
|
||||
if (!result) {
|
||||
respond(res, 409, { error: "row not found, already aborted, or already done" });
|
||||
return;
|
||||
}
|
||||
respond(res, 200, {
|
||||
aborted_row_id: result.abortedRowId,
|
||||
new_row_id: result.newRowId,
|
||||
new_client_message_id: result.newClientMessageId,
|
||||
});
|
||||
opts.onPendingInserted?.();
|
||||
} catch (e) {
|
||||
respond(res, 400, { error: String(e) });
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (req.method === "POST" && url.pathname === "/v1/send") {
|
||||
if (!opts.outboxDb) {
|
||||
respond(res, 503, { error: "outbox not initialised" });
|
||||
return;
|
||||
}
|
||||
try {
|
||||
const body = await readJsonBody(req, 256 * 1024);
|
||||
const parsed = parseSendRequest(body, req.headers["idempotency-key"]);
|
||||
if ("error" in parsed) {
|
||||
respond(res, 400, { error: parsed.error });
|
||||
return;
|
||||
}
|
||||
const outcome = acceptSend(parsed.req, { db: opts.outboxDb });
|
||||
switch (outcome.kind) {
|
||||
case "accepted_pending":
|
||||
respond(res, outcome.status, {
|
||||
client_message_id: outcome.client_message_id,
|
||||
status: "queued",
|
||||
});
|
||||
opts.onPendingInserted?.();
|
||||
return;
|
||||
case "accepted_inflight":
|
||||
respond(res, outcome.status, {
|
||||
client_message_id: outcome.client_message_id,
|
||||
status: "inflight",
|
||||
});
|
||||
return;
|
||||
case "accepted_done":
|
||||
respond(res, outcome.status, {
|
||||
client_message_id: outcome.client_message_id,
|
||||
broker_message_id: outcome.broker_message_id,
|
||||
duplicate: true,
|
||||
});
|
||||
return;
|
||||
case "conflict":
|
||||
respond(res, outcome.status, {
|
||||
error: "idempotency_key_reused",
|
||||
conflict: outcome.reason,
|
||||
daemon_fingerprint_prefix: outcome.daemon_fingerprint_prefix,
|
||||
broker_message_id: outcome.broker_message_id ?? null,
|
||||
});
|
||||
return;
|
||||
}
|
||||
} catch (err) {
|
||||
opts.log("error", "ipc_send_failed", { err: String(err) });
|
||||
respond(res, 500, { error: "internal" });
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
respond(res, 404, { error: "not found" });
|
||||
};
|
||||
}
|
||||
|
||||
async function readJsonBody(req: IncomingMessage, maxBytes: number): Promise<unknown> {
|
||||
const chunks: Buffer[] = [];
|
||||
let total = 0;
|
||||
for await (const chunk of req) {
|
||||
const buf = chunk as Buffer;
|
||||
total += buf.length;
|
||||
if (total > maxBytes) throw new Error("payload_too_large");
|
||||
chunks.push(buf);
|
||||
}
|
||||
if (total === 0) return null;
|
||||
const text = Buffer.concat(chunks).toString("utf8");
|
||||
try { return JSON.parse(text); }
|
||||
catch { throw new Error("invalid_json"); }
|
||||
}
|
||||
|
||||
interface ParsedSend { req: SendRequest }
|
||||
interface ParseError { error: string }
|
||||
|
||||
function parseSendRequest(body: unknown, idempotencyHeader: string | string[] | undefined): ParsedSend | ParseError {
|
||||
if (!body || typeof body !== "object") return { error: "expected JSON object" };
|
||||
const b = body as Record<string, unknown>;
|
||||
|
||||
const to = typeof b.to === "string" ? b.to.trim() : "";
|
||||
const message = typeof b.message === "string" ? b.message : "";
|
||||
if (!to) return { error: "missing 'to'" };
|
||||
if (!message) return { error: "missing 'message'" };
|
||||
|
||||
const priority = b.priority;
|
||||
if (priority !== undefined && priority !== "now" && priority !== "next" && priority !== "low") {
|
||||
return { error: "priority must be 'now' | 'next' | 'low'" };
|
||||
}
|
||||
|
||||
const meta = b.meta;
|
||||
if (meta !== undefined && meta !== null && (typeof meta !== "object" || Array.isArray(meta))) {
|
||||
return { error: "'meta' must be an object" };
|
||||
}
|
||||
|
||||
// Resolve destination_kind / destination_ref from the `to` shape.
|
||||
// For v0.9.0 we accept three forms:
|
||||
// "@<topic>" → topic
|
||||
// "*" → broadcast (modeled as topic *)
|
||||
// anything else → dm to peer name|pubkey (resolution happens later)
|
||||
let destination_kind: SendRequest["destination_kind"];
|
||||
let destination_ref: string;
|
||||
if (to.startsWith("@")) {
|
||||
destination_kind = "topic";
|
||||
destination_ref = to.slice(1);
|
||||
} else if (to === "*") {
|
||||
destination_kind = "topic";
|
||||
destination_ref = "*";
|
||||
} else {
|
||||
destination_kind = "dm";
|
||||
destination_ref = to;
|
||||
}
|
||||
|
||||
const headerId = Array.isArray(idempotencyHeader) ? idempotencyHeader[0] : idempotencyHeader;
|
||||
const client_message_id = typeof b.client_message_id === "string" && b.client_message_id.trim()
|
||||
? b.client_message_id.trim()
|
||||
: (typeof headerId === "string" && headerId.trim() ? headerId.trim() : undefined);
|
||||
|
||||
const reply_to_id = typeof b.reply_to_id === "string" ? b.reply_to_id : undefined;
|
||||
|
||||
return {
|
||||
req: {
|
||||
to,
|
||||
message,
|
||||
priority: priority as SendRequest["priority"] | undefined,
|
||||
meta: (meta as Record<string, unknown> | undefined) ?? undefined,
|
||||
reply_to_id,
|
||||
client_message_id,
|
||||
destination_kind,
|
||||
destination_ref,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
function respond(res: ServerResponse, status: number, body: unknown) {
|
||||
const json = JSON.stringify(body);
|
||||
res.statusCode = status;
|
||||
res.setHeader("Content-Type", "application/json");
|
||||
res.setHeader("Content-Length", Buffer.byteLength(json));
|
||||
res.end(json);
|
||||
}
|
||||
26
apps/cli/src/daemon/local-token.ts
Normal file
26
apps/cli/src/daemon/local-token.ts
Normal file
@@ -0,0 +1,26 @@
|
||||
import { mkdirSync, readFileSync, writeFileSync } from "node:fs";
|
||||
import { dirname } from "node:path";
|
||||
import { randomBytes } from "node:crypto";
|
||||
|
||||
import { DAEMON_PATHS } from "./paths.js";
|
||||
|
||||
/**
|
||||
* Local IPC bearer token. Mode 0600. Rotated by deleting the file and
|
||||
* restarting the daemon.
|
||||
*/
|
||||
export function readLocalToken(): string | null {
|
||||
try {
|
||||
return readFileSync(DAEMON_PATHS.TOKEN_FILE, "utf8").trim();
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
export function ensureLocalToken(): string {
|
||||
const existing = readLocalToken();
|
||||
if (existing) return existing;
|
||||
mkdirSync(dirname(DAEMON_PATHS.TOKEN_FILE), { recursive: true, mode: 0o700 });
|
||||
const tok = randomBytes(32).toString("base64url");
|
||||
writeFileSync(DAEMON_PATHS.TOKEN_FILE, tok + "\n", { mode: 0o600 });
|
||||
return tok;
|
||||
}
|
||||
59
apps/cli/src/daemon/lock.ts
Normal file
59
apps/cli/src/daemon/lock.ts
Normal file
@@ -0,0 +1,59 @@
|
||||
import { existsSync, mkdirSync, readFileSync, unlinkSync, writeFileSync } from "node:fs";
|
||||
import { dirname } from "node:path";
|
||||
|
||||
import { DAEMON_PATHS } from "./paths.js";
|
||||
|
||||
/**
|
||||
* Single-instance lock via PID file. Returns:
|
||||
* - 'acquired' — we hold the lock now, file written.
|
||||
* - 'already-running' — another live process owns it.
|
||||
* - 'stale' — file existed but the recorded PID is dead;
|
||||
* caller should treat as acquired (we overwrote it).
|
||||
*/
|
||||
export type LockResult = "acquired" | "already-running" | "stale";
|
||||
|
||||
export function acquireSingletonLock(): { result: LockResult; pid: number } {
|
||||
mkdirSync(dirname(DAEMON_PATHS.PID_FILE), { recursive: true, mode: 0o700 });
|
||||
|
||||
if (existsSync(DAEMON_PATHS.PID_FILE)) {
|
||||
const raw = readFileSync(DAEMON_PATHS.PID_FILE, "utf8").trim();
|
||||
const oldPid = Number.parseInt(raw, 10);
|
||||
if (Number.isFinite(oldPid) && oldPid > 0 && isProcessAlive(oldPid)) {
|
||||
return { result: "already-running", pid: oldPid };
|
||||
}
|
||||
// stale → unlink and re-acquire
|
||||
try { unlinkSync(DAEMON_PATHS.PID_FILE); } catch { /* race with another acquirer; tolerate */ }
|
||||
writeFileSync(DAEMON_PATHS.PID_FILE, String(process.pid), { mode: 0o600 });
|
||||
return { result: "stale", pid: process.pid };
|
||||
}
|
||||
|
||||
writeFileSync(DAEMON_PATHS.PID_FILE, String(process.pid), { mode: 0o600 });
|
||||
return { result: "acquired", pid: process.pid };
|
||||
}
|
||||
|
||||
export function releaseSingletonLock(): void {
|
||||
try {
|
||||
const raw = readFileSync(DAEMON_PATHS.PID_FILE, "utf8").trim();
|
||||
if (Number.parseInt(raw, 10) === process.pid) unlinkSync(DAEMON_PATHS.PID_FILE);
|
||||
} catch { /* file already gone, fine */ }
|
||||
}
|
||||
|
||||
export function readRunningPid(): number | null {
|
||||
try {
|
||||
const raw = readFileSync(DAEMON_PATHS.PID_FILE, "utf8").trim();
|
||||
const pid = Number.parseInt(raw, 10);
|
||||
if (Number.isFinite(pid) && pid > 0 && isProcessAlive(pid)) return pid;
|
||||
} catch { /* no pid file */ }
|
||||
return null;
|
||||
}
|
||||
|
||||
function isProcessAlive(pid: number): boolean {
|
||||
try {
|
||||
// signal 0: no-op; throws if process doesn't exist or we lack permission.
|
||||
// EPERM means it does exist (just not ours), so treat as alive.
|
||||
process.kill(pid, 0);
|
||||
return true;
|
||||
} catch (err) {
|
||||
return (err as NodeJS.ErrnoException).code === "EPERM";
|
||||
}
|
||||
}
|
||||
16
apps/cli/src/daemon/paths.ts
Normal file
16
apps/cli/src/daemon/paths.ts
Normal file
@@ -0,0 +1,16 @@
|
||||
import { join } from "node:path";
|
||||
|
||||
import { PATHS } from "~/constants/paths.js";
|
||||
|
||||
export const DAEMON_PATHS = {
|
||||
get DAEMON_DIR() { return join(PATHS.CONFIG_DIR, "daemon"); },
|
||||
get PID_FILE() { return join(this.DAEMON_DIR, "daemon.pid"); },
|
||||
get SOCK_FILE() { return join(this.DAEMON_DIR, "daemon.sock"); },
|
||||
get TOKEN_FILE() { return join(this.DAEMON_DIR, "local-token"); },
|
||||
get OUTBOX_DB() { return join(this.DAEMON_DIR, "outbox.db"); },
|
||||
get INBOX_DB() { return join(this.DAEMON_DIR, "inbox.db"); },
|
||||
get LOG_FILE() { return join(this.DAEMON_DIR, "daemon.log"); },
|
||||
} as const;
|
||||
|
||||
export const DAEMON_TCP_HOST = "127.0.0.1";
|
||||
export const DAEMON_TCP_DEFAULT_PORT = 47823;
|
||||
200
apps/cli/src/daemon/run.ts
Normal file
200
apps/cli/src/daemon/run.ts
Normal file
@@ -0,0 +1,200 @@
|
||||
import { existsSync, mkdirSync, readFileSync } from "node:fs";
|
||||
|
||||
import { DAEMON_PATHS } from "./paths.js";
|
||||
import { acquireSingletonLock, releaseSingletonLock } from "./lock.js";
|
||||
import { ensureLocalToken } from "./local-token.js";
|
||||
import { startIpcServer } from "./ipc/server.js";
|
||||
import { openSqlite, type SqliteDb } from "./db/sqlite.js";
|
||||
import { migrateOutbox } from "./db/outbox.js";
|
||||
import { migrateInbox } from "./db/inbox.js";
|
||||
import { DaemonBrokerClient } from "./broker.js";
|
||||
import { startDrainWorker, type DrainHandle } from "./drain.js";
|
||||
import { handleBrokerPush } from "./inbound.js";
|
||||
import { EventBus } from "./events.js";
|
||||
import { checkFingerprint, type ClonePolicy } from "./identity.js";
|
||||
import { readConfig } from "~/services/config/facade.js";
|
||||
|
||||
export interface RunDaemonOptions {
|
||||
/** Disable TCP loopback (UDS-only). Defaults true in container envs. */
|
||||
tcpEnabled?: boolean;
|
||||
publicHealthCheck?: boolean;
|
||||
/** Mesh slug to attach to. Required when the user has joined multiple meshes. */
|
||||
mesh?: string;
|
||||
/** Daemon's display name on the mesh. */
|
||||
displayName?: string;
|
||||
/** Behavior on host_fingerprint mismatch. Defaults 'refuse'. */
|
||||
clonePolicy?: ClonePolicy;
|
||||
}
|
||||
|
||||
/** Detect a few common container environments to pick UDS-only by default. */
|
||||
function detectContainer(): boolean {
|
||||
if (process.env.KUBERNETES_SERVICE_HOST) return true;
|
||||
if (process.env.CONTAINER === "1") return true;
|
||||
try {
|
||||
if (existsSync("/.dockerenv")) return true;
|
||||
const cg = readFileSync("/proc/1/cgroup", "utf8");
|
||||
if (/(docker|kubepods|containerd)/.test(cg)) return true;
|
||||
} catch { /* not linux or no /proc */ }
|
||||
return false;
|
||||
}
|
||||
|
||||
export async function runDaemon(opts: RunDaemonOptions = {}): Promise<number> {
|
||||
mkdirSync(DAEMON_PATHS.DAEMON_DIR, { recursive: true, mode: 0o700 });
|
||||
|
||||
const lock = acquireSingletonLock();
|
||||
if (lock.result === "already-running") {
|
||||
process.stderr.write(`daemon already running (pid ${lock.pid})\n`);
|
||||
return 1;
|
||||
}
|
||||
if (lock.result === "stale") {
|
||||
process.stderr.write(`recovered stale pid file; starting fresh\n`);
|
||||
}
|
||||
|
||||
// Accidental-clone detection (spec §2.2). Default policy: refuse.
|
||||
const fpCheck = checkFingerprint();
|
||||
const policy: ClonePolicy = opts.clonePolicy ?? "refuse";
|
||||
if (fpCheck.result === "mismatch") {
|
||||
const msg = `host_fingerprint mismatch: this daemon dir was started on a different host.`;
|
||||
if (policy === "refuse") {
|
||||
process.stderr.write(`${msg}\n`);
|
||||
process.stderr.write(` stored host_id: ${fpCheck.stored?.host_id}\n`);
|
||||
process.stderr.write(` current host_id: ${fpCheck.current.host_id}\n`);
|
||||
process.stderr.write(`Run \`claudemesh daemon accept-host\` to write a fresh fingerprint, or\n`);
|
||||
process.stderr.write(`run \`claudemesh daemon remint\` to mint a new keypair (Sprint 7+).\n`);
|
||||
releaseSingletonLock();
|
||||
return 4;
|
||||
}
|
||||
if (policy === "warn") {
|
||||
process.stderr.write(`WARN: ${msg} (continuing per [clone] policy=warn)\n`);
|
||||
}
|
||||
// 'allow' is silent.
|
||||
}
|
||||
if (fpCheck.result === "first_run") {
|
||||
process.stdout.write(JSON.stringify({
|
||||
msg: "host_fingerprint_written", fingerprint_prefix: fpCheck.current.fingerprint.slice(0, 16), ts: new Date().toISOString(),
|
||||
}) + "\n");
|
||||
}
|
||||
|
||||
const localToken = ensureLocalToken();
|
||||
const tcpEnabled = opts.tcpEnabled ?? !detectContainer();
|
||||
|
||||
let outboxDb: SqliteDb;
|
||||
let inboxDb: SqliteDb;
|
||||
try {
|
||||
outboxDb = await openSqlite(DAEMON_PATHS.OUTBOX_DB);
|
||||
migrateOutbox(outboxDb);
|
||||
inboxDb = await openSqlite(DAEMON_PATHS.INBOX_DB);
|
||||
migrateInbox(inboxDb);
|
||||
} catch (err) {
|
||||
process.stderr.write(`db open failed: ${String(err)}\n`);
|
||||
releaseSingletonLock();
|
||||
return 1;
|
||||
}
|
||||
|
||||
const bus = new EventBus();
|
||||
|
||||
// Pick the mesh. If the user joined exactly one, use it; otherwise
|
||||
// require --mesh. Daemon CAN start with no mesh — the outbox will
|
||||
// accept rows but `dead` them after retries because the broker is
|
||||
// never reachable. Better to fail fast.
|
||||
const cfg = readConfig();
|
||||
let mesh = null as null | typeof cfg.meshes[number];
|
||||
if (opts.mesh) {
|
||||
mesh = cfg.meshes.find((m) => m.slug === opts.mesh) ?? null;
|
||||
if (!mesh) {
|
||||
process.stderr.write(`mesh not found: ${opts.mesh}\n`);
|
||||
process.stderr.write(`joined meshes: ${cfg.meshes.map((m) => m.slug).join(", ") || "(none)"}\n`);
|
||||
releaseSingletonLock();
|
||||
try { outboxDb.close(); } catch { /* ignore */ }
|
||||
return 2;
|
||||
}
|
||||
} else if (cfg.meshes.length === 1) {
|
||||
mesh = cfg.meshes[0]!;
|
||||
} else if (cfg.meshes.length === 0) {
|
||||
process.stderr.write(`no mesh joined; run \`claudemesh join <invite-url>\` first\n`);
|
||||
releaseSingletonLock();
|
||||
try { outboxDb.close(); } catch { /* ignore */ }
|
||||
return 2;
|
||||
} else {
|
||||
process.stderr.write(`multiple meshes joined; pass --mesh <slug>\n`);
|
||||
process.stderr.write(`available: ${cfg.meshes.map((m) => m.slug).join(", ")}\n`);
|
||||
releaseSingletonLock();
|
||||
try { outboxDb.close(); } catch { /* ignore */ }
|
||||
return 2;
|
||||
}
|
||||
|
||||
// Connect to broker (non-fatal: connection failures get retried;
|
||||
// outbox keeps queuing during outages).
|
||||
const broker = new DaemonBrokerClient(mesh, {
|
||||
displayName: opts.displayName,
|
||||
onStatusChange: (s) => {
|
||||
process.stdout.write(JSON.stringify({
|
||||
msg: "broker_status", status: s, mesh: mesh!.slug, ts: new Date().toISOString(),
|
||||
}) + "\n");
|
||||
bus.publish("broker_status", { mesh: mesh!.slug, status: s });
|
||||
},
|
||||
onPush: (m) => {
|
||||
const sessionKeys = broker.getSessionKeys();
|
||||
void handleBrokerPush(m, {
|
||||
db: inboxDb,
|
||||
bus,
|
||||
meshSlug: mesh!.slug,
|
||||
recipientSecretKeyHex: mesh!.secretKey,
|
||||
sessionSecretKeyHex: sessionKeys?.sessionSecretKey,
|
||||
});
|
||||
},
|
||||
});
|
||||
broker.connect().catch((err) => process.stderr.write(`broker connect failed: ${String(err)}\n`));
|
||||
|
||||
// Start the drain worker.
|
||||
let drain: DrainHandle | null = null;
|
||||
drain = startDrainWorker({ db: outboxDb, broker });
|
||||
|
||||
const ipc = startIpcServer({
|
||||
localToken,
|
||||
tcpEnabled,
|
||||
publicHealthCheck: opts.publicHealthCheck,
|
||||
outboxDb,
|
||||
inboxDb,
|
||||
bus,
|
||||
broker,
|
||||
onPendingInserted: () => drain?.wake(),
|
||||
});
|
||||
|
||||
try {
|
||||
await ipc.ready;
|
||||
} catch (err) {
|
||||
process.stderr.write(`ipc listen failed: ${String(err)}\n`);
|
||||
releaseSingletonLock();
|
||||
return 1;
|
||||
}
|
||||
|
||||
process.stdout.write(JSON.stringify({
|
||||
msg: "daemon_started",
|
||||
pid: process.pid,
|
||||
sock: DAEMON_PATHS.SOCK_FILE,
|
||||
tcp: tcpEnabled ? `127.0.0.1:47823` : null,
|
||||
mesh: mesh.slug,
|
||||
ts: new Date().toISOString(),
|
||||
}) + "\n");
|
||||
|
||||
let shuttingDown = false;
|
||||
const shutdown = async (sig: string) => {
|
||||
if (shuttingDown) return;
|
||||
shuttingDown = true;
|
||||
process.stdout.write(JSON.stringify({ msg: "daemon_shutdown", signal: sig, ts: new Date().toISOString() }) + "\n");
|
||||
if (drain) await drain.close();
|
||||
await broker.close();
|
||||
await ipc.close();
|
||||
try { outboxDb.close(); } catch { /* ignore */ }
|
||||
try { inboxDb.close(); } catch { /* ignore */ }
|
||||
releaseSingletonLock();
|
||||
process.exit(0);
|
||||
};
|
||||
|
||||
process.on("SIGINT", () => shutdown("SIGINT"));
|
||||
process.on("SIGTERM", () => shutdown("SIGTERM"));
|
||||
|
||||
// Hold the event loop open until a signal arrives.
|
||||
return new Promise<number>(() => { /* never resolves; signals call process.exit */ });
|
||||
}
|
||||
198
apps/cli/src/daemon/service-install.ts
Normal file
198
apps/cli/src/daemon/service-install.ts
Normal file
@@ -0,0 +1,198 @@
|
||||
// Service-install for daemon mode (spec §9). Two platforms:
|
||||
// - macOS: ~/Library/LaunchAgents/com.claudemesh.daemon.plist (launchctl bootstrap)
|
||||
// - Linux: ~/.config/systemd/user/claudemesh-daemon.service (systemctl --user enable)
|
||||
//
|
||||
// Both run as the invoking user, redirect stdout/stderr to ~/.claudemesh/
|
||||
// daemon/daemon.log, restart on crash, and start at login. CI envs are
|
||||
// refused unless --allow-ci-persistent is passed (spec §9 / §16.3).
|
||||
|
||||
import { existsSync, mkdirSync, writeFileSync, unlinkSync, readFileSync } from "node:fs";
|
||||
import { execSync } from "node:child_process";
|
||||
import { homedir } from "node:os";
|
||||
import { join, dirname } from "node:path";
|
||||
|
||||
import { DAEMON_PATHS } from "./paths.js";
|
||||
|
||||
export type ServicePlatform = "darwin" | "linux";
|
||||
export interface InstallResult {
|
||||
platform: ServicePlatform;
|
||||
unitPath: string;
|
||||
/** Shell snippet that the operator can run to bring the service up now. */
|
||||
bootCommand: string;
|
||||
}
|
||||
|
||||
const SERVICE_LABEL = "com.claudemesh.daemon";
|
||||
const SYSTEMD_UNIT = "claudemesh-daemon.service";
|
||||
|
||||
export function detectPlatform(): ServicePlatform | null {
|
||||
if (process.platform === "darwin") return "darwin";
|
||||
if (process.platform === "linux") return "linux";
|
||||
return null;
|
||||
}
|
||||
|
||||
function isCi(): boolean {
|
||||
return !!(process.env.CI || process.env.GITHUB_ACTIONS || process.env.GITLAB_CI || process.env.BUILDKITE
|
||||
|| process.env.CIRCLECI || process.env.JENKINS_URL);
|
||||
}
|
||||
|
||||
export interface InstallArgs {
|
||||
/** Path to the `claudemesh` binary, e.g. /opt/homebrew/bin/claudemesh */
|
||||
binaryPath: string;
|
||||
/** Mesh slug to attach to. */
|
||||
meshSlug: string;
|
||||
/** Optional display name. */
|
||||
displayName?: string;
|
||||
/** Override the auto-detected CI refusal. */
|
||||
allowCi?: boolean;
|
||||
}
|
||||
|
||||
export function installService(args: InstallArgs): InstallResult {
|
||||
const platform = detectPlatform();
|
||||
if (!platform) throw new Error(`unsupported platform: ${process.platform}`);
|
||||
if (isCi() && !args.allowCi) {
|
||||
throw new Error("Refusing to install persistent service in CI; pass --allow-ci-persistent to override.");
|
||||
}
|
||||
if (!existsSync(args.binaryPath)) {
|
||||
throw new Error(`binary not found at ${args.binaryPath}`);
|
||||
}
|
||||
// Make sure the daemon dir exists so the launchd/systemd log paths resolve.
|
||||
mkdirSync(DAEMON_PATHS.DAEMON_DIR, { recursive: true, mode: 0o700 });
|
||||
|
||||
if (platform === "darwin") return installDarwin(args);
|
||||
return installLinux(args);
|
||||
}
|
||||
|
||||
export function uninstallService(): { platform: ServicePlatform | null; removed: string[] } {
|
||||
const platform = detectPlatform();
|
||||
const removed: string[] = [];
|
||||
if (platform === "darwin") {
|
||||
const p = darwinPlistPath();
|
||||
try { execSync(`launchctl bootout gui/$(id -u)/${SERVICE_LABEL}`, { stdio: "ignore" }); } catch { /* not loaded */ }
|
||||
if (existsSync(p)) { unlinkSync(p); removed.push(p); }
|
||||
} else if (platform === "linux") {
|
||||
const p = linuxUnitPath();
|
||||
try { execSync(`systemctl --user disable --now ${SYSTEMD_UNIT}`, { stdio: "ignore" }); } catch { /* not loaded */ }
|
||||
if (existsSync(p)) { unlinkSync(p); removed.push(p); }
|
||||
}
|
||||
return { platform, removed };
|
||||
}
|
||||
|
||||
// ── macOS ──────────────────────────────────────────────────────────────
|
||||
|
||||
function darwinPlistPath(): string {
|
||||
return join(homedir(), "Library", "LaunchAgents", `${SERVICE_LABEL}.plist`);
|
||||
}
|
||||
|
||||
function installDarwin(args: InstallArgs): InstallResult {
|
||||
const plist = darwinPlistPath();
|
||||
mkdirSync(dirname(plist), { recursive: true });
|
||||
const log = DAEMON_PATHS.LOG_FILE;
|
||||
const meshArgs = [
|
||||
"<string>daemon</string>",
|
||||
"<string>up</string>",
|
||||
"<string>--mesh</string>",
|
||||
`<string>${escapeXml(args.meshSlug)}</string>`,
|
||||
...(args.displayName ? ["<string>--name</string>", `<string>${escapeXml(args.displayName)}</string>`] : []),
|
||||
].join("\n ");
|
||||
|
||||
const xml = `<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
||||
<plist version="1.0">
|
||||
<dict>
|
||||
<key>Label</key>
|
||||
<string>${SERVICE_LABEL}</string>
|
||||
<key>ProgramArguments</key>
|
||||
<array>
|
||||
<string>${escapeXml(args.binaryPath)}</string>
|
||||
${meshArgs}
|
||||
</array>
|
||||
<key>RunAtLoad</key>
|
||||
<true/>
|
||||
<key>KeepAlive</key>
|
||||
<true/>
|
||||
<key>StandardOutPath</key>
|
||||
<string>${escapeXml(log)}</string>
|
||||
<key>StandardErrorPath</key>
|
||||
<string>${escapeXml(log)}</string>
|
||||
<key>WorkingDirectory</key>
|
||||
<string>${escapeXml(homedir())}</string>
|
||||
<key>EnvironmentVariables</key>
|
||||
<dict>
|
||||
<key>HOME</key>
|
||||
<string>${escapeXml(homedir())}</string>
|
||||
<key>PATH</key>
|
||||
<string>/usr/local/bin:/opt/homebrew/bin:/usr/bin:/bin:/usr/sbin:/sbin</string>
|
||||
</dict>
|
||||
</dict>
|
||||
</plist>
|
||||
`;
|
||||
writeFileSync(plist, xml, { mode: 0o644 });
|
||||
|
||||
return {
|
||||
platform: "darwin",
|
||||
unitPath: plist,
|
||||
bootCommand: `launchctl bootstrap gui/$(id -u) ${shellQuote(plist)}`,
|
||||
};
|
||||
}
|
||||
|
||||
// ── Linux ──────────────────────────────────────────────────────────────
|
||||
|
||||
function linuxUnitPath(): string {
|
||||
return join(homedir(), ".config", "systemd", "user", SYSTEMD_UNIT);
|
||||
}
|
||||
|
||||
function installLinux(args: InstallArgs): InstallResult {
|
||||
const unit = linuxUnitPath();
|
||||
mkdirSync(dirname(unit), { recursive: true });
|
||||
const execArgs = [
|
||||
"daemon", "up",
|
||||
"--mesh", args.meshSlug,
|
||||
...(args.displayName ? ["--name", args.displayName] : []),
|
||||
].map(shellQuote).join(" ");
|
||||
|
||||
const content = `[Unit]
|
||||
Description=claudemesh daemon (peer mesh runtime)
|
||||
After=network-online.target
|
||||
Wants=network-online.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
ExecStart=${shellQuote(args.binaryPath)} ${execArgs}
|
||||
Restart=always
|
||||
RestartSec=3
|
||||
StandardOutput=append:${DAEMON_PATHS.LOG_FILE}
|
||||
StandardError=append:${DAEMON_PATHS.LOG_FILE}
|
||||
Environment=PATH=/usr/local/bin:/usr/bin:/bin
|
||||
|
||||
[Install]
|
||||
WantedBy=default.target
|
||||
`;
|
||||
writeFileSync(unit, content, { mode: 0o644 });
|
||||
|
||||
return {
|
||||
platform: "linux",
|
||||
unitPath: unit,
|
||||
bootCommand: `systemctl --user daemon-reload && systemctl --user enable --now ${SYSTEMD_UNIT}`,
|
||||
};
|
||||
}
|
||||
|
||||
// ── helpers ────────────────────────────────────────────────────────────
|
||||
|
||||
function escapeXml(s: string): string {
|
||||
return s.replace(/&/g, "&").replace(/</g, "<").replace(/>/g, ">").replace(/"/g, """);
|
||||
}
|
||||
|
||||
function shellQuote(s: string): string {
|
||||
if (/^[\w@%+=:,./-]+$/.test(s)) return s;
|
||||
return "'" + s.replace(/'/g, "'\"'\"'") + "'";
|
||||
}
|
||||
|
||||
/** Diagnostic helper: dump current install status for `claudemesh daemon status --json`. */
|
||||
export function readInstalledUnit(): { platform: ServicePlatform | null; path: string | null; content: string | null } {
|
||||
const platform = detectPlatform();
|
||||
if (!platform) return { platform: null, path: null, content: null };
|
||||
const path = platform === "darwin" ? darwinPlistPath() : linuxUnitPath();
|
||||
if (!existsSync(path)) return { platform, path: null, content: null };
|
||||
try { return { platform, path, content: readFileSync(path, "utf8") }; }
|
||||
catch { return { platform, path, content: null }; }
|
||||
}
|
||||
@@ -377,6 +377,30 @@ async function main(): Promise<void> {
|
||||
case "logout": { const { logout } = await import("~/commands/logout.js"); process.exit(await logout()); break; }
|
||||
case "whoami": { const { whoami } = await import("~/commands/whoami.js"); process.exit(await whoami({ json: !!flags.json })); break; }
|
||||
|
||||
// Daemon (v0.9.0)
|
||||
case "daemon": {
|
||||
const { runDaemonCommand } = await import("~/commands/daemon.js");
|
||||
const sub = positionals[0];
|
||||
const rest = positionals.slice(1);
|
||||
const outboxStatus =
|
||||
flags.failed ? "dead" :
|
||||
flags.pending ? "pending" :
|
||||
flags.inflight ? "inflight" :
|
||||
flags.done ? "done" :
|
||||
flags.aborted ? "aborted" : undefined;
|
||||
const code = await runDaemonCommand(sub, {
|
||||
json: !!flags.json,
|
||||
noTcp: !!flags["no-tcp"],
|
||||
publicHealth: !!flags["public-health"],
|
||||
mesh: flags.mesh as string | undefined,
|
||||
displayName: flags.name as string | undefined,
|
||||
outboxStatus,
|
||||
newClientId: flags["new-client-id"] as string | undefined,
|
||||
}, rest);
|
||||
process.exit(code);
|
||||
break;
|
||||
}
|
||||
|
||||
// Setup
|
||||
case "install": { const { runInstall } = await import("~/commands/install.js"); runInstall(positionals); break; }
|
||||
case "uninstall": { const { uninstall } = await import("~/commands/uninstall.js"); process.exit(await uninstall()); break; }
|
||||
|
||||
68
apps/cli/src/services/bridge/daemon-route.ts
Normal file
68
apps/cli/src/services/bridge/daemon-route.ts
Normal file
@@ -0,0 +1,68 @@
|
||||
// Try forwarding a send through the local daemon's IPC. Returns null if
|
||||
// the daemon isn't running or the daemon's mesh doesn't match the target
|
||||
// mesh — the caller falls back to the bridge or cold path.
|
||||
|
||||
import { existsSync } from "node:fs";
|
||||
|
||||
import { ipc } from "~/daemon/ipc/client.js";
|
||||
import { DAEMON_PATHS } from "~/daemon/paths.js";
|
||||
|
||||
export type DaemonSendOk = {
|
||||
ok: true;
|
||||
messageId: string;
|
||||
duplicate?: boolean;
|
||||
status?: "queued" | "inflight";
|
||||
};
|
||||
export type DaemonSendErr = { ok: false; error: string };
|
||||
export type DaemonSendResult = DaemonSendOk | DaemonSendErr;
|
||||
|
||||
export async function trySendViaDaemon(args: {
|
||||
to: string;
|
||||
message: string;
|
||||
priority: "now" | "next" | "low";
|
||||
/** Caller-stable id for cross-invocation idempotency. Optional. */
|
||||
idempotencyKey?: string;
|
||||
/** When set, only forward to the daemon if it's attached to this mesh.
|
||||
* We can't query the daemon's mesh today (no IPC route exposes it),
|
||||
* so for v0.9.0 this is informational; the caller already picked the
|
||||
* right mesh by either flag or single-mesh-default. */
|
||||
expectedMesh?: string;
|
||||
}): Promise<DaemonSendResult | null> {
|
||||
if (!existsSync(DAEMON_PATHS.SOCK_FILE)) return null;
|
||||
|
||||
try {
|
||||
const res = await ipc<{
|
||||
client_message_id?: string;
|
||||
status?: "queued" | "inflight";
|
||||
broker_message_id?: string;
|
||||
duplicate?: boolean;
|
||||
error?: string;
|
||||
}>({
|
||||
method: "POST",
|
||||
path: "/v1/send",
|
||||
timeoutMs: 3_000,
|
||||
body: {
|
||||
to: args.to,
|
||||
message: args.message,
|
||||
priority: args.priority,
|
||||
...(args.idempotencyKey ? { client_message_id: args.idempotencyKey } : {}),
|
||||
},
|
||||
});
|
||||
|
||||
if (res.status === 202 || res.status === 200) {
|
||||
return {
|
||||
ok: true,
|
||||
messageId: res.body.broker_message_id ?? res.body.client_message_id ?? "",
|
||||
duplicate: res.body.duplicate,
|
||||
status: res.body.status,
|
||||
};
|
||||
}
|
||||
return { ok: false, error: res.body.error ?? `daemon http ${res.status}` };
|
||||
} catch (err) {
|
||||
// Connection errors → daemon went away mid-call. Treat as "not present"
|
||||
// so the caller falls back rather than failing.
|
||||
const msg = String(err);
|
||||
if (/ENOENT|ECONNREFUSED|ipc_timeout/.test(msg)) return null;
|
||||
return { ok: false, error: msg };
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user