diff --git a/SPEC.md b/SPEC.md index 202a37f..664b5e9 100644 --- a/SPEC.md +++ b/SPEC.md @@ -28,7 +28,7 @@ A peer is a Claude Code session connected to a mesh. Ephemeral — comes and goe Two-layer identity: - **Member identity** — permanent, created by `claudemesh join`. Keypair stored in `~/.claudemesh/config.json`. Proves authorization to connect. -- **Session identity** — ephemeral, generated on every `claudemesh launch`. Fresh ed25519 keypair per session. Provides routing and E2E encryption. Two sessions from the same member have distinct session keys — they can message each other. +- **Session identity** — anchored on Claude Code's session UUID (the same identity `--resume` is built on). An ed25519 keypair is generated once per `(mesh, session UUID)` and persisted under `~/.claudemesh/sessions/<mesh-slug>/<session-uuid>.json`, so relaunching or resuming the same session reuses the same `sessionPubkey`. Provides routing and E2E encryption. Two distinct sessions from the same member have distinct session keys — they can message each other. Because a DM is sealed to the recipient's `sessionPubkey`, a stable key is what lets queued messages both route to and decrypt on the returning session; the broker enforces one live presence per session pubkey. 
### Peer attributes @@ -39,7 +39,7 @@ Two-layer identity: | groups | `--groups` flag, wizard, or `join_group` | No | Routing labels with optional per-group role | | status | Hook-driven | No | idle / working / dnd | | summary | `set_summary` tool call | No | 1-2 sentence description of current work | -| sessionPubkey | Generated on connect | No | Ephemeral ed25519 pubkey for routing + crypto | +| sessionPubkey | Persisted per `(mesh, session UUID)` | Yes (per session UUID) | ed25519 pubkey for routing + crypto; stable across relaunch/`--resume` | | memberId | From `claudemesh join` | Yes | Permanent mesh membership identity | ### Launch diff --git a/apps/broker/src/index.ts b/apps/broker/src/index.ts index 8c6f4eb..b697e71 100644 --- a/apps/broker/src/index.ts +++ b/apps/broker/src/index.ts @@ -2191,23 +2191,38 @@ async function handleSessionHello( // session leave. for (const [pid, oldConn] of connections) { if (oldConn.meshId !== hello.meshId) continue; - if (oldConn.leaseState !== "offline") continue; if (oldConn.sessionPubkey !== hello.sessionPubkey) continue; + // Same sessionPubkey = same logical session. The CLI now anchors the + // session keypair on Claude Code's session UUID and persists it, so a + // matching pubkey is always the same peer relaunching/resuming — never + // a coincidental collision. Reattach whether the old lease is in its + // 90s grace window OR still nominally "online" (a duplicate/relaunch + // that raced ahead of the old socket's close). The new WS is + // authoritative: cancel any eviction timer, close the stale socket if + // it differs, swap in the new WS, restore online. This is the "one + // presence per session pubkey" invariant — it kills the same-name + // ghost that used to win queued-DM claim races. 
+ const wasState = oldConn.leaseState; if (oldConn.evictionTimer) { clearTimeout(oldConn.evictionTimer); oldConn.evictionTimer = null; } + if (oldConn.ws !== ws) { + try { oldConn.ws.close(1000, "session_replaced"); } catch { /* already dead */ } + } oldConn.ws = ws; oldConn.leaseState = "online"; oldConn.leaseUntil = 0; oldConn.lastPongAt = Date.now(); // Refresh mutable fields from the new hello. + oldConn.sessionId = hello.sessionId; oldConn.cwd = hello.cwd; if (hello.displayName) oldConn.displayName = hello.displayName; - log.info("session_hello reattach (lease)", { + log.info("session_hello reattach", { presence_id: pid, session_pubkey: hello.sessionPubkey.slice(0, 12), + was: wasState, }); void restorePresence(pid); void maybePushQueuedMessages(pid); diff --git a/apps/cli/package.json b/apps/cli/package.json index 8c10023..462a7ce 100644 --- a/apps/cli/package.json +++ b/apps/cli/package.json @@ -1,6 +1,6 @@ { "name": "claudemesh-cli", - "version": "1.34.18", + "version": "1.35.0", "description": "Peer mesh for Claude Code sessions — CLI + MCP server.", "keywords": [ "claude-code", diff --git a/apps/cli/src/commands/launch.ts b/apps/cli/src/commands/launch.ts index c1bfaff..c276bde 100644 --- a/apps/cli/src/commands/launch.ts +++ b/apps/cli/src/commands/launch.ts @@ -42,6 +42,37 @@ export interface LaunchFlags { quiet?: boolean; } +/** + * Resolve the most-recently-active Claude Code session UUID for a cwd by + * inspecting `~/.claude/projects//.jsonl`. Claude Code + * encodes the project dir as the absolute path with every `/` → `-`. + * + * Used by `--continue` (which otherwise gives us no UUID to anchor on) so + * a continued session re-attaches to the same claudemesh peer it last + * represented. Returns undefined when the project dir is absent/empty — + * the caller then falls back to an ephemeral identity. 
/**
 * Resolve the most-recently-active Claude Code session UUID for a cwd by
 * scanning `~/.claude/projects/<project-slug>/` for `<session-uuid>.jsonl`
 * files and returning the UUID of the one with the newest mtime.
 *
 * Used by `--continue` (which otherwise gives us no UUID to anchor on) so
 * a continued session re-attaches to the same claudemesh peer it last
 * represented.
 *
 * Two project-dir encodings are tried, in priority order:
 *   1. the legacy encoding (every `/` → `-`), so any cwd that resolved
 *      before still resolves to the same directory;
 *   2. every non-alphanumeric character → `-`, which also covers cwds
 *      containing `.`, `_`, spaces, or Windows `\` / `:` separators.
 * NOTE(review): (2) is what Claude Code appears to use on disk — confirm
 * against the Claude Code version being targeted.
 *
 * @param cwd Absolute working directory of the launch.
 * @returns The session UUID exactly as spelled in the filename, or
 *   undefined when no candidate project dir exists, none contains a
 *   `<uuid>.jsonl` entry, or the filesystem cannot be read — the caller
 *   then falls back to an ephemeral identity.
 */
function resolveLatestSessionUuid(cwd: string): string | undefined {
  try {
    const projectsRoot = join(homedir(), ".claude", "projects");
    // A Set de-duplicates the common case where both encodings agree
    // (cwd made only of `/` and alphanumerics) while keeping insertion
    // order, so the legacy slug is always consulted first.
    const candidateSlugs = new Set([
      cwd.replace(/\//g, "-"),
      cwd.replace(/[^a-zA-Z0-9]/g, "-"),
    ]);
    for (const slug of candidateSlugs) {
      const found = newestSessionUuidIn(join(projectsRoot, slug));
      if (found) return found;
    }
    return undefined;
  } catch {
    // Never let a lookup problem block the launch.
    return undefined;
  }
}

/** Newest `<uuid>.jsonl` entry (by mtime) directly inside `dir`, if any. */
function newestSessionUuidIn(dir: string): string | undefined {
  const uuidRe = /^([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})\.jsonl$/i;
  let newest: { id: string; mtime: number } | undefined;
  try {
    if (!existsSync(dir)) return undefined;
    for (const entry of readdirSync(dir)) {
      const id = uuidRe.exec(entry)?.[1];
      if (!id) continue;
      try {
        const mtime = statSync(join(dir, entry)).mtimeMs;
        if (!newest || mtime > newest.mtime) newest = { id, mtime };
      } catch {
        /* file vanished mid-scan — skip */
      }
    }
  } catch {
    return undefined;
  }
  return newest?.id;
}
undefined : randomUUID(); + let claudeSessionId: string | undefined; + if (args.resume) { + claudeSessionId = args.resume; + } else if (args.continueSession) { + claudeSessionId = resolveLatestSessionUuid(process.cwd()); + } else { + claudeSessionId = randomUUID(); + } + // Only fresh launches may dictate the UUID via --session-id; --resume + // and --continue carry their own session selection and claude rejects + // --session-id alongside them. + const passSessionIdFlag = !isResume; let sessionTokenFilePath: string | null = null; let sessionTokenForCleanup: string | null = null; try { @@ -780,7 +831,13 @@ export async function runLaunch(flags: LaunchFlags, rawArgs: string[]): Promise< try { const { generateKeypair } = await import("~/services/crypto/facade.js"); const { signParentAttestation } = await import("~/services/broker/session-hello-sig.js"); - const sessionKp = await generateKeypair(); + // Persisted, UUID-anchored keypair so relaunch/--resume reuse the + // same sessionPubkey (queued DMs route AND decrypt). Falls back to + // an ephemeral keypair when we couldn't resolve a stable UUID + // (e.g. --continue with no prior session in this cwd). + const sessionKp = claudeSessionId + ? await (await import("~/services/session/keypair-store.js")).loadOrCreateSessionKeypair(mesh.slug, claudeSessionId) + : await generateKeypair(); const att = await signParentAttestation({ parentMemberPubkey: mesh.pubkey, parentSecretKey: mesh.secretKey, @@ -917,7 +974,7 @@ export async function runLaunch(flags: LaunchFlags, rawArgs: string[]): Promise< const claudeArgs = [ "--dangerously-load-development-channels", "server:claudemesh", - ...(claudeSessionId ? ["--session-id", claudeSessionId] : []), + ...(passSessionIdFlag && claudeSessionId ? ["--session-id", claudeSessionId] : []), ...(args.resume ? ["--resume", args.resume] : []), ...(args.continueSession ? ["--continue"] : []), ...(args.skipPermConfirm ? 
["--dangerously-skip-permissions"] : []), diff --git a/apps/cli/src/daemon/run.ts b/apps/cli/src/daemon/run.ts index 48774ae..47cb8cc 100644 --- a/apps/cli/src/daemon/run.ts +++ b/apps/cli/src/daemon/run.ts @@ -230,6 +230,20 @@ export async function runDaemon(opts: RunDaemonOptions = {}): Promise { } prior.close().catch(() => { /* ignore */ }); } + // Also drop any stale WS holding this session pubkey under a + // DIFFERENT token. With UUID-anchored persistent keypairs a relaunch + // reuses the pubkey, so without this the old SessionBrokerClient + // would linger connected (the broker then sees two presences for one + // pubkey — the same-name ghost that stole queued DMs). Dedup by + // pubkey closes it before the new WS opens. + const priorByPubkey = sessionBrokersByPubkey.get(info.presence.sessionPubkey); + if (priorByPubkey && priorByPubkey !== prior) { + for (const [tok, c] of sessionBrokers) { + if (c === priorByPubkey) { sessionBrokers.delete(tok); break; } + } + sessionBrokersByPubkey.delete(info.presence.sessionPubkey); + priorByPubkey.close().catch(() => { /* ignore */ }); + } // 1.32.1 — wire push delivery. Messages targeted at the launched // session's pubkey land on THIS WS, not on the member-keyed one, // so without this forward they'd silently disappear (the bug that diff --git a/apps/cli/src/services/session/keypair-store.ts b/apps/cli/src/services/session/keypair-store.ts new file mode 100644 index 0000000..596a8f5 --- /dev/null +++ b/apps/cli/src/services/session/keypair-store.ts @@ -0,0 +1,147 @@ +/** + * Persistent per-session ed25519 keypairs, keyed by Claude Code's + * session UUID. + * + * Background. Until this module landed, `claudemesh launch` minted a + * FRESH ephemeral session keypair on every invocation (see + * SPEC.md §"Session identity"). 
That made a peer's routing/crypto + * identity unstable across relaunch and `--resume`: a DM is sealed to + * the recipient's `sessionPubkey` (crypto_box; see services/crypto/box.ts), + * so when the key rotated, any message queued for the old pubkey became + * undecryptable AND the old presence lingered as a ghost on the broker. + * + * The fix anchors session identity on the stable thing Claude Code + * itself uses for resume: the session UUID (scoped to the project/cwd). + * The keypair for a given (mesh, sessionUuid) is generated once and + * persisted, so: + * - relaunching / `--resume`-ing the same session reuses the SAME + * pubkey → the broker reattaches the existing presence and queued + * DMs both route AND decrypt; + * - a genuinely new session (fresh UUID) gets a fresh keypair → it is + * correctly a distinct peer. + * + * Storage. `~/.claudemesh/sessions//.json`, the + * file mode 0o600 inside a 0o700 dir — same secret-hygiene as the IPC + * token store. The secret key lives on disk (like the member key + * already does in the mesh config); the threat-model delta over the old + * ephemeral scheme is small and was an accepted trade for reliable + * delivery. `CLAUDEMESH_SESSIONS_DIR` overrides the root for tests. + */ + +import { randomBytes } from "node:crypto"; +import { existsSync, mkdirSync, readFileSync, renameSync, rmSync, writeFileSync } from "node:fs"; +import { homedir } from "node:os"; +import { join } from "node:path"; + +import { generateKeypair, type Ed25519Keypair } from "~/services/crypto/facade.js"; + +const UUID_RE = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i; +const SLUG_RE = /^[a-z0-9._-]+$/i; + +interface StoredKeypair { + version: 1; + meshSlug: string; + sessionId: string; + publicKey: string; + secretKey: string; + createdAt: string; +} + +/** Root dir for persisted session keypairs. 
/**
 * Root directory for persisted session keypairs.
 *
 * Resolves to `CLAUDEMESH_SESSIONS_DIR` when that variable is set to a
 * non-empty value (used by tests), otherwise `~/.claudemesh/sessions`.
 * It deliberately does NOT honor the per-launch `CLAUDEMESH_CONFIG_DIR`
 * tmpdir: those are ephemeral and would defeat persistence.
 */
export function sessionsDir(): string {
  return (
    process.env.CLAUDEMESH_SESSIONS_DIR ||
    join(homedir(), ".claudemesh", "sessions")
  );
}

/** Canonical key file: `<sessionsDir>/<meshSlug>/<sessionId>.json`. */
function keyFilePath(meshSlug: string, sessionId: string): string {
  return join(sessionsDir(), meshSlug, `${sessionId}.json`);
}

/**
 * Read a persisted keypair, returning null (never throwing) when the
 * file is missing, unreadable, not valid JSON, or when either key is not
 * a lowercase-hex string of the expected length (64 chars public,
 * 128 chars secret).
 */
function readValidKeypair(file: string): Ed25519Keypair | null {
  try {
    if (!existsSync(file)) return null;
    const parsed = JSON.parse(readFileSync(file, "utf8")) as Partial<StoredKeypair>;
    if (
      parsed &&
      typeof parsed.publicKey === "string" &&
      /^[0-9a-f]{64}$/.test(parsed.publicKey) &&
      typeof parsed.secretKey === "string" &&
      /^[0-9a-f]{128}$/.test(parsed.secretKey)
    ) {
      return { publicKey: parsed.publicKey, secretKey: parsed.secretKey };
    }
  } catch {
    // Unreadable / corrupt — caller treats as absent and rewrites.
  }
  return null;
}

/**
 * Return the persisted keypair for (meshSlug, sessionId), creating and
 * writing one on first use. The file is re-read from disk on every call
 * rather than cached in memory.
 *
 * Falls back to an in-memory ephemeral keypair (the legacy behaviour)
 * when the identifiers are unusable or disk I/O fails — a launch must
 * never be blocked by a keystore problem.
 *
 * @param meshSlug  Mesh slug; becomes a directory name under sessionsDir().
 * @param sessionId Claude Code session UUID; becomes the file name.
 * @returns The persisted keypair, or a fresh unpersisted one on fallback.
 */
export async function loadOrCreateSessionKeypair(
  meshSlug: string,
  sessionId: string,
): Promise<Ed25519Keypair> {
  // Defensive validation: these compose into a filesystem path, so a
  // malformed slug/uuid must never escape the sessions dir. SLUG_RE
  // permits dots, which means "." and ".." would pass it and resolve to
  // the sessions dir itself or its parent — reject all-dot slugs too.
  const slugIsSafe = SLUG_RE.test(meshSlug) && !/^\.+$/.test(meshSlug);
  if (!slugIsSafe || !UUID_RE.test(sessionId)) {
    return generateKeypair();
  }

  const file = keyFilePath(meshSlug, sessionId);
  const existing = readValidKeypair(file);
  if (existing) return existing;

  const kp = await generateKeypair();
  // Write to a temp sibling then rename, so a concurrent reader never
  // sees a half-written canonical file.
  const tmp = `${file}.${randomBytes(6).toString("hex")}.tmp`;
  try {
    mkdirSync(join(sessionsDir(), meshSlug), { recursive: true, mode: 0o700 });
    const stored: StoredKeypair = {
      version: 1,
      meshSlug,
      sessionId,
      publicKey: kp.publicKey,
      secretKey: kp.secretKey,
      createdAt: new Date().toISOString(),
    };
    writeFileSync(tmp, JSON.stringify(stored), { mode: 0o600 });
    // Re-check: another launch may have created the canonical file with
    // a VALID keypair while we were generating — prefer it. A corrupt or
    // invalid existing file is not a winner; overwrite it below.
    // NOTE(review): two launches can still both pass this check before
    // either renames, and the later rename then replaces the key the
    // earlier launch already returned. Closing that window needs an
    // exclusive publish step (e.g. a hard link of the temp file to the
    // canonical path) instead of a plain rename.
    const won = readValidKeypair(file);
    if (won) return won;
    renameSync(tmp, file);
  } catch {
    // mkdir/write/rename failed — best effort: the in-memory keypair is
    // still valid for this launch (degrades to ephemeral, same as legacy).
  } finally {
    // The temp file contains the secret key. After a successful rename it
    // no longer exists and this is a no-op; on every other path it must
    // not be left behind in the sessions dir.
    try {
      rmSync(tmp, { force: true });
    } catch {
      /* ignore */
    }
  }
  return kp;
}
/**
 * Persisted, UUID-anchored session keypairs (delivery-reliability fix).
 *
 * The keystore is what makes a peer's sessionPubkey stable across
 * relaunch/--resume, so queued DMs (sealed to that pubkey) both route to
 * and decrypt on the returning session. Verifies:
 *   - the same (mesh, uuid) returns the SAME keypair across calls and is
 *     persisted to disk with owner-only permissions;
 *   - distinct uuids / meshes get distinct keypairs;
 *   - malformed identifiers fall back to an ephemeral keypair and never
 *     escape the sessions dir;
 *   - a corrupt on-disk file is transparently rewritten.
 */

import { mkdtempSync, rmSync, writeFileSync, existsSync, readdirSync, statSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { afterEach, beforeEach, describe, expect, test } from "vitest";

import {
  loadOrCreateSessionKeypair,
  sessionsDir,
} from "../../src/services/session/keypair-store.js";

const UUID_A = "11111111-2222-3333-4444-555555555555";
const UUID_B = "66666666-7777-8888-9999-aaaaaaaaaaaa";

let dir: string;
// Value of CLAUDEMESH_SESSIONS_DIR before each test, so a developer's or
// CI's own override is restored afterwards instead of being wiped.
let previousSessionsDir: string | undefined;

beforeEach(() => {
  previousSessionsDir = process.env.CLAUDEMESH_SESSIONS_DIR;
  dir = mkdtempSync(join(tmpdir(), "cm-keystore-"));
  process.env.CLAUDEMESH_SESSIONS_DIR = dir;
});

afterEach(() => {
  if (previousSessionsDir === undefined) {
    delete process.env.CLAUDEMESH_SESSIONS_DIR;
  } else {
    process.env.CLAUDEMESH_SESSIONS_DIR = previousSessionsDir;
  }
  rmSync(dir, { recursive: true, force: true });
});

describe("loadOrCreateSessionKeypair", () => {
  test("same (mesh, uuid) is stable across calls", async () => {
    const a = await loadOrCreateSessionKeypair("flexicar", UUID_A);
    const b = await loadOrCreateSessionKeypair("flexicar", UUID_A);
    expect(a.publicKey).toBe(b.publicKey);
    expect(a.secretKey).toBe(b.secretKey);
    expect(a.publicKey).toMatch(/^[0-9a-f]{64}$/);
    expect(a.secretKey).toMatch(/^[0-9a-f]{128}$/);
  });

  test("persists to disk under sessionsDir/<mesh>/<uuid>.json", async () => {
    await loadOrCreateSessionKeypair("flexicar", UUID_A);
    const file = join(sessionsDir(), "flexicar", `${UUID_A}.json`);
    expect(existsSync(file)).toBe(true);
  });

  // POSIX permission bits are not meaningful on Windows.
  test.skipIf(process.platform === "win32")(
    "persisted key file is owner-only (0o600)",
    async () => {
      await loadOrCreateSessionKeypair("flexicar", UUID_A);
      const file = join(sessionsDir(), "flexicar", `${UUID_A}.json`);
      expect(statSync(file).mode & 0o777).toBe(0o600);
    },
  );

  test("distinct uuids get distinct keys", async () => {
    const a = await loadOrCreateSessionKeypair("flexicar", UUID_A);
    const b = await loadOrCreateSessionKeypair("flexicar", UUID_B);
    expect(a.publicKey).not.toBe(b.publicKey);
  });

  test("distinct meshes get distinct keys for the same uuid", async () => {
    const a = await loadOrCreateSessionKeypair("flexicar", UUID_A);
    const b = await loadOrCreateSessionKeypair("other-mesh", UUID_A);
    expect(a.publicKey).not.toBe(b.publicKey);
  });

  test("malformed uuid falls back to ephemeral, writes nothing", async () => {
    const a = await loadOrCreateSessionKeypair("flexicar", "not-a-uuid");
    const b = await loadOrCreateSessionKeypair("flexicar", "not-a-uuid");
    expect(a.publicKey).toMatch(/^[0-9a-f]{64}$/);
    // Ephemeral → not persisted → each call is fresh.
    expect(a.publicKey).not.toBe(b.publicKey);
    expect(existsSync(join(dir, "flexicar"))).toBe(false);
  });

  test("path-traversal slug is rejected (ephemeral, no escape)", async () => {
    const a = await loadOrCreateSessionKeypair("../../etc", UUID_A);
    expect(a.publicKey).toMatch(/^[0-9a-f]{64}$/);
    // Nothing written under the sessions dir for a rejected slug.
    expect(readdirSync(dir)).toHaveLength(0);
  });

  test("corrupt on-disk file is rewritten and yields a valid key", async () => {
    const a = await loadOrCreateSessionKeypair("flexicar", UUID_A);
    const file = join(sessionsDir(), "flexicar", `${UUID_A}.json`);
    writeFileSync(file, "{ this is not valid json", "utf8");
    const b = await loadOrCreateSessionKeypair("flexicar", UUID_A);
    expect(b.publicKey).toMatch(/^[0-9a-f]{64}$/);
    // Rewritten to a fresh, internally-consistent keypair (distinct from
    // the now-clobbered original).
    expect(b.publicKey).not.toBe(a.publicKey);
    const c = await loadOrCreateSessionKeypair("flexicar", UUID_A);
    expect(c.publicKey).toBe(b.publicKey);
  });
});