feat(cli): self-renewing session attestation + honest send + hide daemons (1.37.0)
Fixes the "peers stop receiving after a while / like expiring" class of bugs.

- Session attestation self-renewal (root cause of the expiry). The daemon minted a 12h-TTL parent attestation once at `claudemesh launch` and replayed the same stale token on every WS reconnect. Past launch+12h the broker rejected session_hello with `expired`, after which the daemon reconnect-looped forever with the dead token and the session silently fell off the mesh — its ephemeral pubkey lingering in rosters, undeliverable. Trigger was any reconnect past 12h: a network blip, a sleep/wake, or (most reliably) a broker redeploy that drops every WS at once, killing all sessions >12h old together. buildHello now re-mints a fresh attestation per (re)connect from the in-memory mesh.secretKey (already used at daemon rehydration), so presence is self-healing across reconnects/redeploys. The 12h security bound is preserved — live tokens stay short-lived, just refreshed on use. This is what lets sessions stay on the mesh for days; the existing keepalive (30s ping) + auto-reconnect (exp backoff) + 90s broker grace were already sound — the stale attestation was the single point of failure defeating them.
- Honest send status. The daemon outbox path returned `queued` optimistically and the drain retried failures (incl. "no connected peer") async forever, so a bare `✔ sent` for an offline or stale-session-key target was misleading. Direct sends now pre-check the live roster: offline/unknown key → `⚠ queued — no connected peer matches this key` + ephemeral-key explanation; online → `✔ sent to <name> (online)`. Applies to both daemon and cold paths; JSON gains `recipientOnline` + `status`.
- Hide control-plane daemons from peer list + target resolution. The control-plane filter was human-output-only; `peer list --json` still leaked the daemon's row, making it look like an addressable peer. Now filtered from JSON too (--all still shows it).
Name/prefix resolution and the --self fan-out filter now exclude control-plane rows by peerRole (the reliable marker) rather than relying on the channel string alone; the fan-out filter keeps the channel check as a fallback for older brokers that don't emit peerRole.
- --self no-op warning. --self only governs own-member-key fan-out; passed with a session pubkey it was silently ignored. It now warns it had no effect and sends normally (messaging a specific session pubkey needs no flag).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "claudemesh-cli",
|
||||
"version": "1.36.0",
|
||||
"version": "1.37.0",
|
||||
"description": "Peer mesh for Claude Code sessions — CLI + MCP server.",
|
||||
"keywords": [
|
||||
"claude-code",
|
||||
|
||||
@@ -241,14 +241,6 @@ export async function runPeers(flags: PeersFlags): Promise<void> {
|
||||
try {
|
||||
const peers = await listPeersForMesh(slug);
|
||||
|
||||
if (wantsJson) {
|
||||
const projected = fieldList
|
||||
? peers.map((p) => projectFields(p, fieldList))
|
||||
: peers;
|
||||
allJson.push({ mesh: slug, peers: projected });
|
||||
continue;
|
||||
}
|
||||
|
||||
// Hide control-plane rows by default — they're infrastructure
|
||||
// (daemon-WS member-keyed presence), not interactive peers, and
|
||||
// they confused users into thinking the daemon counted as a
|
||||
@@ -258,10 +250,22 @@ export async function runPeers(flags: PeersFlags): Promise<void> {
|
||||
// 2026-05-04). annotateSelf() filled in 'session' for older
|
||||
// brokers that don't emit peerRole yet, so this filter is
|
||||
// backwards-compatible by construction — legacy rows show up.
|
||||
//
|
||||
// Applied to JSON too (was human-output-only): `peer list --json`
|
||||
// leaking the daemon's control-plane row is what made the daemon
|
||||
// look like an addressable peer and sent DMs into a black hole.
|
||||
const visible = flags.all
|
||||
? peers
|
||||
: peers.filter((p) => p.peerRole !== "control-plane");
|
||||
|
||||
if (wantsJson) {
|
||||
const projected = fieldList
|
||||
? visible.map((p) => projectFields(p, fieldList))
|
||||
: visible;
|
||||
allJson.push({ mesh: slug, peers: projected });
|
||||
continue;
|
||||
}
|
||||
|
||||
// Sort: this-session first, then your-other-sessions, then real
|
||||
// peers. Within each group, idle/working ahead of dnd. Inside the
|
||||
// groups, leave broker order. The point is: when you run peer
|
||||
|
||||
@@ -78,6 +78,11 @@ export async function runSend(flags: SendFlags, to: string, message: string): Pr
|
||||
if (peers === null) continue; // daemon unreachable for this query
|
||||
daemonReachable = true;
|
||||
for (const p of peers) {
|
||||
// Never resolve a name/prefix to a control-plane daemon row — it's
|
||||
// infrastructure, not an addressable peer, and matching it sends a
|
||||
// DM that the daemon swallows. (peerRole is the reliable marker;
|
||||
// the daemon's own row is control-plane.)
|
||||
if ((p as { peerRole?: string }).peerRole === "control-plane") continue;
|
||||
const pk = ((p as { pubkey?: string }).pubkey ?? "").toLowerCase();
|
||||
const mpk = ((p as { memberPubkey?: string }).memberPubkey ?? "").toLowerCase();
|
||||
const dn = (p as { displayName?: string }).displayName ?? "?";
|
||||
@@ -165,10 +170,14 @@ export async function runSend(flags: SendFlags, to: string, message: string): Pr
|
||||
const session = await getSessionInfo();
|
||||
const ownSessionPk = session?.presence?.sessionPubkey?.toLowerCase();
|
||||
const siblings = peers.filter((p) => {
|
||||
const r = p as { memberPubkey?: string; pubkey?: string; channel?: string };
|
||||
const r = p as { memberPubkey?: string; pubkey?: string; channel?: string; peerRole?: string };
|
||||
if (!r.pubkey) return false;
|
||||
if (ownSessionPk && r.pubkey.toLowerCase() === ownSessionPk) return false;
|
||||
if (r.channel === "claudemesh-daemon") return false;
|
||||
// Exclude the daemon's own control-plane presence row. peerRole is
|
||||
// the reliable marker (the live daemon row is control-plane even
|
||||
// when its channel reads "claudemesh-session"); keep the channel
|
||||
// check too for older brokers that don't emit peerRole.
|
||||
if (r.peerRole === "control-plane" || r.channel === "claudemesh-daemon") return false;
|
||||
return r.memberPubkey?.toLowerCase() === to.toLowerCase();
|
||||
});
|
||||
if (siblings.length === 0) {
|
||||
@@ -214,6 +223,45 @@ export async function runSend(flags: SendFlags, to: string, message: string): Pr
|
||||
}
|
||||
}
|
||||
|
||||
// --self only governs the own-member-key fan-out above, which returns
|
||||
// early. Reaching here with --self still set means the target was NOT
|
||||
// your own member pubkey, so the flag did nothing. Say so rather than
|
||||
// ignoring it silently — the old behavior made `send --self <session-
|
||||
// pubkey>` look like it controlled routing when it was inert. Messaging
|
||||
// a specific session pubkey (including one of your own sibling sessions)
|
||||
// needs no flag and just works.
|
||||
if (flags.self) {
|
||||
render.warn("--self had no effect: it only applies when the target is your own member pubkey (fan-out to your sibling sessions). Sending to this specific pubkey directly.");
|
||||
}
|
||||
|
||||
// Honest-delivery pre-check (direct sends only). The daemon path below
|
||||
// queues into the local outbox and returns `queued` optimistically; the
|
||||
// drain then delivers async and retries failures (incl. "no connected
|
||||
// peer") forever. So a bare "sent" line was misleading — a DM to an
|
||||
// offline or stale-session-key target looked delivered but never was.
|
||||
// Resolve the live roster once to learn whether `to` is addressable
|
||||
// right now; this only shapes the confirmation wording (the send still
|
||||
// queues regardless, preserving store-and-forward for genuinely-offline
|
||||
// peers). null = unknown (not a direct DM, or daemon unreachable).
|
||||
let recipientOnline: boolean | null = null;
|
||||
let recipientName: string | undefined;
|
||||
if (isDirect && meshSlug) {
|
||||
const { tryListPeersViaDaemon } = await import("~/services/bridge/daemon-route.js");
|
||||
const peers = await tryListPeersViaDaemon(meshSlug);
|
||||
if (peers !== null) {
|
||||
const lower = to.toLowerCase();
|
||||
const match = peers.find((p) => {
|
||||
const r = p as { pubkey?: string; memberPubkey?: string; peerRole?: string };
|
||||
if (r.peerRole === "control-plane") return false;
|
||||
return r.pubkey?.toLowerCase() === lower || r.memberPubkey?.toLowerCase() === lower;
|
||||
});
|
||||
recipientOnline = !!match;
|
||||
recipientName = match ? (match as { displayName?: string }).displayName : undefined;
|
||||
}
|
||||
}
|
||||
const offlineHint =
|
||||
"Session pubkeys are ephemeral — a key from an ended session never reconnects, so the message can't be delivered. Re-fetch a live target with `claudemesh peer list --json`.";
|
||||
|
||||
// Daemon path — preferred when a long-lived daemon is local. UDS at
|
||||
// ~/.claudemesh/daemon/daemon.sock; ~1ms round-trip; persists outbox
|
||||
// across CLI invocations so a `claudemesh send` survives a daemon
|
||||
@@ -222,8 +270,18 @@ export async function runSend(flags: SendFlags, to: string, message: string): Pr
|
||||
const dr = await trySendViaDaemon({ to, message, priority, expectedMesh: meshSlug ?? undefined });
|
||||
if (dr !== null) {
|
||||
if (dr.ok) {
|
||||
if (flags.json) console.log(JSON.stringify({ ok: true, messageId: dr.messageId, target: to, via: "daemon", duplicate: !!dr.duplicate }));
|
||||
else render.ok(`sent to ${to} (daemon)`, dr.messageId ? dim(dr.messageId.slice(0, 8)) : undefined);
|
||||
if (flags.json) {
|
||||
console.log(JSON.stringify({ ok: true, messageId: dr.messageId, target: to, via: "daemon", duplicate: !!dr.duplicate, status: dr.status, recipientOnline }));
|
||||
} else if (recipientOnline === false) {
|
||||
render.warn(`queued for ${recipientName ?? to.slice(0, 16) + "…"} — no connected peer matches this key on "${meshSlug}".`);
|
||||
render.hint(offlineHint);
|
||||
} else {
|
||||
const who = recipientName ? `${recipientName} (${to.slice(0, 16)}…)` : to;
|
||||
// recipientOnline === true → peer is present, delivery imminent.
|
||||
// null → daemon couldn't tell (e.g. roster query failed); keep
|
||||
// the neutral "(daemon)" transport tag rather than overclaiming.
|
||||
render.ok(`sent to ${who}${recipientOnline === true ? " (online)" : " (daemon)"}`, dr.messageId ? dim(dr.messageId.slice(0, 8)) : undefined);
|
||||
}
|
||||
return;
|
||||
}
|
||||
// Daemon answered but rejected (409 idempotency, 400 schema). Surface; do not fall through.
|
||||
@@ -269,9 +327,13 @@ export async function runSend(flags: SendFlags, to: string, message: string): Pr
|
||||
const result = await client.send(targetSpec, message, priority);
|
||||
if (result.ok) {
|
||||
if (flags.json) {
|
||||
console.log(JSON.stringify({ ok: true, messageId: result.messageId, target: to }));
|
||||
console.log(JSON.stringify({ ok: true, messageId: result.messageId, target: to, recipientOnline }));
|
||||
} else if (recipientOnline === false) {
|
||||
render.warn(`queued for ${recipientName ?? to} — no connected peer matches this key on "${meshSlug ?? flags.mesh ?? "default"}".`);
|
||||
render.hint(offlineHint);
|
||||
} else {
|
||||
render.ok(`sent to ${to}`, result.messageId ? dim(result.messageId.slice(0, 8)) : undefined);
|
||||
const who = recipientName ? `${recipientName} (${to.slice(0, 16)}…)` : to;
|
||||
render.ok(`sent to ${who}${recipientOnline === true ? " (online)" : ""}`, result.messageId ? dim(result.messageId.slice(0, 8)) : undefined);
|
||||
}
|
||||
} else {
|
||||
if (flags.json) {
|
||||
|
||||
@@ -43,7 +43,7 @@
|
||||
import { hostname as osHostname } from "node:os";
|
||||
|
||||
import type { JoinedMesh } from "~/services/config/facade.js";
|
||||
import { signSessionHello } from "~/services/broker/session-hello-sig.js";
|
||||
import { signSessionHello, signParentAttestation } from "~/services/broker/session-hello-sig.js";
|
||||
import { connectWsWithBackoff, type WsLifecycle, type WsStatus } from "./ws-lifecycle.js";
|
||||
import type { BrokerSendArgs, BrokerSendResult } from "./broker.js";
|
||||
|
||||
@@ -149,13 +149,35 @@ export class SessionBrokerClient {
|
||||
sessionPubkey: this.opts.sessionPubkey,
|
||||
sessionSecretKey: this.opts.sessionSecretKey,
|
||||
});
|
||||
// Re-mint the parent attestation fresh on every (re)connect rather
|
||||
// than reusing the one signed at `claudemesh launch`. The minted
|
||||
// attestation has a 12h TTL; reusing the stored instance meant any
|
||||
// reconnect past launch+12h — a network blip, a sleep/wake, or
|
||||
// (most commonly) a broker redeploy that drops every WS at once —
|
||||
// was rejected by the broker with `expired`, after which the daemon
|
||||
// reconnect-looped forever with the same dead token and the session
|
||||
// silently fell off the mesh (its ephemeral pubkey lingering in
|
||||
// peer rosters, undeliverable). The member secret key is in memory
|
||||
// (`mesh.secretKey`, already used at daemon rehydration), so the
|
||||
// daemon can self-renew: fresh-minting keeps live attestations
|
||||
// short-lived AND makes presence self-healing across reconnects.
|
||||
let parentAttestation = this.opts.parentAttestation;
|
||||
try {
|
||||
parentAttestation = await signParentAttestation({
|
||||
parentMemberPubkey: this.opts.mesh.pubkey,
|
||||
parentSecretKey: this.opts.mesh.secretKey,
|
||||
sessionPubkey: this.opts.sessionPubkey,
|
||||
});
|
||||
} catch (e) {
|
||||
this.log("warn", "parent attestation re-mint failed; reusing stored token (may be expired)", { err: String(e) });
|
||||
}
|
||||
return {
|
||||
type: "session_hello",
|
||||
meshId: this.opts.mesh.meshId,
|
||||
parentMemberId: this.opts.mesh.memberId,
|
||||
parentMemberPubkey: this.opts.mesh.pubkey,
|
||||
sessionPubkey: this.opts.sessionPubkey,
|
||||
parentAttestation: this.opts.parentAttestation,
|
||||
parentAttestation,
|
||||
displayName: this.opts.displayName,
|
||||
sessionId: this.opts.sessionId,
|
||||
pid: this.opts.pid,
|
||||
|
||||
Reference in New Issue
Block a user