feat(cli): self-renewing session attestation + honest send + hide daemons (1.37.0)
Fixes the "peers stop receiving after a while / like expiring" class of bugs. - Session attestation self-renewal (root cause of the expiry). The daemon minted a 12h-TTL parent attestation once at `claudemesh launch` and replayed the same stale token on every WS reconnect. Past launch+12h the broker rejected session_hello with `expired`, after which the daemon reconnect-looped forever with the dead token and the session silently fell off the mesh — its ephemeral pubkey lingering in rosters, undeliverable. Trigger was any reconnect past 12h: a network blip, a sleep/wake, or (most reliably) a broker redeploy that drops every WS at once, killing all sessions >12h old together. buildHello now re-mints a fresh attestation per (re)connect from the in-memory mesh.secretKey (already used at daemon rehydration), so presence is self-healing across reconnects/redeploys. The 12h security bound is preserved — live tokens stay short-lived, just refreshed on use. This is what lets sessions stay on the mesh for days; the existing keepalive (30s ping) + auto-reconnect (exp backoff) + 90s broker grace were already sound — the stale attestation was the single point of failure defeating them. - Honest send status. The daemon outbox path returned `queued` optimistically and the drain retried failures (incl. "no connected peer") async forever, so a bare `✔ sent` for an offline or stale-session-key target was misleading. Direct sends now pre-check the live roster: offline/unknown key → `⚠ queued — no connected peer matches this key` + ephemeral-key explanation; online → `✔ sent to <name> (online)`. Applies to both daemon and cold paths; JSON gains `recipientOnline` + `status`. - Hide control-plane daemons from peer list + target resolution. The control-plane filter was human-output-only; `peer list --json` still leaked the daemon's row, making it look like an addressable peer. Now filtered from JSON too (--all still shows it). 
Name/prefix resolution and the --self fan-out filter now exclude control-plane rows by peerRole (the reliable marker) rather than the channel string. - --self no-op warning. --self only governs own-member-key fan-out; passed with a session pubkey it was silently ignored. It now warns it had no effect and sends normally (messaging a specific session pubkey needs no flag). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "claudemesh-cli",
|
"name": "claudemesh-cli",
|
||||||
"version": "1.36.0",
|
"version": "1.37.0",
|
||||||
"description": "Peer mesh for Claude Code sessions — CLI + MCP server.",
|
"description": "Peer mesh for Claude Code sessions — CLI + MCP server.",
|
||||||
"keywords": [
|
"keywords": [
|
||||||
"claude-code",
|
"claude-code",
|
||||||
|
|||||||
@@ -241,14 +241,6 @@ export async function runPeers(flags: PeersFlags): Promise<void> {
|
|||||||
try {
|
try {
|
||||||
const peers = await listPeersForMesh(slug);
|
const peers = await listPeersForMesh(slug);
|
||||||
|
|
||||||
if (wantsJson) {
|
|
||||||
const projected = fieldList
|
|
||||||
? peers.map((p) => projectFields(p, fieldList))
|
|
||||||
: peers;
|
|
||||||
allJson.push({ mesh: slug, peers: projected });
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Hide control-plane rows by default — they're infrastructure
|
// Hide control-plane rows by default — they're infrastructure
|
||||||
// (daemon-WS member-keyed presence), not interactive peers, and
|
// (daemon-WS member-keyed presence), not interactive peers, and
|
||||||
// they confused users into thinking the daemon counted as a
|
// they confused users into thinking the daemon counted as a
|
||||||
@@ -258,10 +250,22 @@ export async function runPeers(flags: PeersFlags): Promise<void> {
|
|||||||
// 2026-05-04). annotateSelf() filled in 'session' for older
|
// 2026-05-04). annotateSelf() filled in 'session' for older
|
||||||
// brokers that don't emit peerRole yet, so this filter is
|
// brokers that don't emit peerRole yet, so this filter is
|
||||||
// backwards-compatible by construction — legacy rows show up.
|
// backwards-compatible by construction — legacy rows show up.
|
||||||
|
//
|
||||||
|
// Applied to JSON too (was human-output-only): `peer list --json`
|
||||||
|
// leaking the daemon's control-plane row is what made the daemon
|
||||||
|
// look like an addressable peer and sent DMs into a black hole.
|
||||||
const visible = flags.all
|
const visible = flags.all
|
||||||
? peers
|
? peers
|
||||||
: peers.filter((p) => p.peerRole !== "control-plane");
|
: peers.filter((p) => p.peerRole !== "control-plane");
|
||||||
|
|
||||||
|
if (wantsJson) {
|
||||||
|
const projected = fieldList
|
||||||
|
? visible.map((p) => projectFields(p, fieldList))
|
||||||
|
: visible;
|
||||||
|
allJson.push({ mesh: slug, peers: projected });
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
// Sort: this-session first, then your-other-sessions, then real
|
// Sort: this-session first, then your-other-sessions, then real
|
||||||
// peers. Within each group, idle/working ahead of dnd. Inside the
|
// peers. Within each group, idle/working ahead of dnd. Inside the
|
||||||
// groups, leave broker order. The point is: when you run peer
|
// groups, leave broker order. The point is: when you run peer
|
||||||
|
|||||||
@@ -78,6 +78,11 @@ export async function runSend(flags: SendFlags, to: string, message: string): Pr
|
|||||||
if (peers === null) continue; // daemon unreachable for this query
|
if (peers === null) continue; // daemon unreachable for this query
|
||||||
daemonReachable = true;
|
daemonReachable = true;
|
||||||
for (const p of peers) {
|
for (const p of peers) {
|
||||||
|
// Never resolve a name/prefix to a control-plane daemon row — it's
|
||||||
|
// infrastructure, not an addressable peer, and matching it sends a
|
||||||
|
// DM that the daemon swallows. (peerRole is the reliable marker;
|
||||||
|
// the daemon's own row is control-plane.)
|
||||||
|
if ((p as { peerRole?: string }).peerRole === "control-plane") continue;
|
||||||
const pk = ((p as { pubkey?: string }).pubkey ?? "").toLowerCase();
|
const pk = ((p as { pubkey?: string }).pubkey ?? "").toLowerCase();
|
||||||
const mpk = ((p as { memberPubkey?: string }).memberPubkey ?? "").toLowerCase();
|
const mpk = ((p as { memberPubkey?: string }).memberPubkey ?? "").toLowerCase();
|
||||||
const dn = (p as { displayName?: string }).displayName ?? "?";
|
const dn = (p as { displayName?: string }).displayName ?? "?";
|
||||||
@@ -165,10 +170,14 @@ export async function runSend(flags: SendFlags, to: string, message: string): Pr
|
|||||||
const session = await getSessionInfo();
|
const session = await getSessionInfo();
|
||||||
const ownSessionPk = session?.presence?.sessionPubkey?.toLowerCase();
|
const ownSessionPk = session?.presence?.sessionPubkey?.toLowerCase();
|
||||||
const siblings = peers.filter((p) => {
|
const siblings = peers.filter((p) => {
|
||||||
const r = p as { memberPubkey?: string; pubkey?: string; channel?: string };
|
const r = p as { memberPubkey?: string; pubkey?: string; channel?: string; peerRole?: string };
|
||||||
if (!r.pubkey) return false;
|
if (!r.pubkey) return false;
|
||||||
if (ownSessionPk && r.pubkey.toLowerCase() === ownSessionPk) return false;
|
if (ownSessionPk && r.pubkey.toLowerCase() === ownSessionPk) return false;
|
||||||
if (r.channel === "claudemesh-daemon") return false;
|
// Exclude the daemon's own control-plane presence row. peerRole is
|
||||||
|
// the reliable marker (the live daemon row is control-plane even
|
||||||
|
// when its channel reads "claudemesh-session"); keep the channel
|
||||||
|
// check too for older brokers that don't emit peerRole.
|
||||||
|
if (r.peerRole === "control-plane" || r.channel === "claudemesh-daemon") return false;
|
||||||
return r.memberPubkey?.toLowerCase() === to.toLowerCase();
|
return r.memberPubkey?.toLowerCase() === to.toLowerCase();
|
||||||
});
|
});
|
||||||
if (siblings.length === 0) {
|
if (siblings.length === 0) {
|
||||||
@@ -214,6 +223,45 @@ export async function runSend(flags: SendFlags, to: string, message: string): Pr
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// --self only governs the own-member-key fan-out above, which returns
|
||||||
|
// early. Reaching here with --self still set means the target was NOT
|
||||||
|
// your own member pubkey, so the flag did nothing. Say so rather than
|
||||||
|
// ignoring it silently — the old behavior made `send --self <session-
|
||||||
|
// pubkey>` look like it controlled routing when it was inert. Messaging
|
||||||
|
// a specific session pubkey (including one of your own sibling sessions)
|
||||||
|
// needs no flag and just works.
|
||||||
|
if (flags.self) {
|
||||||
|
render.warn("--self had no effect: it only applies when the target is your own member pubkey (fan-out to your sibling sessions). Sending to this specific pubkey directly.");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Honest-delivery pre-check (direct sends only). The daemon path below
|
||||||
|
// queues into the local outbox and returns `queued` optimistically; the
|
||||||
|
// drain then delivers async and retries failures (incl. "no connected
|
||||||
|
// peer") forever. So a bare "sent" line was misleading — a DM to an
|
||||||
|
// offline or stale-session-key target looked delivered but never was.
|
||||||
|
// Resolve the live roster once to learn whether `to` is addressable
|
||||||
|
// right now; this only shapes the confirmation wording (the send still
|
||||||
|
// queues regardless, preserving store-and-forward for genuinely-offline
|
||||||
|
// peers). null = unknown (not a direct DM, or daemon unreachable).
|
||||||
|
let recipientOnline: boolean | null = null;
|
||||||
|
let recipientName: string | undefined;
|
||||||
|
if (isDirect && meshSlug) {
|
||||||
|
const { tryListPeersViaDaemon } = await import("~/services/bridge/daemon-route.js");
|
||||||
|
const peers = await tryListPeersViaDaemon(meshSlug);
|
||||||
|
if (peers !== null) {
|
||||||
|
const lower = to.toLowerCase();
|
||||||
|
const match = peers.find((p) => {
|
||||||
|
const r = p as { pubkey?: string; memberPubkey?: string; peerRole?: string };
|
||||||
|
if (r.peerRole === "control-plane") return false;
|
||||||
|
return r.pubkey?.toLowerCase() === lower || r.memberPubkey?.toLowerCase() === lower;
|
||||||
|
});
|
||||||
|
recipientOnline = !!match;
|
||||||
|
recipientName = match ? (match as { displayName?: string }).displayName : undefined;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
const offlineHint =
|
||||||
|
"Session pubkeys are ephemeral — a key from an ended session never reconnects, so the message can't be delivered. Re-fetch a live target with `claudemesh peer list --json`.";
|
||||||
|
|
||||||
// Daemon path — preferred when a long-lived daemon is local. UDS at
|
// Daemon path — preferred when a long-lived daemon is local. UDS at
|
||||||
// ~/.claudemesh/daemon/daemon.sock; ~1ms round-trip; persists outbox
|
// ~/.claudemesh/daemon/daemon.sock; ~1ms round-trip; persists outbox
|
||||||
// across CLI invocations so a `claudemesh send` survives a daemon
|
// across CLI invocations so a `claudemesh send` survives a daemon
|
||||||
@@ -222,8 +270,18 @@ export async function runSend(flags: SendFlags, to: string, message: string): Pr
|
|||||||
const dr = await trySendViaDaemon({ to, message, priority, expectedMesh: meshSlug ?? undefined });
|
const dr = await trySendViaDaemon({ to, message, priority, expectedMesh: meshSlug ?? undefined });
|
||||||
if (dr !== null) {
|
if (dr !== null) {
|
||||||
if (dr.ok) {
|
if (dr.ok) {
|
||||||
if (flags.json) console.log(JSON.stringify({ ok: true, messageId: dr.messageId, target: to, via: "daemon", duplicate: !!dr.duplicate }));
|
if (flags.json) {
|
||||||
else render.ok(`sent to ${to} (daemon)`, dr.messageId ? dim(dr.messageId.slice(0, 8)) : undefined);
|
console.log(JSON.stringify({ ok: true, messageId: dr.messageId, target: to, via: "daemon", duplicate: !!dr.duplicate, status: dr.status, recipientOnline }));
|
||||||
|
} else if (recipientOnline === false) {
|
||||||
|
render.warn(`queued for ${recipientName ?? to.slice(0, 16) + "…"} — no connected peer matches this key on "${meshSlug}".`);
|
||||||
|
render.hint(offlineHint);
|
||||||
|
} else {
|
||||||
|
const who = recipientName ? `${recipientName} (${to.slice(0, 16)}…)` : to;
|
||||||
|
// recipientOnline === true → peer is present, delivery imminent.
|
||||||
|
// null → daemon couldn't tell (e.g. roster query failed); keep
|
||||||
|
// the neutral "(daemon)" transport tag rather than overclaiming.
|
||||||
|
render.ok(`sent to ${who}${recipientOnline === true ? " (online)" : " (daemon)"}`, dr.messageId ? dim(dr.messageId.slice(0, 8)) : undefined);
|
||||||
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
// Daemon answered but rejected (409 idempotency, 400 schema). Surface; do not fall through.
|
// Daemon answered but rejected (409 idempotency, 400 schema). Surface; do not fall through.
|
||||||
@@ -269,9 +327,13 @@ export async function runSend(flags: SendFlags, to: string, message: string): Pr
|
|||||||
const result = await client.send(targetSpec, message, priority);
|
const result = await client.send(targetSpec, message, priority);
|
||||||
if (result.ok) {
|
if (result.ok) {
|
||||||
if (flags.json) {
|
if (flags.json) {
|
||||||
console.log(JSON.stringify({ ok: true, messageId: result.messageId, target: to }));
|
console.log(JSON.stringify({ ok: true, messageId: result.messageId, target: to, recipientOnline }));
|
||||||
|
} else if (recipientOnline === false) {
|
||||||
|
render.warn(`queued for ${recipientName ?? to} — no connected peer matches this key on "${meshSlug ?? flags.mesh ?? "default"}".`);
|
||||||
|
render.hint(offlineHint);
|
||||||
} else {
|
} else {
|
||||||
render.ok(`sent to ${to}`, result.messageId ? dim(result.messageId.slice(0, 8)) : undefined);
|
const who = recipientName ? `${recipientName} (${to.slice(0, 16)}…)` : to;
|
||||||
|
render.ok(`sent to ${who}${recipientOnline === true ? " (online)" : ""}`, result.messageId ? dim(result.messageId.slice(0, 8)) : undefined);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (flags.json) {
|
if (flags.json) {
|
||||||
|
|||||||
@@ -43,7 +43,7 @@
|
|||||||
import { hostname as osHostname } from "node:os";
|
import { hostname as osHostname } from "node:os";
|
||||||
|
|
||||||
import type { JoinedMesh } from "~/services/config/facade.js";
|
import type { JoinedMesh } from "~/services/config/facade.js";
|
||||||
import { signSessionHello } from "~/services/broker/session-hello-sig.js";
|
import { signSessionHello, signParentAttestation } from "~/services/broker/session-hello-sig.js";
|
||||||
import { connectWsWithBackoff, type WsLifecycle, type WsStatus } from "./ws-lifecycle.js";
|
import { connectWsWithBackoff, type WsLifecycle, type WsStatus } from "./ws-lifecycle.js";
|
||||||
import type { BrokerSendArgs, BrokerSendResult } from "./broker.js";
|
import type { BrokerSendArgs, BrokerSendResult } from "./broker.js";
|
||||||
|
|
||||||
@@ -149,13 +149,35 @@ export class SessionBrokerClient {
|
|||||||
sessionPubkey: this.opts.sessionPubkey,
|
sessionPubkey: this.opts.sessionPubkey,
|
||||||
sessionSecretKey: this.opts.sessionSecretKey,
|
sessionSecretKey: this.opts.sessionSecretKey,
|
||||||
});
|
});
|
||||||
|
// Re-mint the parent attestation fresh on every (re)connect rather
|
||||||
|
// than reusing the one signed at `claudemesh launch`. The minted
|
||||||
|
// attestation has a 12h TTL; reusing the stored instance meant any
|
||||||
|
// reconnect past launch+12h — a network blip, a sleep/wake, or
|
||||||
|
// (most commonly) a broker redeploy that drops every WS at once —
|
||||||
|
// was rejected by the broker with `expired`, after which the daemon
|
||||||
|
// reconnect-looped forever with the same dead token and the session
|
||||||
|
// silently fell off the mesh (its ephemeral pubkey lingering in
|
||||||
|
// peer rosters, undeliverable). The member secret key is in memory
|
||||||
|
// (`mesh.secretKey`, already used at daemon rehydration), so the
|
||||||
|
// daemon can self-renew: fresh-minting keeps live attestations
|
||||||
|
// short-lived AND makes presence self-healing across reconnects.
|
||||||
|
let parentAttestation = this.opts.parentAttestation;
|
||||||
|
try {
|
||||||
|
parentAttestation = await signParentAttestation({
|
||||||
|
parentMemberPubkey: this.opts.mesh.pubkey,
|
||||||
|
parentSecretKey: this.opts.mesh.secretKey,
|
||||||
|
sessionPubkey: this.opts.sessionPubkey,
|
||||||
|
});
|
||||||
|
} catch (e) {
|
||||||
|
this.log("warn", "parent attestation re-mint failed; reusing stored token (may be expired)", { err: String(e) });
|
||||||
|
}
|
||||||
return {
|
return {
|
||||||
type: "session_hello",
|
type: "session_hello",
|
||||||
meshId: this.opts.mesh.meshId,
|
meshId: this.opts.mesh.meshId,
|
||||||
parentMemberId: this.opts.mesh.memberId,
|
parentMemberId: this.opts.mesh.memberId,
|
||||||
parentMemberPubkey: this.opts.mesh.pubkey,
|
parentMemberPubkey: this.opts.mesh.pubkey,
|
||||||
sessionPubkey: this.opts.sessionPubkey,
|
sessionPubkey: this.opts.sessionPubkey,
|
||||||
parentAttestation: this.opts.parentAttestation,
|
parentAttestation,
|
||||||
displayName: this.opts.displayName,
|
displayName: this.opts.displayName,
|
||||||
sessionId: this.opts.sessionId,
|
sessionId: this.opts.sessionId,
|
||||||
pid: this.opts.pid,
|
pid: this.opts.pid,
|
||||||
|
|||||||
Reference in New Issue
Block a user