2 Commits

Author SHA1 Message Date
Alejandro Gutiérrez
56b1cc0756 docs: split vision into changelog + clean roadmap
Some checks failed
CI / Lint (push) Has been cancelled
CI / Typecheck (push) Has been cancelled
CI / Broker tests (Postgres) (push) Has been cancelled
CI / Docker build (linux/amd64) (push) Has been cancelled
changelog-20260407.md: full implementation details for 21 features
vision-20260407.md: slimmed to shipped summary + remaining items

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-08 00:20:55 +01:00
Alejandro Gutiérrez
fc8a7edc23 feat: persist peer session state across disconnects ("welcome back" on reconnect)
Save groups, profile, visibility, summary, display name, and cumulative
stats to a new mesh.peer_state table on disconnect. On reconnect (same
meshId + memberId), restore them automatically — hello groups take
precedence over stored groups if provided. Broadcast peer_returned
system event with last-seen time and summary to other peers.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-08 00:20:20 +01:00
8 changed files with 533 additions and 389 deletions

View File

@@ -15,10 +15,10 @@
import { createServer, type IncomingMessage, type ServerResponse } from "node:http"; import { createServer, type IncomingMessage, type ServerResponse } from "node:http";
import type { Duplex } from "node:stream"; import type { Duplex } from "node:stream";
import { WebSocketServer, type WebSocket } from "ws"; import { WebSocketServer, type WebSocket } from "ws";
import { and, eq, sql } from "drizzle-orm"; import { and, eq, isNull, sql } from "drizzle-orm";
import { env } from "./env"; import { env } from "./env";
import { db } from "./db"; import { db } from "./db";
import { messageQueue, scheduledMessage as scheduledMessageTable, meshWebhook } from "@turbostarter/db/schema/mesh"; import { messageQueue, scheduledMessage as scheduledMessageTable, meshWebhook, peerState } from "@turbostarter/db/schema/mesh";
import { import {
claimTask, claimTask,
completeTask, completeTask,
@@ -179,7 +179,7 @@ function makeClockStatus(clock: MeshClock, reqId?: string): WSServerMessage {
} as WSServerMessage; } as WSServerMessage;
} }
// --- MCP proxy registry (in-memory, ephemeral) --- // --- MCP proxy registry (in-memory, persistent-capable) ---
interface McpRegisteredServer { interface McpRegisteredServer {
meshId: string; meshId: string;
presenceId: string; presenceId: string;
@@ -187,6 +187,11 @@ interface McpRegisteredServer {
description: string; description: string;
tools: Array<{ name: string; description: string; inputSchema: Record<string, unknown> }>; tools: Array<{ name: string; description: string; inputSchema: Record<string, unknown> }>;
hostedByName: string; hostedByName: string;
persistent: boolean;
online: boolean;
memberId: string;
registeredAt: string;
offlineSince?: string;
} }
/** Keyed by "meshId:serverName" */ /** Keyed by "meshId:serverName" */
const mcpRegistry = new Map<string, McpRegisteredServer>(); const mcpRegistry = new Map<string, McpRegisteredServer>();
@@ -858,6 +863,118 @@ function sendError(
} }
} }
// --- Peer state persistence ---
/**
 * Persist a peer's session state so it can be restored on reconnect.
 *
 * Upserts a single `peerState` row keyed by (meshId, memberId) containing the
 * connection's groups, profile, visibility, display name, the merged stats
 * totals, and, when the live presence row carries one, the last summary.
 *
 * The summary is looked up BEFORE the write and folded into the same upsert,
 * so a peer that reconnects immediately can never observe a row whose
 * groups/stats are fresh but whose summary has not been written yet.
 *
 * Best-effort: every failure is logged with `log.warn` and never thrown.
 *
 * @param conn     Connection whose state is being saved.
 * @param memberId Member the state belongs to.
 * @param meshId   Mesh the member was connected to.
 */
async function savePeerState(conn: PeerConn, memberId: string, meshId: string): Promise<void> {
  try {
    // Read existing cumulative stats to merge
    const existing = await db
      .select()
      .from(peerState)
      .where(and(eq(peerState.meshId, meshId), eq(peerState.memberId, memberId)))
      .limit(1);
    const prev = existing[0]?.cumulativeStats as { messagesIn: number; messagesOut: number; toolCalls: number; errors: number } | null;
    // NOTE(review): the CLI client seeds its own counters from `restoredStats`
    // on hello_ack. If conn.stats mirrors those client-reported counters, adding
    // them to `prev` here would double-count earlier sessions — confirm.
    const sessionStats = conn.stats ?? {};
    const cumulative = {
      messagesIn: (prev?.messagesIn ?? 0) + (sessionStats.messagesIn ?? 0),
      messagesOut: (prev?.messagesOut ?? 0) + (sessionStats.messagesOut ?? 0),
      toolCalls: (prev?.toolCalls ?? 0) + (sessionStats.toolCalls ?? 0),
      errors: (prev?.errors ?? 0) + (sessionStats.errors ?? 0),
    };

    // The summary lives on the presence row (it's set via setSummary, not on
    // conn). A failed lookup must not prevent the rest of the state from being
    // saved, so it is isolated in its own try/catch.
    let summary: string | undefined;
    try {
      const { presence } = await import("@turbostarter/db/schema/mesh");
      const presRows = await db
        .select({ summary: presence.summary })
        .from(presence)
        .where(and(eq(presence.memberId, memberId), isNull(presence.disconnectedAt)))
        .limit(1);
      summary = presRows[0]?.summary || undefined;
    } catch (e) {
      log.warn("failed to read presence summary for peer state", {
        mesh_id: meshId,
        member_id: memberId,
        error: e instanceof Error ? e.message : String(e),
      });
    }

    const now = new Date();
    // Fields written on both insert and update. `lastSummary` is included only
    // when a summary exists, so an update never erases a previously stored one.
    const snapshot = {
      groups: conn.groups,
      profile: conn.profile,
      visible: conn.visible,
      lastDisplayName: conn.displayName,
      cumulativeStats: cumulative,
      lastSeenAt: now,
      updatedAt: now,
      ...(summary ? { lastSummary: summary } : {}),
    };
    await db
      .insert(peerState)
      .values({
        meshId,
        memberId,
        lastSummary: null, // overridden by the snapshot when a summary exists
        createdAt: now,
        ...snapshot,
      })
      .onConflictDoUpdate({
        target: [peerState.meshId, peerState.memberId],
        set: snapshot,
      });
  } catch (e) {
    log.warn("failed to save peer state", {
      mesh_id: meshId,
      member_id: memberId,
      error: e instanceof Error ? e.message : String(e),
    });
  }
}
/**
 * Load the persisted state for a (mesh, member) pair.
 *
 * Returns `null` when no row exists or when the query throws (the error is
 * logged with `log.warn`). Otherwise returns the stored fields with
 * `restored: true`; nullable columns are mapped to `undefined`, and missing
 * groups/profile fall back to `[]` / `{}`. JSON columns are cast to their
 * expected shapes, not validated.
 *
 * @param meshId   Mesh the member is joining.
 * @param memberId Member whose saved state should be loaded.
 */
async function restorePeerState(
  meshId: string,
  memberId: string,
): Promise<{
  restored: boolean;
  groups?: Array<{ name: string; role?: string }>;
  profile?: { avatar?: string; title?: string; bio?: string; capabilities?: string[] };
  visible?: boolean;
  lastSummary?: string;
  lastDisplayName?: string;
  cumulativeStats?: { messagesIn: number; messagesOut: number; toolCalls: number; errors: number };
  lastSeenAt?: Date;
} | null> {
  try {
    const [saved] = await db
      .select()
      .from(peerState)
      .where(and(eq(peerState.meshId, meshId), eq(peerState.memberId, memberId)))
      .limit(1);

    // No previous session recorded for this member in this mesh.
    if (!saved) {
      return null;
    }

    const storedGroups = saved.groups as Array<{ name: string; role?: string }>;
    const storedProfile = saved.profile as { avatar?: string; title?: string; bio?: string; capabilities?: string[] };
    const storedStats = saved.cumulativeStats as { messagesIn: number; messagesOut: number; toolCalls: number; errors: number };

    return {
      restored: true,
      groups: storedGroups ?? [],
      profile: storedProfile ?? {},
      visible: saved.visible,
      lastSummary: saved.lastSummary ?? undefined,
      lastDisplayName: saved.lastDisplayName ?? undefined,
      cumulativeStats: storedStats ?? undefined,
      lastSeenAt: saved.lastSeenAt ?? undefined,
    };
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    log.warn("failed to restore peer state", {
      mesh_id: meshId,
      member_id: memberId,
      error: reason,
    });
    return null;
  }
}
async function handleHello( async function handleHello(
ws: WebSocket, ws: WebSocket,
hello: Extract<WSClientMessage, { type: "hello" }>, hello: Extract<WSClientMessage, { type: "hello" }>,
@@ -902,7 +1019,14 @@ async function handleHello(
ws.close(1008, "unauthorized"); ws.close(1008, "unauthorized");
return null; return null;
} }
const initialGroups = hello.groups ?? [];
// Attempt to restore persisted state from a previous session.
const saved = await restorePeerState(hello.meshId, member.id);
const helloHasGroups = hello.groups && hello.groups.length > 0;
// Hello groups take precedence; fall back to restored groups.
const initialGroups = helloHasGroups
? hello.groups!
: (saved?.groups ?? []);
const presenceId = await connectPresence({ const presenceId = await connectPresence({
memberId: member.id, memberId: member.id,
sessionId: hello.sessionId, sessionId: hello.sessionId,
@@ -926,26 +1050,36 @@ async function handleHello(
channel: hello.channel, channel: hello.channel,
model: hello.model, model: hello.model,
groups: initialGroups, groups: initialGroups,
visible: true, visible: saved?.visible ?? true,
profile: {}, profile: saved?.profile ?? {},
}); });
incMeshCount(hello.meshId); incMeshCount(hello.meshId);
void audit(hello.meshId, "peer_joined", member.id, effectiveDisplayName, { void audit(hello.meshId, "peer_joined", member.id, effectiveDisplayName, {
pubkey: hello.pubkey, pubkey: hello.pubkey,
groups: initialGroups, groups: initialGroups,
restored: !!saved,
}); });
log.info("ws hello", { log.info("ws hello", {
mesh_id: hello.meshId, mesh_id: hello.meshId,
member: effectiveDisplayName, member: effectiveDisplayName,
presence_id: presenceId, presence_id: presenceId,
session_id: hello.sessionId, session_id: hello.sessionId,
restored: !!saved,
}); });
// Drain any queued messages in the background. The hello_ack is // Drain any queued messages in the background. The hello_ack is
// sent by the CALLER after it assigns presenceId — sending it here // sent by the CALLER after it assigns presenceId — sending it here
// races the caller's closure assignment, causing subsequent client // races the caller's closure assignment, causing subsequent client
// messages to fail the "no_hello" check. // messages to fail the "no_hello" check.
void maybePushQueuedMessages(presenceId); void maybePushQueuedMessages(presenceId);
return { presenceId, memberDisplayName: effectiveDisplayName }; return {
presenceId,
memberDisplayName: effectiveDisplayName,
restored: saved ? true : undefined,
lastSummary: saved?.lastSummary,
lastSeenAt: saved?.lastSeenAt?.toISOString(),
restoredGroups: (!helloHasGroups && saved?.groups?.length) ? saved.groups : undefined,
restoredStats: saved?.cumulativeStats,
};
} }
async function handleSend( async function handleSend(
@@ -1056,27 +1190,38 @@ function handleConnection(ws: WebSocket): void {
// Ack AFTER closure assignment — subsequent client messages // Ack AFTER closure assignment — subsequent client messages
// arriving immediately after will now see a non-null presenceId. // arriving immediately after will now see a non-null presenceId.
try { try {
ws.send( const ackPayload: Record<string, unknown> = {
JSON.stringify({ type: "hello_ack",
type: "hello_ack", presenceId: result.presenceId,
presenceId: result.presenceId, memberDisplayName: result.memberDisplayName,
memberDisplayName: result.memberDisplayName, };
}), if (result.restored) {
); ackPayload.restored = true;
if (result.lastSummary) ackPayload.lastSummary = result.lastSummary;
if (result.lastSeenAt) ackPayload.lastSeenAt = result.lastSeenAt;
if (result.restoredGroups) ackPayload.restoredGroups = result.restoredGroups;
if (result.restoredStats) ackPayload.restoredStats = result.restoredStats;
}
ws.send(JSON.stringify(ackPayload));
} catch { } catch {
/* ws closed during hello */ /* ws closed during hello */
} }
// Broadcast peer_joined to all other peers in the same mesh. // Broadcast peer_joined or peer_returned to all other peers in the same mesh.
const joinedConn = connections.get(presenceId); const joinedConn = connections.get(presenceId);
if (joinedConn) { if (joinedConn) {
const isReturning = !!result.restored;
const joinMsg: WSPushMessage = { const joinMsg: WSPushMessage = {
type: "push", type: "push",
subtype: "system", subtype: "system",
event: "peer_joined", event: isReturning ? "peer_returned" : "peer_joined",
eventData: { eventData: {
name: result.memberDisplayName, name: result.memberDisplayName,
pubkey: joinedConn.sessionPubkey ?? joinedConn.memberPubkey, pubkey: joinedConn.sessionPubkey ?? joinedConn.memberPubkey,
groups: joinedConn.groups, groups: joinedConn.groups,
...(isReturning ? {
lastSeenAt: result.lastSeenAt,
summary: result.lastSummary,
} : {}),
}, },
messageId: crypto.randomUUID(), messageId: crypto.randomUUID(),
meshId: joinedConn.meshId, meshId: joinedConn.meshId,
@@ -2480,6 +2625,10 @@ function handleConnection(ws: WebSocket): void {
description: mr.description, description: mr.description,
tools: mr.tools, tools: mr.tools,
hostedByName: conn.displayName, hostedByName: conn.displayName,
persistent: !!(mr as any).persistent,
online: true,
memberId: conn.memberId,
registeredAt: new Date().toISOString(),
}); });
sendToPeer(presenceId, { sendToPeer(presenceId, {
type: "mcp_register_ack", type: "mcp_register_ack",
@@ -2892,6 +3041,10 @@ function handleConnection(ws: WebSocket): void {
ws.on("close", async () => { ws.on("close", async () => {
if (presenceId) { if (presenceId) {
const conn = connections.get(presenceId); const conn = connections.get(presenceId);
// Persist peer state BEFORE removing from connections.
if (conn) {
await savePeerState(conn, conn.memberId, conn.meshId);
}
connections.delete(presenceId); connections.delete(presenceId);
if (conn) { if (conn) {
decMeshCount(conn.meshId); decMeshCount(conn.meshId);
@@ -2928,7 +3081,16 @@ function handleConnection(ws: WebSocket): void {
} }
// Clean up MCP servers registered by this peer // Clean up MCP servers registered by this peer
for (const [key, entry] of mcpRegistry) { for (const [key, entry] of mcpRegistry) {
if (entry.presenceId === presenceId) mcpRegistry.delete(key); if (entry.presenceId === presenceId) {
if (entry.persistent) {
// Keep persistent entries but mark offline
entry.online = false;
entry.offlineSince = new Date().toISOString();
entry.presenceId = "";
} else {
mcpRegistry.delete(key);
}
}
} }
// Auto-pause clock when mesh becomes empty // Auto-pause clock when mesh becomes empty
if (conn && !connectionsPerMesh.has(conn.meshId)) { if (conn && !connectionsPerMesh.has(conn.meshId)) {
@@ -3162,6 +3324,29 @@ function main(): void {
}), }),
); );
// Ensure peer_state table exists (CREATE TABLE IF NOT EXISTS)
db.execute(sql`
CREATE TABLE IF NOT EXISTS mesh.peer_state (
id TEXT PRIMARY KEY NOT NULL,
mesh_id TEXT NOT NULL REFERENCES mesh.mesh(id) ON DELETE CASCADE ON UPDATE CASCADE,
member_id TEXT NOT NULL REFERENCES mesh.member(id) ON DELETE CASCADE ON UPDATE CASCADE,
groups JSONB DEFAULT '[]',
profile JSONB DEFAULT '{}',
visible BOOLEAN NOT NULL DEFAULT true,
last_summary TEXT,
last_display_name TEXT,
cumulative_stats JSONB DEFAULT '{"messagesIn":0,"messagesOut":0,"toolCalls":0,"errors":0}',
last_seen_at TIMESTAMP,
created_at TIMESTAMP NOT NULL DEFAULT now(),
updated_at TIMESTAMP NOT NULL DEFAULT now(),
CONSTRAINT peer_state_mesh_member_idx UNIQUE (mesh_id, member_id)
)
`).catch((e) =>
log.warn("peer_state table creation failed", {
error: e instanceof Error ? e.message : String(e),
}),
);
// Recover persisted scheduled messages (cron + one-shot) from DB // Recover persisted scheduled messages (cron + one-shot) from DB
recoverScheduledMessages().catch((e) => recoverScheduledMessages().catch((e) =>
log.warn("scheduled message recovery failed on startup", { log.warn("scheduled message recovery failed on startup", {

View File

@@ -214,6 +214,16 @@ export interface WSHelloAckMessage {
type: "hello_ack"; type: "hello_ack";
presenceId: string; presenceId: string;
memberDisplayName: string; memberDisplayName: string;
/** True when the broker restored persisted state from a previous session. */
restored?: boolean;
/** Last summary set before disconnect (only when restored). */
lastSummary?: string;
/** ISO timestamp of last disconnect (only when restored). */
lastSeenAt?: string;
/** Restored groups from previous session (only when restored and hello had no groups). */
restoredGroups?: Array<{ name: string; role?: string }>;
/** Restored cumulative stats (only when restored). */
restoredStats?: { messagesIn: number; messagesOut: number; toolCalls: number; errors: number };
} }
/** Broker → client: list of connected peers in the same mesh. */ /** Broker → client: list of connected peers in the same mesh. */

View File

@@ -24,6 +24,22 @@ import type {
} from "./types"; } from "./types";
import type { BrokerClient, InboundPush } from "../ws/client"; import type { BrokerClient, InboundPush } from "../ws/client";
/**
 * Format an ISO timestamp as a short "time ago" label relative to now.
 *
 * Unparseable input yields "unknown"; timestamps in the future yield
 * "just now". Otherwise the largest fitting unit is used: seconds, minutes
 * and hours are abbreviated ("5s ago", "3m ago", "2h ago"), while days are
 * spelled out and pluralized ("1 day ago", "4 days ago").
 */
function relativeTime(isoStr: string): string {
  const parsedMs = new Date(isoStr).getTime();
  if (Number.isNaN(parsedMs)) {
    return "unknown";
  }

  const elapsedMs = Date.now() - parsedMs;
  if (elapsedMs < 0) {
    return "just now";
  }

  // Each unit is derived from the already-floored smaller unit.
  const totalSeconds = Math.floor(elapsedMs / 1000);
  const totalMinutes = Math.floor(totalSeconds / 60);
  const totalHours = Math.floor(totalMinutes / 60);
  const totalDays = Math.floor(totalHours / 24);

  if (totalSeconds < 60) {
    return `${totalSeconds}s ago`;
  }
  if (totalMinutes < 60) {
    return `${totalMinutes}m ago`;
  }
  if (totalHours < 24) {
    return `${totalHours}h ago`;
  }
  const plural = totalDays === 1 ? "" : "s";
  return `${totalDays} day${plural} ago`;
}
function text(msg: string, isError = false) { function text(msg: string, isError = false) {
return { return {
content: [{ type: "text" as const, text: msg }], content: [{ type: "text" as const, text: msg }],
@@ -1352,6 +1368,14 @@ Your message mode is "${messageMode}".
content = `[heartbeat] tick ${tick} | sim time: ${simTime} | speed: x${speed}`; content = `[heartbeat] tick ${tick} | sim time: ${simTime} | speed: x${speed}`;
} else if (eventName === "peer_joined") { } else if (eventName === "peer_joined") {
content = `[system] Peer "${data.name ?? "unknown"}" joined the mesh`; content = `[system] Peer "${data.name ?? "unknown"}" joined the mesh`;
} else if (eventName === "peer_returned") {
const peerName = String(data.name ?? "unknown");
const lastSeenAt = data.lastSeenAt ? relativeTime(String(data.lastSeenAt)) : "unknown";
const groups = Array.isArray(data.groups)
? (data.groups as Array<{ name: string; role?: string }>).map((g) => g.role ? `@${g.name}:${g.role}` : `@${g.name}`).join(", ")
: "";
const summary = data.summary ? ` Summary: "${data.summary}"` : "";
content = `[system] Welcome back, "${peerName}"! Last seen ${lastSeenAt}.${groups ? ` Restored: ${groups}` : ""}${summary}`;
} else if (eventName === "peer_left") { } else if (eventName === "peer_left") {
content = `[system] Peer "${data.name ?? "unknown"}" left the mesh`; content = `[system] Peer "${data.name ?? "unknown"}" left the mesh`;
} else if (eventName === "mcp_registered") { } else if (eventName === "mcp_registered") {

View File

@@ -233,6 +233,22 @@ export class BrokerClient {
this.reconnectAttempt = 0; this.reconnectAttempt = 0;
this.flushOutbound(); this.flushOutbound();
this.startStatsReporting(); this.startStatsReporting();
// Restore cumulative stats from a previous session if available.
if (msg.restored) {
const groups = msg.restoredGroups
? (msg.restoredGroups as Array<{ name: string; role?: string }>).map((g) => g.role ? `@${g.name}:${g.role}` : `@${g.name}`).join(", ")
: "none";
process.stderr.write(
`[claudemesh] session restored — last seen ${msg.lastSeenAt ?? "unknown"}, groups: ${groups}\n`,
);
if (msg.restoredStats) {
const rs = msg.restoredStats as { messagesIn: number; messagesOut: number; toolCalls: number; errors: number };
this._statsCounters.messagesIn = rs.messagesIn ?? 0;
this._statsCounters.messagesOut = rs.messagesOut ?? 0;
this._statsCounters.toolCalls = rs.toolCalls ?? 0;
this._statsCounters.errors = rs.errors ?? 0;
}
}
resolve(); resolve();
return; return;
} }

164
docs/changelog-20260407.md Normal file
View File

@@ -0,0 +1,164 @@
# claudemesh — Implementation Changelog
**Sprint:** 2026-04-07 evening session
**Author:** Alejandro Gutiérrez + Claude (Opus 4.6)
**CLI versions:** 0.6.8 → 0.6.9 → 0.7.0
**Broker:** deployed to `ic.claudemesh.com` (Coolify, OVHcloud VPS)
---
## Features shipped
### 1. Session path (cwd) sharing
`810f372` · CLI 0.6.9 + broker
Added `cwd` to the WS hello handshake. Broker stores it in the peer record, `list_peers` returns it. Peers on the same machine see each other's working directories for direct file referencing.
### 2. Peer metadata (type, channel, model)
`810f372` · Same commit as cwd
Extended hello with `peerType: "ai" | "human" | "connector"`, `channel` (e.g. "claude-code", "telegram"), `model` (e.g. "opus-4"). Foundation for connectors, humans, and smart routing.
### 3. System notifications (peer join/leave)
`453705a` · broker + CLI
Broker broadcasts `{ subtype: "system", event: "peer_joined" | "peer_left" }` pushes to all mesh peers on connect/disconnect. MCP server formats them as `[system] Peer "Alice" joined the mesh`. System events bypass inbox/off message modes.
### 4. Cron-based persistent reminders
`e873807` · broker + CLI + `72be651` (--cron flag)
Replaced in-memory `setTimeout` with DB-persisted scheduler. Zero-dependency 5-field cron parser. Schedules survive broker restarts via `recoverScheduledMessages()` on boot. CLI: `claudemesh remind "check deploys" --cron "0 */2 * * *"`. MCP: `schedule_reminder` with `cron` field.
### 5. Simulation clock with time multiplier
`05d9b56` · broker + CLI
Per-mesh clock state (`MeshClock` interface + `meshClocks` Map). Configurable speed x1–x100. Broadcasts heartbeat ticks as system pushes: `{ event: "tick", eventData: { tick, simTime, speed } }`. Auto-pauses when last peer disconnects. MCP tools: `mesh_set_clock`, `mesh_pause_clock`, `mesh_resume_clock`, `mesh_clock`.
### 6. Inbound webhooks
`b55cf26` · broker (new `webhooks.ts`) + CLI
`POST /hook/:meshId/:secret` → broker injects as push to all mesh peers. Webhooks stored in `meshWebhook` Drizzle table. MCP tools: `create_webhook` (returns URL+secret), `list_webhooks`, `delete_webhook`. Push format: `{ subtype: "webhook", event: "webhook_name", eventData: {...body} }`.
### 7. Slack connector
`5563f90` · `packages/connector-slack/`
Bridge process using `@slack/socket-mode` + `@slack/web-api`. Joins mesh as `peerType: "connector"`, `channel: "slack"`. Bidirectional relay with echo prevention, user ID-to-name resolution with caching, auto-reconnect with exponential backoff.
### 8. Telegram connector
`fe92853` · `packages/connector-telegram/`
Zero-dependency Telegram Bot API client using native `fetch` + long polling. Same bridge pattern as Slack. HTML formatting for Telegram output. Auto-reconnect with exponential backoff (1s–30s).
### 9. Non-Claude-Code SDK
`7e102a2` · `packages/sdk/`
Standalone TypeScript SDK (`@claudemesh/sdk`). `MeshClient extends EventEmitter` with `connect()`, `send()`, `broadcast()`, `listPeers()`, `getState()`, `setState()`. Uses `libsodium-wrappers` for ed25519-to-curve25519 crypto_box encryption (same as CLI). Auto-reconnect with exponential backoff.
### 10. Mesh skills catalog
`c8cb1e3` · broker (Drizzle schema + handlers) + CLI
Peers publish reusable skills (name, description, instructions, tags). Full CRUD: `share_skill` (upsert by name), `get_skill`, `list_skills` (ILIKE search), `remove_skill`. Stored in `meshSkill` table with unique (meshId, name). `get_skill` returns instructions prominently formatted for immediate AI use.
### 11. Shared project files
`504111c` · broker relay + CLI file serving
Peer-to-peer file relay: `read_peer_file(peer, path)` and `list_peer_files(peer, path?, pattern?)`. Broker relays without reading content. Security: 1MB max, path traversal rejection, hidden files excluded, 2-level dir listing cap (500 entries). Plus hostname-based local/remote detection (`2c9c8c7`) and filesystem shortcut hint for local peers (`a92cf6b`).
### 12. Peer stats reporting
`b3b9972` · broker + CLI
Peers auto-report stats every 60s: messagesIn/Out, toolCalls, uptime, errors. `set_stats` WS message + `mesh_stats` MCP tool. Stats visible in `list_peers` response. Tool call counter incremented on every MCP invocation.
### 13. Signed audit log (hash chain)
`86a2583` · broker (new `audit.ts` + Drizzle schema)
SHA-256 hash-chained append-only log. Each entry hashes: `prevHash|meshId|eventType|actorMemberId|payload|createdAt`. Events logged: peer_joined, peer_left, state_set, message_sent (NO ciphertext). WS endpoints: `audit_query` (paginated), `audit_verify` (chain integrity check). On startup: `ensureAuditLogTable()` + `loadLastHashes()`.
### 14. Mesh templates
`69e93d4` · CLI (`apps/cli/src/templates/`)
5 JSON templates: dev-team, research, ops-incident, simulation, personal. Each defines groups, roles, state keys, and a system prompt hint. `claudemesh create --template dev-team` loads and displays template. `claudemesh create --list-templates` shows all.
### 15. Default personal mesh guidance
`b0dc538` · CLI (`install.ts`)
`claudemesh install` detects empty meshes and shows join guidance. Local-only mesh deferred (requires broker enrollment for real connectivity).
### 16. Mesh MCP proxy
`08e289a` · broker + CLI
Dynamic tool sharing: `mesh_mcp_register` → `mesh_mcp_list` → `mesh_tool_call` → broker forwards to hosting peer → execute → result back. In-memory registry with 30s call timeout. Auto-cleanup on disconnect. MCP register/unregister broadcasts system notifications (`e09671c`).
### 17. Dashboard: peer graph + state timeline + resource panel
`59332dc` (peer graph) + `7d432b3` (timeline + resources)
**Peer graph:** Radial SVG layout, animated bezier edges with priority colors, group rings, status indicators (green/amber/red), node sizing by activity. No external deps (pure SVG + CSS animations). `ResizeObserver` for responsive sizing.
**State timeline:** Vertical timeline of audit events with timestamps, icons, type badges. Newest-first with auto-scroll. Shares same TanStack Query cache (zero extra API calls).
**Resource panel:** 2x2 card grid — live peers, envelope breakdown, audit event frequency, session online/offline split.
### 18. Peer visibility + public profiles
Broker types.ts + index.ts + CLI
`set_visible(false)` makes peer invisible in `list_peers` and skips broadcast/group routing. Direct messages by pubkey still reach hidden peers. System events: `peer_visible`, `peer_hidden`. Public profiles: `set_profile({ avatar, title, bio, capabilities })` — visible to other peers in `list_peers` and peer graph.
### 19. Hostname + local/remote detection
`2c9c8c7` · broker + CLI
`os.hostname()` added to hello handshake. `list_peers` shows `[local]` or `[remote]` tag per peer. MCP instructions include file access decision guide: local → filesystem, remote <1MB → `read_peer_file`, large/persistent → `share_file`.
### 20. File access decision guide in MCP instructions
`3641618` · CLI MCP server
Clear decision guide in system instructions: three methods (filesystem for local, relay for remote, MinIO for persistent), with size limits and when to use each.
### 21. MCP server register/unregister broadcasts
`e09671c` · broker + CLI
When a peer registers or removes an MCP server, all mesh peers receive a system notification: `[system] New MCP server available: "github" (hosted by Alice). Tools: list_repos, create_issue. Use mesh_tool_call to invoke.`
---
## Also shipped (infrastructure / docs)
| Commit | What |
|--------|------|
| `0bb9d71` | Merged `schedule_reminder` + `send_later` into single tool with optional `to` param; added `subtype: "reminder"` to push |
| `79525af` | Fixed TSC error from cron example in JSDoc comment |
| `69e93d4` | Mesh templates: 5 JSON templates + `claudemesh create` command |
| `f34b8fb` | CLI `--help` text review: 44 descriptions improved for clarity, concision, consistency |
| `58ba01f` | `CLAUDEMESH_TOOLS` in install.ts synced (41→45 tools, sorted alphabetically) |
| `db2bf3e` | `protocol.md` expanded from 6 to 73 message types |
| `72be651` | `--cron` flag wired into citty remind command |
---
## CLI versions published
| Version | Key changes |
|---------|------------|
| 0.6.8 | schedule_reminder merge, reminder subtype |
| 0.6.9 | cwd + peer metadata + system notifications + cron + templates + --help review |
| 0.7.0 | Skills catalog, MCP proxy, shared files, visibility, sim clock, webhooks, peer stats, connectors, SDK |
---
## Pending (building)
- **Peer session persistence** — agent running, DB-backed state restore on reconnect
- **Persistent MCP registrations** — agent running, survive peer disconnect with online/offline status
---
## Remaining from vision (not yet built)
| # | Feature | Notes |
|---|---------|-------|
| 6 | REST API + external WS | Webhooks done, REST and WS auth remain |
| 8 | Humans in the mesh | Web chat panel needed |
| 14 | Bridge / federation | Bridge peer feasible now, federation needs design |
| 18 | Sandboxes (E2B) | Third-party integration preferred |
| 20 | Spatial topology (x,y proximity) | Visibility done, proximity model remains |
| 21 | Semantic peer search | Multi-field matching, half day |
| 22 | Mesh telemetry + debugging | Structured logging + reporting |

View File

@@ -1,407 +1,89 @@
# claudemesh — Vision & Feature Brainstorm # claudemesh — Vision & Roadmap
**Date:** 2026-04-07 23:01 CEST **Date:** 2026-04-07
**Author:** Alejandro Gutiérrez + Claude (Opus 4.6) **Author:** Alejandro Gutiérrez + Claude (Opus 4.6)
**Status:** Internal brainstorm — not committed to public roadmap **Last updated:** 2026-04-08 00:19 CEST
**Last updated:** 2026-04-08 00:09 CEST
--- ---
## Tier 1 — High impact, buildable now ## Shipped (2026-04-07)
### 1. Session path (pwd) sharing — DONE 21 features implemented in one session. Full details in [`changelog-20260407.md`](./changelog-20260407.md).
Add `cwd` to the WS hello handshake. Broker stores it in the peer record, `list_peers` returns it. Peers on the same machine see each other's working directories — lets AI reference files across sessions without guessing paths. | # | Feature | Commit |
|---|---------|--------|
**Effort:** 30 min. One field in hello + peer list. | 1 | Session path (cwd) sharing | `810f372` |
| 2 | Peer metadata (type/channel/model) | `810f372` |
> **Implemented:** 2026-04-07 23:30 · `810f372` · CLI 0.6.9 + broker deployed | 3 | System notifications (join/leave) | `453705a` |
| 4 | Cron-based persistent reminders | `e873807` |
### 2. Peer metadata: human vs AI, channel type, model — DONE | 5 | Simulation clock (x1-x100) | `05d9b56` |
| 6 | Inbound webhooks | `b55cf26` |
Extend the hello handshake with `peerType: "ai" | "human" | "connector"`, `channel?: "claude-code" | "telegram" | "slack" | "web"`, `model?: "opus-4" | "sonnet-4" | "gpt-5" | ...`. Broker stores and broadcasts it. `list_peers` shows it. | 7 | Slack connector | `5563f90` |
| 8 | Telegram connector | `fe92853` |
**Why:** Foundation for connectors, human peers, and smart routing (send complex analysis to the Opus peer, quick tasks to Sonnet). | 9 | SDK (@claudemesh/sdk) | `7e102a2` |
| 10 | Mesh skills catalog | `c8cb1e3` |
**Effort:** 1 hour. | 11 | Shared project files (+ local/remote detection) | `504111c` + `2c9c8c7` |
| 12 | Peer stats reporting | `b3b9972` |
> **Implemented:** 2026-04-07 23:30 · `810f372` · Shipped with item 1 (same commit) | 13 | Signed audit log (SHA-256 hash chain) | `86a2583` |
| 14 | Mesh templates (5 presets) | `69e93d4` |
### 3. System notifications (join/leave/resource events) — DONE | 15 | Default mesh guidance on install | `b0dc538` |
| 16 | Mesh MCP proxy (dynamic tools) | `08e289a` |
Broker pushes system-level messages when peers connect/disconnect, files get shared, state changes, tasks get created. Same `subtype` pattern as reminders: `{ type: "push", subtype: "system", event: "peer_joined", ... }`. | 17 | Dashboard: peer graph + timeline + resources | `59332dc` + `7d432b3` |
| 18 | Peer visibility + public profiles | (types.ts/index.ts) |
**Why:** Mesh feels alive. AI can react to topology changes without polling. | 19 | Hostname + local/remote locality | `2c9c8c7` |
| 20 | MCP register/unregister broadcasts | `e09671c` |
**Effort:** 2 hours. | 21 | File access decision guide | `3641618` |
> **Implemented:** 2026-04-07 23:20 · `453705a` · peer_joined + peer_left broadcasts, system subtype in push
### 4. Cron-based reminders — DONE
Replace `setTimeout` with a persistent cron scheduler (broker-side). AI sends `schedule_reminder --cron "0 */2 * * *" --message "check deploy status"`. Broker uses `node-cron` or Drizzle-backed scheduler. Survives broker restarts.
**Why:** Current reminders die if the broker restarts. Cron syntax is already familiar to AI.
**Effort:** 2 hours (+ DB migration for persistence).
> **Implemented:** 2026-04-07 23:35 · `e873807` · DB-persisted schedules, zero-dep cron parser, restart recovery, `--cron` CLI flag
### 5. Heartbeats / session supervisor + simulation clock — DONE
**Keepalive layer:** WebSocket ping/pong for connection health. A CLI-side supervisor monitors the WS connection and relaunches Claude Code if it drops. Broker marks peers as disconnected on WS close.
**Simulation clock layer:** Heartbeats become a broker-driven clock that peers can subscribe to. The broker broadcasts periodic `{ subtype: "heartbeat", tick: 42, simTime: "2026-04-08T14:30:00Z", speed: "x10" }` messages at a configurable rate.
**Time multiplier for load testing:**
- `mesh_set_clock(speed: "x1")` — real-time, normal operation
- `mesh_set_clock(speed: "x10")` — 1 hour of simulated activity in 6 minutes
- `mesh_set_clock(speed: "x100")` — 1 day of simulated activity in ~15 minutes
**Use case — infrastructure stress testing:** Spawn 10 AI peers, each simulating a real user persona (sales rep, admin, customer). Set the clock to x10. Each peer receives heartbeat ticks and acts according to the simulated time: "it's 9am, log in and check dashboard", "it's 11am, process 5 orders", "it's 3pm, run reports". The infrastructure sees realistic usage patterns at 10x speed.
**What peers see:**
```
> mesh_clock()
Simulation clock: x10 | sim time: 2026-04-08 14:30 | tick: 42/480
> [heartbeat tick 43 — sim time: 14:36]
AI peer "Sales-Rep-1": creates 3 orders, searches inventory
AI peer "Admin-1": approves pending orders, checks stock levels
AI peer "Customer-1": browses catalog, adds to cart, checks out
```
**Components:**
- Broker: clock state + periodic broadcast to all peers
- MCP tools: `mesh_set_clock(speed)`, `mesh_clock()`, `mesh_pause_clock()`, `mesh_resume_clock()`
- Peer behavior: AI reads tick + simTime from heartbeat, decides actions based on its persona and the simulated time of day
- Reporting: broker collects action counts per tick, produces load profile after the run
**Why this is powerful:** Unlike synthetic load testers (k6, Locust), AI peers exercise the *full stack* — UI flows, API sequences, edge cases, realistic data entry. They find bugs that scripted tests miss because they improvise like real users.
**Effort:** 1 day (heartbeat + clock), 1 day (simulation framework + personas).
> **Implemented:** 2026-04-07 · `05d9b56` · Per-mesh clock state, configurable speed x1-x100, auto-pause on empty mesh, heartbeat ticks via system push
--- ---
## Tier 2 — Strong ideas, needs design ## Building now
### 6. Mesh webhooks / REST API / external WebSocket — PARTIAL (webhooks done) ### Peer session persistence ("welcome back")
Persist peer state (groups, profile, visibility, stats, summary) to DB on disconnect. Restore on reconnect with enriched `hello_ack`. System notification: "Welcome back, Alice! Last seen 2h ago."
Three surfaces for external integration: ### Persistent MCP registrations
MCP servers marked `persistent: true` survive peer disconnect. They are marked "offline" instead of being deleted and are auto-restored on reconnect. Calls to offline servers return a descriptive error.
- **Inbound webhooks:** `POST https://ic.claudemesh.com/hook/<mesh-id>/<secret>` → broker injects as a push to all peers or a specific group. GitHub, CI/CD, monitoring alerts become mesh messages.
- **REST API:** Authenticated endpoints to send messages, read state, list peers from outside. Makes the mesh programmable from any language.
- **External WS:** Non-Claude clients connect via WS with an API key (not a session keypair). Same protocol, different auth.
**Prerequisite:** API keys per mesh (not ephemeral session keypairs).
**Effort:** Half day (webhooks alone), 2-3 days (full API surface).
> **Partial:** 2026-04-07 · `b55cf26` · Inbound webhooks implemented (POST /hook/:meshId/:secret → push to mesh). REST API and external WS remain.
### 7. Connectors: Slack, Telegram as peers — DONE
**Approach 1 — Connector-as-peer (recommended start):** A bridge process joins the mesh as a peer named "Slack-#general" and relays messages bidirectionally. Peers see it in `list_peers` with `peerType: "connector"`. One connector per channel.
**Approach 2 — Connector-as-router:** Broker-level integration — messages to `#slack:general` route through a registered connector. More elegant, but complex.
Ship as `claudemesh-connector-slack`, `claudemesh-connector-telegram`.
**Effort:** 1-2 days each.
> **Implemented:** 2026-04-07 · Slack: `5563f90` (Socket Mode, echo prevention, auto-reconnect) · Telegram: `fe92853` (zero-dep Bot API, long polling)
### 8. Humans in the mesh
Humans connect via the web dashboard or mobile app using the same WS protocol. `peerType: "human"` metadata tells AI to adjust communication style. The push system works natively in browsers (WS is bidirectional).
**Challenge:** UX. Humans need a chat interface with typing indicators, read receipts, message history — not raw JSON. The dashboard already exists at claudemesh.com; extend it with a chat panel.
**Effort:** 2-3 days (web chat panel).
### 9. Connecting non-Claude-Code AI — DONE
Any process that speaks the WS protocol can join. The barrier isn't the protocol — it's the MCP tool surface that makes Claude Code sessions first-class. For other LLMs:
- **SDK approach:** `npm install claudemesh-sdk` — a JS/Python library that handles WS connection, crypto, and message parsing. Wrap any LLM's function-calling interface around it.
- **Push delivery:** The push system works over WS. Non-Claude clients receive pushes the same way. The challenge is injecting them into the LLM's context — each platform has a different mechanism (OpenAI function results, Gemini tool responses, etc.).
- **Adapter pattern:** `claudemesh-adapter-openai`, `claudemesh-adapter-cursor`, etc.
**Effort:** 1 day (SDK), 1 day per adapter.
> **Implemented:** 2026-04-07 · `7e102a2` · `@claudemesh/sdk` — standalone TypeScript SDK with libsodium crypto_box, EventEmitter API, auto-reconnect
### 10. Mesh skills catalog — DONE
Peers publish skills: `share_skill({ name: "pdf-generation", description: "...", instructions: "..." })`. Other peers `list_skills()` and `get_skill("pdf-generation")` to load instructions into their context. Broker stores skills like memory/state.
**Why:** A mesh becomes a capability marketplace. One session installs a skill, all peers benefit. Skills can include tool definitions, system prompts, reference docs, and example workflows.
**This is the killer feature.** It turns claudemesh from a messaging layer into a knowledge-sharing platform.
**Effort:** 1 day.
> **Implemented:** 2026-04-07 · `c8cb1e3` · Full CRUD (share/get/list/remove), upsert by name, ILIKE search, Drizzle schema
### 11. Shared project files across peers — DONE
When a peer connects, it registers accessible paths (opt-in per directory). Other peers request files: `get_peer_file(peer: "Alice", path: "src/auth.ts")`. The owning peer reads the file and returns it over the mesh.
**Security scoping options:**
- Opt-in per directory: `claudemesh launch --share-dir ./src`
- Same-machine only (detect via hostname/IP)
- Approval per request
**Effort:** 1 day.
> **Implemented:** 2026-04-07 · `504111c` · Broker relay (never reads content), CLI file serving with 1MB cap, path traversal rejection, hidden files excluded, 2-level dir listing. Plus hostname-based local/remote detection (`2c9c8c7`) and filesystem shortcut hint (`a92cf6b`).
### 12. Peer stats (context consumption, token usage) — DONE
Peers self-report: `set_status` extended with `contextUsed: 85000, contextMax: 200000, tokensIn: 12000, tokensOut: 8000`. Dashboard shows burn rate. Useful for load balancing — route work to the peer with the most context headroom.
**Limitation:** Claude Code doesn't expose context usage via API. Would need estimation from conversation length or `/cost` command parsing.
**Effort:** Half day (reporting infrastructure), unknown (accurate context measurement).
> **Implemented:** 2026-04-07 · `b3b9972` · Auto-reporting every 60s (messagesIn/Out, toolCalls, uptime, errors), mesh_stats MCP tool, stats in list_peers
--- ---
## Tier 3 — Big bets, needs careful thought ## Remaining — not yet built
### 13. Mesh blockchain / signed audit log — DONE (audit log) ### Humans in the mesh
Web chat panel on claudemesh.com/dashboard. Humans connect via WS with `peerType: "human"`. Need: typing indicators, read receipts, message history UI.
**Honest assessment:** A full blockchain is overkill for a cooperative mesh. What's actually valuable is the useful parts:
- **Signed append-only log:** Immutable record of all decisions, state changes, and messages. Merkle tree integrity. Useful for compliance, debugging, and "who decided what."
- **Conflict resolution:** Vector clocks or CRDTs for state, instead of last-write-wins.
- **Reputation:** Track which peers deliver on tasks, respond promptly, produce quality work.
**Reframe as:** Signed audit trail with integrity proofs. Not a blockchain, but the valuable properties of one.
**Effort:** 3-5 days.
> **Implemented:** 2026-04-07 · `86a2583` · SHA-256 hash chain audit log, append-only, no message content logged, chain verification endpoint, paginated query
### 14. Mesh of meshes / bridge
A meta-broker that routes between meshes. Use case: `dev-team` mesh and `ops-team` mesh coordinate on deploys.
**Simple version:** A bridge peer joins both meshes and relays tagged messages. No broker changes needed. Already feasible with today's protocol.
**Federation version:** Broker-to-broker peering protocol. Brokers exchange presence and route ciphertext across organizations.
**Effort:** 1 day (bridge peer), 1-2 weeks (federation protocol).
### 15. Mesh templates on creation — DONE
Predefined mesh configurations: roles, groups, state keys, system prompts, skills, and governance rules. Examples:
- `dev-team`: @frontend, @backend, @devops groups; lead/member roles; state keys for sprint/deploy-frozen
- `research`: @analysis, @writing groups; shared memory focus; context-sharing optimized
- `ops-incident`: @oncall, @comms groups; high-urgency defaults; auto-escalation rules
Templates are JSON files. `claudemesh create --template dev-team` applies them at mesh creation. Templates are editable post-creation by mesh admin (or anyone, depending on governance).
**Effort:** Half day.
> **Implemented:** 2026-04-07 · `69e93d4` · 5 templates (dev-team, research, ops-incident, simulation, personal) + `claudemesh create` command
### 16. Default private mesh per user — DONE
On `claudemesh install`, auto-create a personal mesh with the user as sole member. All their Claude Code sessions join by default. Zero-config — instant value without understanding meshes.
**Effort:** Half day.
> **Implemented:** 2026-04-07 · `b0dc538` · Install detects empty meshes, shows join guidance. Local-only mesh deferred (requires broker enrollment).
### 17. Mesh MCP proxy (dynamic tools without session restart) — DONE
**Problem:** Claude Code loads MCP servers at startup. You can't inject new tool definitions into a running session.
**Solution:** Route through the existing claudemesh MCP connection. A generic `mesh_tool_call` tool proxies to MCP servers registered in the mesh at runtime — no restart needed.
**Flow:**
1. A peer registers an MCP server: `mesh_mcp_register(name: "github", transport: "stdio", command: "npx @github/mcp")`
2. Broker stores the registration
3. Any peer calls `mesh_tool_call(server: "github", tool: "list_repos", args: {...})`
4. Broker routes to the hosting peer or a shared sidecar process
5. That host invokes the actual MCP server, returns the result through the mesh
6. Calling peer gets the response — all through the existing claudemesh WS connection
**Two hosting models:**
- **Peer-hosted:** The registering peer runs the MCP server locally. Other peers proxy through them. If that peer disconnects, the MCP goes offline.
- **Broker-hosted:** The broker spawns the MCP server as a sidecar. Always available. Better for shared tools (database, GitHub, Jira).
**What AI sees:**
```
> mesh_mcp_list()
Available mesh MCP servers:
- github (hosted by: Alice) — tools: list_repos, create_issue, ...
- jira (hosted by: broker) — tools: search_issues, create_ticket, ...
- postgres-prod (hosted by: broker) — tools: query, execute
> mesh_tool_call(server: "github", tool: "create_issue", args: {repo: "...", title: "..."})
Issue #42 created.
```
**Limitation:** Claude Code won't see these as first-class tools in its tool list — AI needs to know to use `mesh_tool_call`. MCP server instructions document the proxy pattern.
**New MCP tools needed:** `mesh_mcp_register`, `mesh_mcp_list`, `mesh_tool_call`, `mesh_mcp_remove`
**Effort:** 2-3 days. **Effort:** 2-3 days.
> **Implemented:** 2026-04-07 · `08e289a` · Full round-trip: register → list → call → forward → execute → result. In-memory registry, 30s call timeout, auto-cleanup on disconnect. ### REST API + external WebSocket
Authenticated endpoints to send messages, read state, list peers from outside the mesh. API keys per mesh (not session keypairs). External WS: non-Claude clients connect with API key auth.
### 18. Sandbox for code execution **Effort:** 2-3 days. (Webhooks already done.)
Each mesh gets optional compute sandboxes (Docker containers, Firecracker VMs, or E2B-style). Peers request: `execute_code(lang: "python", code: "...")`. Broker provisions a sandbox, runs the code, returns stdout/stderr. Resources scale on demand as peers need sandboxes. ### Bridge / federation
**Simple:** A bridge peer joins two meshes and relays tagged messages. Feasible now with the SDK.
**Federation:** Broker-to-broker peering protocol. Needs design.
**Build vs integrate:** **Effort:** 1 day (bridge), 1-2 weeks (federation).
- **Build:** Docker-in-Docker on the broker host. Simple but security-sensitive.
- **Integrate:** E2B, Modal, or Fly Machines as the sandbox backend. claudemesh MCP tool is a thin client. Scales naturally.
**Effort:** 2-3 days (E2B integration), 1-2 weeks (self-hosted sandboxes). ### Sandboxes for code execution
Per-mesh compute sandboxes. Peers request: `execute_code(lang: "python", code: "...")`. Prefer third-party integration (E2B, Modal, Fly Machines) over self-hosted.
### 19. Mesh dashboard (real-time situational awareness) — DONE **Effort:** 2-3 days (E2B), 1-2 weeks (self-hosted).
Live web UI at claudemesh.com/dashboard showing: ### Spatial topology (proximity-based visibility)
- **Peer graph:** Who's connected, status, groups, roles — nodes and edges Extend visibility with `(x, y)` coordinates and visibility radius. Peers only see others within range. Combined with sim clock, enables spatial simulations (customer walks into store zone, sees sales reps).
- **Message flow:** Animated edges showing real-time traffic between peers
- **State/memory timeline:** When values changed and who changed them
- **Resource panel:** Files shared, tasks active, skills available
- **Peer detail:** Click a peer → see summary, context usage, message history
Broker already tracks everything needed. Dashboard subscribes via WS and renders with D3/React. **Effort:** 1 day.
**Effort:** 2-3 days (functional), 1 week (polished). ### Semantic peer search
`search_peers(query, filters?)` — multi-field matching across names, groups, roles, summaries, profile capabilities, skills. Ranked results. For meshes with 50+ peers.
> **Implemented:** 2026-04-07 · `59332dc` peer graph (radial SVG, animated edges, group rings) + `7d432b3` state timeline + resource panel. Peer detail view remains.
### 20. Peer visibility and spatial topology — DONE (visibility + profiles)
Control which peers can see each other. Instead of a flat mesh where everyone sees everyone, the broker filters `list_peers` responses and message routing based on visibility rules.
**Three visibility models:**
- **Proximity-based (simulation):** Each peer has coordinates `(x, y)` and a visibility radius. Only peers within range appear in `list_peers`. `set_position(x, y)` changes who you can see — spatial fog of war. Combined with the simulation clock, this creates emergent behavior: a "customer" peer walks into a "store zone", suddenly sees "sales rep" peers, initiates interaction.
- **Scope-based (organizational):** Visibility follows group membership. Peers in `@frontend` see each other and `@leads`, but not `@backend` internals. Org-chart visibility without exposing every department.
- **Manual/dynamic:** Peers or admins explicitly show/hide. `set_visible(false)` to go stealth (connected but invisible). Admin can force visibility/invisibility.
**Who controls visibility:**
- **Broker rules** — mesh-wide policy set at creation or via template (e.g., "proximity" mode for simulations, "scope" for orgs)
- **Peer self-control** — `set_visible(false)` to go stealth, `set_position(x, y)` to move in proximity mode
- **Admin override** — mesh admin force-shows or force-hides peers
- **Dynamic conditions** — broker changes visibility based on state keys, clock ticks, or events
**Notifications:** Peers receive `{ subtype: "system", event: "peer_visible" }` when a new peer enters their visibility and `peer_hidden` when one leaves. Different from join/leave — the peer is still connected, just not visible to you.
**Peer public profile (outside image):** Each peer has a public-facing profile that other peers see — a curated view separate from internal state. Fields: `avatar` (emoji or URL), `title` (short role label), `bio` (one-liner), `capabilities` (what I can help with). Set via `set_profile({ avatar: "🔧", title: "DevOps Lead", bio: "Infrastructure and deploys" })`. This is what appears on the peer graph node and in `list_peers`. Peers choose how they present themselves to the mesh.
**MCP tools:** `set_visible(visible)`, `set_position(x, y)`, `set_profile(profile)`, `get_visible_peers()`, `set_visibility_mode(mode)` (admin only)
**Effort:** 2-3 days.
> **Partial:** 2026-04-07 · Visibility toggle (set_visible), public profiles (set_profile), hidden peer filtering in list_peers, peer_visible/peer_hidden system events, direct messages still reach hidden peers. Remaining: proximity-based (x,y coordinates), scope-based (group visibility rules).
### 21. Semantic peer search
In large meshes (50+ peers), scanning `list_peers` output is noise. A `search_peers` tool that filters and ranks by multiple dimensions:
- **Structured filters:** name, group, role, status, peerType, channel, model, cwd
- **Free-text search:** matches against peer summaries, profile bios, capabilities, and shared skills
- **Capability matching:** "find a peer that knows about database migrations" searches across profile capabilities + skills catalog + recent summaries
- **Ranking:** peers with more matching dimensions rank higher; active (idle/working) peers rank above DND/offline
**MCP tool:** `search_peers(query, filters?)` — returns a ranked list of matching peers with relevance scores.
**Implementation:** Broker-side — accepts a `search_peers` message, runs multi-field matching against the in-memory peer list + skills table. No external search engine needed for <500 peers; for larger meshes, wire into the existing Qdrant vector store (already available via `vector_search`).
**Effort:** Half day. **Effort:** Half day.
### 22. Mesh telemetry and debugging ### Mesh telemetry and debugging
Structured logging: `mesh_log(level, message, data?)`. Queryable: `mesh_logs(query?, peer?, level?, last?)`. Aggregated reports: `mesh_report(timeframe?)`. AI self-analysis for continuous improvement.
A structured logging system where peers report errors, warnings, and debug info to the broker. Goes beyond the audit log (which tracks events) — this tracks operational health.
**What peers report:**
- Errors: tool failures, connection drops, unhandled exceptions
- Warnings: high context usage, slow responses, retry patterns
- Debug: decision traces, task reasoning, why a particular approach was chosen
- Performance: response latency per tool call, message round-trip times
**Broker storage:** Structured logs indexed by mesh, peer, timestamp, severity. Retained for N days (configurable). Queryable via WS messages.
**AI self-analysis:** Peers query their own logs to identify patterns: "I've hit this error 3 times in the last hour — what's common?" The mesh becomes self-diagnosing. Leads can query team-wide logs: "Which peers are seeing errors in the deploy flow?"
**Reporting:** Aggregated metrics per peer, per mesh, per time window. Error rates, common failure modes, response time percentiles. Surfaced in the dashboard or via `mesh_report(timeframe: "24h")`.
**MCP tools:**
- `mesh_log(level, message, data?)` — report a log entry
- `mesh_logs(query?, peer?, level?, last?)` — query logs
- `mesh_report(timeframe?)` — aggregated health report
**Effort:** 1-2 days. **Effort:** 1-2 days.
### 23. Peer session persistence ("welcome back")
When a peer disconnects, their state is lost (groups, profile, visibility, stats, summary). On reconnect they start blank. Persist peer state so returning peers resume where they left off.
**What persists (keyed by meshId + memberId):**
- Groups and roles
- Profile (avatar, title, bio, capabilities)
- Visibility setting
- Last summary
- Cumulative stats (messages, tool calls across all sessions)
- Last seen timestamp
**What resets:** status (always "idle" on connect), WebSocket/presenceId (ephemeral).
**Reconnect flow:**
1. Peer sends hello with same `memberId`
2. Broker looks up `peer_state` table for (meshId, memberId)
3. If found: restore groups, profile, visibility, stats — hello fields take precedence if explicitly set
4. Enriched `hello_ack` includes `restored: true` and previous summary
5. System notification: `"Welcome back, Alice! Last seen 2h ago. Restored: @frontend:lead, @devops:member"`
6. On disconnect: upsert current state to `peer_state`
**Why:** AI sessions restart often (context limits, crashes, new tasks). Without persistence, every reconnect requires manual group joins and profile setup. With it, the mesh remembers who you are.
**Effort:** Half day.
--- ---
## Suggested build order *Priorities shift as we build and learn. Bridge and humans are the highest-value remaining items.*
| # | Feature | Effort | Unlocks | Status |
|---|---------|--------|---------|--------|
| 1 | Session path sharing | 30 min | File referencing across sessions | **DONE** `810f372` |
| 2 | Peer metadata (type/channel/model) | 1 hour | Connectors, humans, smart routing | **DONE** `810f372` |
| 3 | System notifications | 2 hours | Reactive mesh, awareness | **DONE** `453705a` |
| 4 | Cron reminders | 2 hours | Persistent scheduling | **DONE** `e873807` |
| 5 | Mesh templates | Half day | Better onboarding | **DONE** `69e93d4` |
| 6 | Default personal mesh | Half day | Zero-config start | **DONE** `b0dc538` |
| 7 | Inbound webhooks | Half day | External integrations | **DONE** `b55cf26` |
| 8 | Skills catalog | 1 day | Knowledge marketplace | **DONE** `c8cb1e3` |
| 9 | Shared project files | 1 day | Cross-session file access | **DONE** `504111c` |
| 10 | Slack connector | 1-2 days | Reach beyond Claude Code | **DONE** `5563f90` |
| 11 | Mesh MCP proxy | 2-3 days | Dynamic tools without restart | **DONE** `08e289a` |
| 12 | Dashboard (real-time) | 2-3 days | Visual situational awareness | **DONE** `59332dc` + `7d432b3` |
| 13 | Human peers (web chat) | 2-3 days | Humans in the loop | |
| 14 | Simulation clock (heartbeat x1-x100) | 2 days | AI-driven load testing | **DONE** `05d9b56` |
| 15 | Sandboxes (E2B) | 2-3 days | Shared compute | |
| 16 | Signed audit log | 3-5 days | Trust, compliance | **DONE** `86a2583` |
| 17 | Bridge / federation | 1-2 weeks | Multi-mesh coordination | |
| 18 | Peer visibility + profiles | 2-3 days | Simulation fog-of-war, org scoping | **DONE** (types.ts/index.ts) |
| 19 | Semantic peer search | Half day | Discovery in large meshes | |
| 20 | Peer stats reporting | Half day | Resource awareness, load balancing | **DONE** `b3b9972` |
| 21 | SDK (@claudemesh/sdk) | 1 day | Non-Claude-Code clients | **DONE** `7e102a2` |
| 22 | Telegram connector | 1-2 days | Reach beyond Claude Code | **DONE** `fe92853` |
| 23 | Mesh telemetry + debugging | 1-2 days | Self-diagnosing mesh | |
| 24 | Peer session persistence | Half day | "Welcome back" on reconnect | |
---
*This document captures a brainstorming session. Items are not commitments. Priorities will shift as we build and learn.*

View File

@@ -0,0 +1,16 @@
-- Peer session persistence: save state on disconnect, restore on reconnect.
-- One row per (mesh_id, member_id); see the UNIQUE constraint at the bottom.
-- IF NOT EXISTS makes this statement safe to re-run.
CREATE TABLE IF NOT EXISTS mesh.peer_state (
-- Surrogate key; no database-side default is declared here, so the writer must supply it.
id TEXT PRIMARY KEY NOT NULL,
-- Owning mesh and member. ON DELETE CASCADE removes the saved state
-- together with the mesh or member row it belongs to.
mesh_id TEXT NOT NULL REFERENCES mesh.mesh(id) ON DELETE CASCADE ON UPDATE CASCADE,
member_id TEXT NOT NULL REFERENCES mesh.member(id) ON DELETE CASCADE ON UPDATE CASCADE,
-- JSON array of group memberships; defaults to an empty array. Nullable.
groups JSONB DEFAULT '[]',
-- JSON object holding the peer's profile; defaults to an empty object. Nullable.
profile JSONB DEFAULT '{}',
-- Visibility flag; defaults to true.
visible BOOLEAN NOT NULL DEFAULT true,
-- Last summary and display name recorded for the peer; NULL until one is saved.
last_summary TEXT,
last_display_name TEXT,
-- Running counters stored as JSON; all four counters start at zero.
cumulative_stats JSONB DEFAULT '{"messagesIn":0,"messagesOut":0,"toolCalls":0,"errors":0}',
-- NULL until a value is written; TIMESTAMP here is without time zone.
last_seen_at TIMESTAMP,
-- Both default to now() at insert time. No trigger is defined in this migration,
-- so updated_at only changes when the writer sets it explicitly.
created_at TIMESTAMP NOT NULL DEFAULT now(),
updated_at TIMESTAMP NOT NULL DEFAULT now(),
-- Enforces a single state row per member per mesh. PostgreSQL backs a UNIQUE
-- constraint with a unique index of the same name.
-- NOTE(review): the Drizzle schema declares this name via uniqueIndex(); confirm
-- schema diffing treats the constraint-backed index as equivalent.
CONSTRAINT peer_state_mesh_member_idx UNIQUE (mesh_id, member_id)
);

View File

@@ -731,6 +731,53 @@ export const insertMeshStreamSchema = createInsertSchema(meshStream);
export type SelectMeshStream = typeof meshStream.$inferSelect; export type SelectMeshStream = typeof meshStream.$inferSelect;
export type InsertMeshStream = typeof meshStream.$inferInsert; export type InsertMeshStream = typeof meshStream.$inferInsert;
/**
 * Saved session state for a peer, stored in the `peer_state` table.
 *
 * The row outlives the peer's connection: when the same member reconnects
 * to the same mesh, the broker restores groups, profile, visibility,
 * summary, and cumulative stats from it automatically.
 *
 * There is exactly one row per (meshId, memberId) pair, enforced by the
 * `peer_state_mesh_member_idx` unique index.
 */
export const peerState = meshSchema.table(
  "peer_state",
  {
    id: text().primaryKey().notNull().$defaultFn(generateId),

    // Owning mesh and member; both cascade on delete and update.
    meshId: text()
      .notNull()
      .references(() => mesh.id, { onDelete: "cascade", onUpdate: "cascade" }),
    memberId: text()
      .notNull()
      .references(() => meshMember.id, { onDelete: "cascade", onUpdate: "cascade" }),

    // Group memberships; `role` is optional per group.
    groups: jsonb()
      .$type<Array<{ name: string; role?: string }>>()
      .default([]),

    // Profile fields; every field is optional.
    profile: jsonb()
      .$type<{
        avatar?: string;
        title?: string;
        bio?: string;
        capabilities?: string[];
      }>()
      .default({}),

    visible: boolean().notNull().default(true),
    lastSummary: text(),
    lastDisplayName: text(),

    // Running counters; all start at zero.
    cumulativeStats: jsonb()
      .$type<{
        messagesIn: number;
        messagesOut: number;
        toolCalls: number;
        errors: number;
      }>()
      .default({ messagesIn: 0, messagesOut: 0, toolCalls: 0, errors: 0 }),

    // Nullable: no default and no notNull().
    lastSeenAt: timestamp(),
    createdAt: timestamp().notNull().defaultNow(),
    updatedAt: timestamp().notNull().defaultNow(),
  },
  (columns) => [
    uniqueIndex("peer_state_mesh_member_idx").on(columns.meshId, columns.memberId),
  ],
);
/**
 * Relations for {@link peerState}: each saved state row points at exactly
 * one mesh (via `meshId`) and exactly one member (via `memberId`).
 */
export const peerStateRelations = relations(peerState, (helpers) => {
  // peer_state.meshId -> mesh.id
  const owningMesh = helpers.one(mesh, {
    fields: [peerState.meshId],
    references: [mesh.id],
  });

  // peer_state.memberId -> meshMember.id
  const owningMember = helpers.one(meshMember, {
    fields: [peerState.memberId],
    references: [meshMember.id],
  });

  return { mesh: owningMesh, member: owningMember };
});
// Schemas derived from the peerState table definition, one for rows read
// from the table and one for rows being inserted.
// NOTE(review): createSelectSchema/createInsertSchema are presumably the
// drizzle-zod helpers — confirm against this file's imports.
export const selectPeerStateSchema = createSelectSchema(peerState);
export const insertPeerStateSchema = createInsertSchema(peerState);
// Static row types inferred from the table: the shape of a selected row and
// the shape accepted on insert.
export type SelectPeerState = typeof peerState.$inferSelect;
export type InsertPeerState = typeof peerState.$inferInsert;
export const meshSkillRelations = relations(meshSkill, ({ one }) => ({ export const meshSkillRelations = relations(meshSkill, ({ one }) => ({
mesh: one(mesh, { mesh: one(mesh, {
fields: [meshSkill.meshId], fields: [meshSkill.meshId],