2 Commits

Author SHA1 Message Date
Alejandro Gutiérrez
56b1cc0756 docs: split vision into changelog + clean roadmap
Some checks failed
CI / Lint (push) Has been cancelled
CI / Typecheck (push) Has been cancelled
CI / Broker tests (Postgres) (push) Has been cancelled
CI / Docker build (linux/amd64) (push) Has been cancelled
changelog-20260407.md: full implementation details for 21 features
vision-20260407.md: slimmed to shipped summary + remaining items

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-08 00:20:55 +01:00
Alejandro Gutiérrez
fc8a7edc23 feat: persist peer session state across disconnects ("welcome back" on reconnect)
Save groups, profile, visibility, summary, display name, and cumulative
stats to a new mesh.peer_state table on disconnect. On reconnect (same
meshId + memberId), restore them automatically — hello groups take
precedence over stored groups if provided. Broadcast peer_returned
system event with last-seen time and summary to other peers.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-08 00:20:20 +01:00
8 changed files with 533 additions and 389 deletions

View File

@@ -15,10 +15,10 @@
import { createServer, type IncomingMessage, type ServerResponse } from "node:http";
import type { Duplex } from "node:stream";
import { WebSocketServer, type WebSocket } from "ws";
import { and, eq, sql } from "drizzle-orm";
import { and, eq, isNull, sql } from "drizzle-orm";
import { env } from "./env";
import { db } from "./db";
import { messageQueue, scheduledMessage as scheduledMessageTable, meshWebhook } from "@turbostarter/db/schema/mesh";
import { messageQueue, scheduledMessage as scheduledMessageTable, meshWebhook, peerState } from "@turbostarter/db/schema/mesh";
import {
claimTask,
completeTask,
@@ -179,7 +179,7 @@ function makeClockStatus(clock: MeshClock, reqId?: string): WSServerMessage {
} as WSServerMessage;
}
// --- MCP proxy registry (in-memory, ephemeral) ---
// --- MCP proxy registry (in-memory, persistent-capable) ---
interface McpRegisteredServer {
meshId: string;
presenceId: string;
@@ -187,6 +187,11 @@ interface McpRegisteredServer {
description: string;
tools: Array<{ name: string; description: string; inputSchema: Record<string, unknown> }>;
hostedByName: string;
persistent: boolean;
online: boolean;
memberId: string;
registeredAt: string;
offlineSince?: string;
}
/** Keyed by "meshId:serverName" */
const mcpRegistry = new Map<string, McpRegisteredServer>();
@@ -858,6 +863,118 @@ function sendError(
}
}
// --- Peer state persistence ---
/**
 * Persist a disconnecting peer's session state into `peerState`, keyed by
 * (meshId, memberId), so a later reconnect can restore it.
 *
 * Written fields: groups, profile, visibility, display name, cumulative
 * stats (previously stored totals + this session's `conn.stats`), last-seen
 * time, and — when the member's live presence row carries one — the summary.
 * A stored summary is left untouched when the live presence row has none.
 *
 * Best-effort: every failure is logged with `log.warn` and swallowed, so the
 * caller's disconnect cleanup is never interrupted.
 *
 * @param conn     Connection whose groups/profile/visible/displayName/stats are saved.
 * @param memberId Member the state belongs to.
 * @param meshId   Mesh the state belongs to.
 */
async function savePeerState(conn: PeerConn, memberId: string, meshId: string): Promise<void> {
  try {
    // Read the previously stored totals so this session's counters can be added on top.
    const existing = await db
      .select()
      .from(peerState)
      .where(and(eq(peerState.meshId, meshId), eq(peerState.memberId, memberId)))
      .limit(1);
    const prev = existing[0]?.cumulativeStats as { messagesIn: number; messagesOut: number; toolCalls: number; errors: number } | null;
    const sessionStats = conn.stats ?? {};
    // NOTE(review): if the client seeds its counters from the restored totals
    // and then reports them back as session stats, `prev` would be counted
    // twice here — confirm against the stats-reporting path.
    const cumulative = {
      messagesIn: (prev?.messagesIn ?? 0) + (sessionStats.messagesIn ?? 0),
      messagesOut: (prev?.messagesOut ?? 0) + (sessionStats.messagesOut ?? 0),
      toolCalls: (prev?.toolCalls ?? 0) + (sessionStats.toolCalls ?? 0),
      errors: (prev?.errors ?? 0) + (sessionStats.errors ?? 0),
    };

    // The summary lives on the presence row (it is set via setSummary, not on
    // conn). Look it up BEFORE writing so the whole state lands in a single
    // upsert instead of an insert followed by a separate update. The lookup
    // is isolated so that a failure here still lets the rest be persisted.
    let summary: string | null = null;
    try {
      const { presence } = await import("@turbostarter/db/schema/mesh");
      const presRows = await db
        .select({ summary: presence.summary })
        .from(presence)
        .where(and(eq(presence.memberId, memberId), isNull(presence.disconnectedAt)))
        .limit(1);
      const liveSummary = presRows[0]?.summary;
      // Truthiness check on purpose: an empty summary counts as "no summary".
      if (liveSummary) summary = liveSummary;
    } catch (e) {
      log.warn("failed to read presence summary for peer state", {
        mesh_id: meshId,
        member_id: memberId,
        error: e instanceof Error ? e.message : String(e),
      });
    }

    const now = new Date();
    // Fields written identically on first insert and on conflict update.
    const snapshot = {
      groups: conn.groups,
      profile: conn.profile,
      visible: conn.visible,
      lastDisplayName: conn.displayName,
      cumulativeStats: cumulative,
      lastSeenAt: now,
      updatedAt: now,
    };
    await db
      .insert(peerState)
      .values({
        meshId,
        memberId,
        ...snapshot,
        lastSummary: summary,
        createdAt: now,
      })
      .onConflictDoUpdate({
        target: [peerState.meshId, peerState.memberId],
        // Only overwrite the stored summary when this session actually has one.
        set: summary ? { ...snapshot, lastSummary: summary } : snapshot,
      });
  } catch (e) {
    log.warn("failed to save peer state", {
      mesh_id: meshId,
      member_id: memberId,
      error: e instanceof Error ? e.message : String(e),
    });
  }
}
/**
 * Load the persisted session state for a (meshId, memberId) pair.
 *
 * Resolves to `null` when no row is stored or when the lookup throws (the
 * error is logged with `log.warn`). Otherwise resolves to an object with
 * `restored: true` plus the stored fields; nullable columns are mapped to
 * `undefined`, and missing groups/profile default to `[]` / `{}`.
 */
async function restorePeerState(
  meshId: string,
  memberId: string,
): Promise<{
  restored: boolean;
  groups?: Array<{ name: string; role?: string }>;
  profile?: { avatar?: string; title?: string; bio?: string; capabilities?: string[] };
  visible?: boolean;
  lastSummary?: string;
  lastDisplayName?: string;
  cumulativeStats?: { messagesIn: number; messagesOut: number; toolCalls: number; errors: number };
  lastSeenAt?: Date;
} | null> {
  // Local aliases for the JSON column shapes the row is cast to.
  type StoredGroups = Array<{ name: string; role?: string }>;
  type StoredProfile = { avatar?: string; title?: string; bio?: string; capabilities?: string[] };
  type StoredStats = { messagesIn: number; messagesOut: number; toolCalls: number; errors: number };

  try {
    const matchesPeer = and(eq(peerState.meshId, meshId), eq(peerState.memberId, memberId));
    const found = await db.select().from(peerState).where(matchesPeer).limit(1);
    const saved = found[0];
    // No stored row: nothing to restore.
    if (!saved) {
      return null;
    }

    const groups = (saved.groups as StoredGroups) ?? [];
    const profile = (saved.profile as StoredProfile) ?? {};
    const cumulativeStats = (saved.cumulativeStats as StoredStats) ?? undefined;
    return {
      restored: true,
      groups,
      profile,
      visible: saved.visible,
      lastSummary: saved.lastSummary ?? undefined,
      lastDisplayName: saved.lastDisplayName ?? undefined,
      cumulativeStats,
      lastSeenAt: saved.lastSeenAt ?? undefined,
    };
  } catch (e) {
    const reason = e instanceof Error ? e.message : String(e);
    log.warn("failed to restore peer state", {
      mesh_id: meshId,
      member_id: memberId,
      error: reason,
    });
    return null;
  }
}
async function handleHello(
ws: WebSocket,
hello: Extract<WSClientMessage, { type: "hello" }>,
@@ -902,7 +1019,14 @@ async function handleHello(
ws.close(1008, "unauthorized");
return null;
}
const initialGroups = hello.groups ?? [];
// Attempt to restore persisted state from a previous session.
const saved = await restorePeerState(hello.meshId, member.id);
const helloHasGroups = hello.groups && hello.groups.length > 0;
// Hello groups take precedence; fall back to restored groups.
const initialGroups = helloHasGroups
? hello.groups!
: (saved?.groups ?? []);
const presenceId = await connectPresence({
memberId: member.id,
sessionId: hello.sessionId,
@@ -926,26 +1050,36 @@ async function handleHello(
channel: hello.channel,
model: hello.model,
groups: initialGroups,
visible: true,
profile: {},
visible: saved?.visible ?? true,
profile: saved?.profile ?? {},
});
incMeshCount(hello.meshId);
void audit(hello.meshId, "peer_joined", member.id, effectiveDisplayName, {
pubkey: hello.pubkey,
groups: initialGroups,
restored: !!saved,
});
log.info("ws hello", {
mesh_id: hello.meshId,
member: effectiveDisplayName,
presence_id: presenceId,
session_id: hello.sessionId,
restored: !!saved,
});
// Drain any queued messages in the background. The hello_ack is
// sent by the CALLER after it assigns presenceId — sending it here
// races the caller's closure assignment, causing subsequent client
// messages to fail the "no_hello" check.
void maybePushQueuedMessages(presenceId);
return { presenceId, memberDisplayName: effectiveDisplayName };
return {
presenceId,
memberDisplayName: effectiveDisplayName,
restored: saved ? true : undefined,
lastSummary: saved?.lastSummary,
lastSeenAt: saved?.lastSeenAt?.toISOString(),
restoredGroups: (!helloHasGroups && saved?.groups?.length) ? saved.groups : undefined,
restoredStats: saved?.cumulativeStats,
};
}
async function handleSend(
@@ -1056,27 +1190,38 @@ function handleConnection(ws: WebSocket): void {
// Ack AFTER closure assignment — subsequent client messages
// arriving immediately after will now see a non-null presenceId.
try {
ws.send(
JSON.stringify({
type: "hello_ack",
presenceId: result.presenceId,
memberDisplayName: result.memberDisplayName,
}),
);
const ackPayload: Record<string, unknown> = {
type: "hello_ack",
presenceId: result.presenceId,
memberDisplayName: result.memberDisplayName,
};
if (result.restored) {
ackPayload.restored = true;
if (result.lastSummary) ackPayload.lastSummary = result.lastSummary;
if (result.lastSeenAt) ackPayload.lastSeenAt = result.lastSeenAt;
if (result.restoredGroups) ackPayload.restoredGroups = result.restoredGroups;
if (result.restoredStats) ackPayload.restoredStats = result.restoredStats;
}
ws.send(JSON.stringify(ackPayload));
} catch {
/* ws closed during hello */
}
// Broadcast peer_joined to all other peers in the same mesh.
// Broadcast peer_joined or peer_returned to all other peers in the same mesh.
const joinedConn = connections.get(presenceId);
if (joinedConn) {
const isReturning = !!result.restored;
const joinMsg: WSPushMessage = {
type: "push",
subtype: "system",
event: "peer_joined",
event: isReturning ? "peer_returned" : "peer_joined",
eventData: {
name: result.memberDisplayName,
pubkey: joinedConn.sessionPubkey ?? joinedConn.memberPubkey,
groups: joinedConn.groups,
...(isReturning ? {
lastSeenAt: result.lastSeenAt,
summary: result.lastSummary,
} : {}),
},
messageId: crypto.randomUUID(),
meshId: joinedConn.meshId,
@@ -2480,6 +2625,10 @@ function handleConnection(ws: WebSocket): void {
description: mr.description,
tools: mr.tools,
hostedByName: conn.displayName,
persistent: !!(mr as any).persistent,
online: true,
memberId: conn.memberId,
registeredAt: new Date().toISOString(),
});
sendToPeer(presenceId, {
type: "mcp_register_ack",
@@ -2892,6 +3041,10 @@ function handleConnection(ws: WebSocket): void {
ws.on("close", async () => {
if (presenceId) {
const conn = connections.get(presenceId);
// Persist peer state BEFORE removing from connections.
if (conn) {
await savePeerState(conn, conn.memberId, conn.meshId);
}
connections.delete(presenceId);
if (conn) {
decMeshCount(conn.meshId);
@@ -2928,7 +3081,16 @@ function handleConnection(ws: WebSocket): void {
}
// Clean up MCP servers registered by this peer
for (const [key, entry] of mcpRegistry) {
if (entry.presenceId === presenceId) mcpRegistry.delete(key);
if (entry.presenceId === presenceId) {
if (entry.persistent) {
// Keep persistent entries but mark offline
entry.online = false;
entry.offlineSince = new Date().toISOString();
entry.presenceId = "";
} else {
mcpRegistry.delete(key);
}
}
}
// Auto-pause clock when mesh becomes empty
if (conn && !connectionsPerMesh.has(conn.meshId)) {
@@ -3162,6 +3324,29 @@ function main(): void {
}),
);
// Ensure peer_state table exists (CREATE TABLE IF NOT EXISTS)
db.execute(sql`
CREATE TABLE IF NOT EXISTS mesh.peer_state (
id TEXT PRIMARY KEY NOT NULL,
mesh_id TEXT NOT NULL REFERENCES mesh.mesh(id) ON DELETE CASCADE ON UPDATE CASCADE,
member_id TEXT NOT NULL REFERENCES mesh.member(id) ON DELETE CASCADE ON UPDATE CASCADE,
groups JSONB DEFAULT '[]',
profile JSONB DEFAULT '{}',
visible BOOLEAN NOT NULL DEFAULT true,
last_summary TEXT,
last_display_name TEXT,
cumulative_stats JSONB DEFAULT '{"messagesIn":0,"messagesOut":0,"toolCalls":0,"errors":0}',
last_seen_at TIMESTAMP,
created_at TIMESTAMP NOT NULL DEFAULT now(),
updated_at TIMESTAMP NOT NULL DEFAULT now(),
CONSTRAINT peer_state_mesh_member_idx UNIQUE (mesh_id, member_id)
)
`).catch((e) =>
log.warn("peer_state table creation failed", {
error: e instanceof Error ? e.message : String(e),
}),
);
// Recover persisted scheduled messages (cron + one-shot) from DB
recoverScheduledMessages().catch((e) =>
log.warn("scheduled message recovery failed on startup", {

View File

@@ -214,6 +214,16 @@ export interface WSHelloAckMessage {
type: "hello_ack";
presenceId: string;
memberDisplayName: string;
/** True when the broker restored persisted state from a previous session. */
restored?: boolean;
/** Last summary set before disconnect (only when restored). */
lastSummary?: string;
/** ISO timestamp of last disconnect (only when restored). */
lastSeenAt?: string;
/** Restored groups from previous session (only when restored and hello had no groups). */
restoredGroups?: Array<{ name: string; role?: string }>;
/** Restored cumulative stats (only when restored). */
restoredStats?: { messagesIn: number; messagesOut: number; toolCalls: number; errors: number };
}
/** Broker → client: list of connected peers in the same mesh. */

View File

@@ -24,6 +24,22 @@ import type {
} from "./types";
import type { BrokerClient, InboundPush } from "../ws/client";
/**
 * Render how long ago an ISO timestamp was, relative to the current time.
 *
 * Returns "unknown" for an unparseable timestamp, "just now" for one in the
 * future, otherwise the largest fitting unit: "Ns ago", "Nm ago", "Nh ago",
 * or "N day(s) ago".
 */
function relativeTime(isoStr: string): string {
  const elapsedMs = Date.now() - new Date(isoStr).getTime();
  // An invalid date parses to NaN, which propagates through the subtraction.
  if (Number.isNaN(elapsedMs)) return "unknown";
  if (elapsedMs < 0) return "just now";

  // Each step: unit suffix and how many of that unit make up the next one.
  const steps: ReadonlyArray<readonly [string, number]> = [
    ["s", 60],
    ["m", 60],
    ["h", 24],
  ];
  let amount = Math.floor(elapsedMs / 1000);
  for (const [suffix, perNextUnit] of steps) {
    if (amount < perNextUnit) return `${amount}${suffix} ago`;
    amount = Math.floor(amount / perNextUnit);
  }
  // Anything left over is whole days.
  const plural = amount === 1 ? "" : "s";
  return `${amount} day${plural} ago`;
}
function text(msg: string, isError = false) {
return {
content: [{ type: "text" as const, text: msg }],
@@ -1352,6 +1368,14 @@ Your message mode is "${messageMode}".
content = `[heartbeat] tick ${tick} | sim time: ${simTime} | speed: x${speed}`;
} else if (eventName === "peer_joined") {
content = `[system] Peer "${data.name ?? "unknown"}" joined the mesh`;
} else if (eventName === "peer_returned") {
const peerName = String(data.name ?? "unknown");
const lastSeenAt = data.lastSeenAt ? relativeTime(String(data.lastSeenAt)) : "unknown";
const groups = Array.isArray(data.groups)
? (data.groups as Array<{ name: string; role?: string }>).map((g) => g.role ? `@${g.name}:${g.role}` : `@${g.name}`).join(", ")
: "";
const summary = data.summary ? ` Summary: "${data.summary}"` : "";
content = `[system] Welcome back, "${peerName}"! Last seen ${lastSeenAt}.${groups ? ` Restored: ${groups}` : ""}${summary}`;
} else if (eventName === "peer_left") {
content = `[system] Peer "${data.name ?? "unknown"}" left the mesh`;
} else if (eventName === "mcp_registered") {

View File

@@ -233,6 +233,22 @@ export class BrokerClient {
this.reconnectAttempt = 0;
this.flushOutbound();
this.startStatsReporting();
// Restore cumulative stats from a previous session if available.
if (msg.restored) {
const groups = msg.restoredGroups
? (msg.restoredGroups as Array<{ name: string; role?: string }>).map((g) => g.role ? `@${g.name}:${g.role}` : `@${g.name}`).join(", ")
: "none";
process.stderr.write(
`[claudemesh] session restored — last seen ${msg.lastSeenAt ?? "unknown"}, groups: ${groups}\n`,
);
if (msg.restoredStats) {
const rs = msg.restoredStats as { messagesIn: number; messagesOut: number; toolCalls: number; errors: number };
this._statsCounters.messagesIn = rs.messagesIn ?? 0;
this._statsCounters.messagesOut = rs.messagesOut ?? 0;
this._statsCounters.toolCalls = rs.toolCalls ?? 0;
this._statsCounters.errors = rs.errors ?? 0;
}
}
resolve();
return;
}

164
docs/changelog-20260407.md Normal file
View File

@@ -0,0 +1,164 @@
# claudemesh — Implementation Changelog
**Sprint:** 2026-04-07 evening session
**Author:** Alejandro Gutiérrez + Claude (Opus 4.6)
**CLI versions:** 0.6.8 → 0.6.9 → 0.7.0
**Broker:** deployed to `ic.claudemesh.com` (Coolify, OVHcloud VPS)
---
## Features shipped
### 1. Session path (cwd) sharing
`810f372` · CLI 0.6.9 + broker
Added `cwd` to the WS hello handshake. Broker stores it in the peer record, `list_peers` returns it. Peers on the same machine see each other's working directories for direct file referencing.
### 2. Peer metadata (type, channel, model)
`810f372` · Same commit as cwd
Extended hello with `peerType: "ai" | "human" | "connector"`, `channel` (e.g. "claude-code", "telegram"), `model` (e.g. "opus-4"). Foundation for connectors, humans, and smart routing.
### 3. System notifications (peer join/leave)
`453705a` · broker + CLI
Broker broadcasts `{ subtype: "system", event: "peer_joined" | "peer_left" }` pushes to all mesh peers on connect/disconnect. MCP server formats them as `[system] Peer "Alice" joined the mesh`. System events bypass inbox/off message modes.
### 4. Cron-based persistent reminders
`e873807` · broker + CLI + `72be651` (--cron flag)
Replaced in-memory `setTimeout` with DB-persisted scheduler. Zero-dependency 5-field cron parser. Schedules survive broker restarts via `recoverScheduledMessages()` on boot. CLI: `claudemesh remind "check deploys" --cron "0 */2 * * *"`. MCP: `schedule_reminder` with `cron` field.
### 5. Simulation clock with time multiplier
`05d9b56` · broker + CLI
Per-mesh clock state (`MeshClock` interface + `meshClocks` Map). Configurable speed x1–x100. Broadcasts heartbeat ticks as system pushes: `{ event: "tick", eventData: { tick, simTime, speed } }`. Auto-pauses when last peer disconnects. MCP tools: `mesh_set_clock`, `mesh_pause_clock`, `mesh_resume_clock`, `mesh_clock`.
### 6. Inbound webhooks
`b55cf26` · broker (new `webhooks.ts`) + CLI
`POST /hook/:meshId/:secret` → broker injects as push to all mesh peers. Webhooks stored in `meshWebhook` Drizzle table. MCP tools: `create_webhook` (returns URL+secret), `list_webhooks`, `delete_webhook`. Push format: `{ subtype: "webhook", event: "webhook_name", eventData: {...body} }`.
### 7. Slack connector
`5563f90` · `packages/connector-slack/`
Bridge process using `@slack/socket-mode` + `@slack/web-api`. Joins mesh as `peerType: "connector"`, `channel: "slack"`. Bidirectional relay with echo prevention, user ID-to-name resolution with caching, auto-reconnect with exponential backoff.
### 8. Telegram connector
`fe92853` · `packages/connector-telegram/`
Zero-dependency Telegram Bot API client using native `fetch` + long polling. Same bridge pattern as Slack. HTML formatting for Telegram output. Auto-reconnect with exponential backoff (1s–30s).
### 9. Non-Claude-Code SDK
`7e102a2` · `packages/sdk/`
Standalone TypeScript SDK (`@claudemesh/sdk`). `MeshClient extends EventEmitter` with `connect()`, `send()`, `broadcast()`, `listPeers()`, `getState()`, `setState()`. Uses `libsodium-wrappers` for ed25519-to-curve25519 crypto_box encryption (same as CLI). Auto-reconnect with exponential backoff.
### 10. Mesh skills catalog
`c8cb1e3` · broker (Drizzle schema + handlers) + CLI
Peers publish reusable skills (name, description, instructions, tags). Full CRUD: `share_skill` (upsert by name), `get_skill`, `list_skills` (ILIKE search), `remove_skill`. Stored in `meshSkill` table with unique (meshId, name). `get_skill` returns instructions prominently formatted for immediate AI use.
### 11. Shared project files
`504111c` · broker relay + CLI file serving
Peer-to-peer file relay: `read_peer_file(peer, path)` and `list_peer_files(peer, path?, pattern?)`. Broker relays without reading content. Security: 1MB max, path traversal rejection, hidden files excluded, 2-level dir listing cap (500 entries). Plus hostname-based local/remote detection (`2c9c8c7`) and filesystem shortcut hint for local peers (`a92cf6b`).
### 12. Peer stats reporting
`b3b9972` · broker + CLI
Peers auto-report stats every 60s: messagesIn/Out, toolCalls, uptime, errors. `set_stats` WS message + `mesh_stats` MCP tool. Stats visible in `list_peers` response. Tool call counter incremented on every MCP invocation.
### 13. Signed audit log (hash chain)
`86a2583` · broker (new `audit.ts` + Drizzle schema)
SHA-256 hash-chained append-only log. Each entry hashes: `prevHash|meshId|eventType|actorMemberId|payload|createdAt`. Events logged: peer_joined, peer_left, state_set, message_sent (NO ciphertext). WS endpoints: `audit_query` (paginated), `audit_verify` (chain integrity check). On startup: `ensureAuditLogTable()` + `loadLastHashes()`.
### 14. Mesh templates
`69e93d4` · CLI (`apps/cli/src/templates/`)
5 JSON templates: dev-team, research, ops-incident, simulation, personal. Each defines groups, roles, state keys, and a system prompt hint. `claudemesh create --template dev-team` loads and displays template. `claudemesh create --list-templates` shows all.
### 15. Default personal mesh guidance
`b0dc538` · CLI (`install.ts`)
`claudemesh install` detects empty meshes and shows join guidance. Local-only mesh deferred (requires broker enrollment for real connectivity).
### 16. Mesh MCP proxy
`08e289a` · broker + CLI
Dynamic tool sharing: `mesh_mcp_register` → `mesh_mcp_list` → `mesh_tool_call` → broker forwards to hosting peer → execute → result back. In-memory registry with 30s call timeout. Auto-cleanup on disconnect. MCP register/unregister broadcasts system notifications (`e09671c`).
### 17. Dashboard: peer graph + state timeline + resource panel
`59332dc` (peer graph) + `7d432b3` (timeline + resources)
**Peer graph:** Radial SVG layout, animated bezier edges with priority colors, group rings, status indicators (green/amber/red), node sizing by activity. No external deps (pure SVG + CSS animations). `ResizeObserver` for responsive sizing.
**State timeline:** Vertical timeline of audit events with timestamps, icons, type badges. Newest-first with auto-scroll. Shares same TanStack Query cache (zero extra API calls).
**Resource panel:** 2x2 card grid — live peers, envelope breakdown, audit event frequency, session online/offline split.
### 18. Peer visibility + public profiles
Broker types.ts + index.ts + CLI
`set_visible(false)` makes peer invisible in `list_peers` and skips broadcast/group routing. Direct messages by pubkey still reach hidden peers. System events: `peer_visible`, `peer_hidden`. Public profiles: `set_profile({ avatar, title, bio, capabilities })` — visible to other peers in `list_peers` and peer graph.
### 19. Hostname + local/remote detection
`2c9c8c7` · broker + CLI
`os.hostname()` added to hello handshake. `list_peers` shows `[local]` or `[remote]` tag per peer. MCP instructions include file access decision guide: local → filesystem, remote <1MB → `read_peer_file`, large/persistent → `share_file`.
### 20. File access decision guide in MCP instructions
`3641618` · CLI MCP server
Clear decision guide in system instructions: three methods (filesystem for local, relay for remote, MinIO for persistent), with size limits and when to use each.
### 21. MCP server register/unregister broadcasts
`e09671c` · broker + CLI
When a peer registers or removes an MCP server, all mesh peers receive a system notification: `[system] New MCP server available: "github" (hosted by Alice). Tools: list_repos, create_issue. Use mesh_tool_call to invoke.`
---
## Also shipped (infrastructure / docs)
| Commit | What |
|--------|------|
| `0bb9d71` | Merged `schedule_reminder` + `send_later` into single tool with optional `to` param; added `subtype: "reminder"` to push |
| `79525af` | Fixed TSC error from cron example in JSDoc comment |
| `69e93d4` | Mesh templates: 5 JSON templates + `claudemesh create` command |
| `f34b8fb` | CLI `--help` text review: 44 descriptions improved for clarity, concision, consistency |
| `58ba01f` | `CLAUDEMESH_TOOLS` in install.ts synced (41→45 tools, sorted alphabetically) |
| `db2bf3e` | `protocol.md` expanded from 6 to 73 message types |
| `72be651` | `--cron` flag wired into citty remind command |
---
## CLI versions published
| Version | Key changes |
|---------|------------|
| 0.6.8 | schedule_reminder merge, reminder subtype |
| 0.6.9 | cwd + peer metadata + system notifications + cron + templates + --help review |
| 0.7.0 | Skills catalog, MCP proxy, shared files, visibility, sim clock, webhooks, peer stats, connectors, SDK |
---
## Pending (building)
- **Peer session persistence** — agent running, DB-backed state restore on reconnect
- **Persistent MCP registrations** — agent running, survive peer disconnect with online/offline status
---
## Remaining from vision (not yet built)
| # | Feature | Notes |
|---|---------|-------|
| 6 | REST API + external WS | Webhooks done, REST and WS auth remain |
| 8 | Humans in the mesh | Web chat panel needed |
| 14 | Bridge / federation | Bridge peer feasible now, federation needs design |
| 18 | Sandboxes (E2B) | Third-party integration preferred |
| 20 | Spatial topology (x,y proximity) | Visibility done, proximity model remains |
| 21 | Semantic peer search | Multi-field matching, half day |
| 22 | Mesh telemetry + debugging | Structured logging + reporting |

View File

@@ -1,407 +1,89 @@
# claudemesh — Vision & Feature Brainstorm
# claudemesh — Vision & Roadmap
**Date:** 2026-04-07 23:01 CEST
**Date:** 2026-04-07
**Author:** Alejandro Gutiérrez + Claude (Opus 4.6)
**Status:** Internal brainstorm — not committed to public roadmap
**Last updated:** 2026-04-08 00:09 CEST
**Last updated:** 2026-04-08 00:19 CEST
---
## Tier 1 — High impact, buildable now
## Shipped (2026-04-07)
### 1. Session path (pwd) sharing — DONE
21 features implemented in one session. Full details in [`changelog-20260407.md`](./changelog-20260407.md).
Add `cwd` to the WS hello handshake. Broker stores it in the peer record, `list_peers` returns it. Peers on the same machine see each other's working directories — lets AI reference files across sessions without guessing paths.
**Effort:** 30 min. One field in hello + peer list.
> **Implemented:** 2026-04-07 23:30 · `810f372` · CLI 0.6.9 + broker deployed
### 2. Peer metadata: human vs AI, channel type, model — DONE
Extend the hello handshake with `peerType: "ai" | "human" | "connector"`, `channel?: "claude-code" | "telegram" | "slack" | "web"`, `model?: "opus-4" | "sonnet-4" | "gpt-5" | ...`. Broker stores and broadcasts it. `list_peers` shows it.
**Why:** Foundation for connectors, human peers, and smart routing (send complex analysis to the Opus peer, quick tasks to Sonnet).
**Effort:** 1 hour.
> **Implemented:** 2026-04-07 23:30 · `810f372` · Shipped with item 1 (same commit)
### 3. System notifications (join/leave/resource events) — DONE
Broker pushes system-level messages when peers connect/disconnect, files get shared, state changes, tasks get created. Same `subtype` pattern as reminders: `{ type: "push", subtype: "system", event: "peer_joined", ... }`.
**Why:** Mesh feels alive. AI can react to topology changes without polling.
**Effort:** 2 hours.
> **Implemented:** 2026-04-07 23:20 · `453705a` · peer_joined + peer_left broadcasts, system subtype in push
### 4. Cron-based reminders — DONE
Replace `setTimeout` with a persistent cron scheduler (broker-side). AI sends `schedule_reminder --cron "0 */2 * * *" --message "check deploy status"`. Broker uses `node-cron` or Drizzle-backed scheduler. Survives broker restarts.
**Why:** Current reminders die if the broker restarts. Cron syntax is already familiar to AI.
**Effort:** 2 hours (+ DB migration for persistence).
> **Implemented:** 2026-04-07 23:35 · `e873807` · DB-persisted schedules, zero-dep cron parser, restart recovery, `--cron` CLI flag
### 5. Heartbeats / session supervisor + simulation clock — DONE
**Keepalive layer:** WebSocket ping/pong for connection health. A CLI-side supervisor monitors the WS connection and relaunches Claude Code if it drops. Broker marks peers as disconnected on WS close.
**Simulation clock layer:** Heartbeats become a broker-driven clock that peers can subscribe to. The broker broadcasts periodic `{ subtype: "heartbeat", tick: 42, simTime: "2026-04-08T14:30:00Z", speed: "x10" }` messages at a configurable rate.
**Time multiplier for load testing:**
- `mesh_set_clock(speed: "x1")` — real-time, normal operation
- `mesh_set_clock(speed: "x10")` — 1 hour of simulated activity in 6 minutes
- `mesh_set_clock(speed: "x100")` — 1 day of simulated activity in ~15 minutes
**Use case — infrastructure stress testing:** Spawn 10 AI peers, each simulating a real user persona (sales rep, admin, customer). Set the clock to x10. Each peer receives heartbeat ticks and acts according to the simulated time: "it's 9am, log in and check dashboard", "it's 11am, process 5 orders", "it's 3pm, run reports". The infrastructure sees realistic usage patterns at 10x speed.
**What peers see:**
```
> mesh_clock()
Simulation clock: x10 | sim time: 2026-04-08 14:30 | tick: 42/480
> [heartbeat tick 43 — sim time: 14:36]
AI peer "Sales-Rep-1": creates 3 orders, searches inventory
AI peer "Admin-1": approves pending orders, checks stock levels
AI peer "Customer-1": browses catalog, adds to cart, checks out
```
**Components:**
- Broker: clock state + periodic broadcast to all peers
- MCP tools: `mesh_set_clock(speed)`, `mesh_clock()`, `mesh_pause_clock()`, `mesh_resume_clock()`
- Peer behavior: AI reads tick + simTime from heartbeat, decides actions based on its persona and the simulated time of day
- Reporting: broker collects action counts per tick, produces load profile after the run
**Why this is powerful:** Unlike synthetic load testers (k6, Locust), AI peers exercise the *full stack* — UI flows, API sequences, edge cases, realistic data entry. They find bugs that scripted tests miss because they improvise like real users.
**Effort:** 1 day (heartbeat + clock), 1 day (simulation framework + personas).
> **Implemented:** 2026-04-07 · `05d9b56` · Per-mesh clock state, configurable speed x1-x100, auto-pause on empty mesh, heartbeat ticks via system push
| # | Feature | Commit |
|---|---------|--------|
| 1 | Session path (cwd) sharing | `810f372` |
| 2 | Peer metadata (type/channel/model) | `810f372` |
| 3 | System notifications (join/leave) | `453705a` |
| 4 | Cron-based persistent reminders | `e873807` |
| 5 | Simulation clock (x1-x100) | `05d9b56` |
| 6 | Inbound webhooks | `b55cf26` |
| 7 | Slack connector | `5563f90` |
| 8 | Telegram connector | `fe92853` |
| 9 | SDK (@claudemesh/sdk) | `7e102a2` |
| 10 | Mesh skills catalog | `c8cb1e3` |
| 11 | Shared project files (+ local/remote detection) | `504111c` + `2c9c8c7` |
| 12 | Peer stats reporting | `b3b9972` |
| 13 | Signed audit log (SHA-256 hash chain) | `86a2583` |
| 14 | Mesh templates (5 presets) | `69e93d4` |
| 15 | Default mesh guidance on install | `b0dc538` |
| 16 | Mesh MCP proxy (dynamic tools) | `08e289a` |
| 17 | Dashboard: peer graph + timeline + resources | `59332dc` + `7d432b3` |
| 18 | Peer visibility + public profiles | (types.ts/index.ts) |
| 19 | Hostname + local/remote locality | `2c9c8c7` |
| 20 | MCP register/unregister broadcasts | `e09671c` |
| 21 | File access decision guide | `3641618` |
---
## Tier 2 — Strong ideas, needs design
## Building now
### 6. Mesh webhooks / REST API / external WebSocket — PARTIAL (webhooks done)
### Peer session persistence ("welcome back")
Persist peer state (groups, profile, visibility, stats, summary) to DB on disconnect. Restore on reconnect with enriched `hello_ack`. System notification: "Welcome back, Alice! Last seen 2h ago."
Three surfaces for external integration:
- **Inbound webhooks:** `POST https://ic.claudemesh.com/hook/<mesh-id>/<secret>` → broker injects as a push to all peers or a specific group. GitHub, CI/CD, monitoring alerts become mesh messages.
- **REST API:** Authenticated endpoints to send messages, read state, list peers from outside. Makes the mesh programmable from any language.
- **External WS:** Non-Claude clients connect via WS with an API key (not a session keypair). Same protocol, different auth.
**Prerequisite:** API keys per mesh (not ephemeral session keypairs).
**Effort:** Half day (webhooks alone), 2-3 days (full API surface).
> **Partial:** 2026-04-07 · `b55cf26` · Inbound webhooks implemented (POST /hook/:meshId/:secret → push to mesh). REST API and external WS remain.
### 7. Connectors: Slack, Telegram as peers — DONE
**Approach 1 — Connector-as-peer (recommended start):** A bridge process joins the mesh as a peer named "Slack-#general" and relays messages bidirectionally. Peers see it in `list_peers` with `peerType: "connector"`. One connector per channel.
**Approach 2 — Connector-as-router:** Broker-level integration — messages to `#slack:general` route through a registered connector. More elegant, but complex.
Ship as `claudemesh-connector-slack`, `claudemesh-connector-telegram`.
**Effort:** 1-2 days each.
> **Implemented:** 2026-04-07 · Slack: `5563f90` (Socket Mode, echo prevention, auto-reconnect) · Telegram: `fe92853` (zero-dep Bot API, long polling)
### 8. Humans in the mesh
Humans connect via the web dashboard or mobile app using the same WS protocol. `peerType: "human"` metadata tells AI to adjust communication style. The push system works natively in browsers (WS is bidirectional).
**Challenge:** UX. Humans need a chat interface with typing indicators, read receipts, message history — not raw JSON. The dashboard already exists at claudemesh.com; extend it with a chat panel.
**Effort:** 2-3 days (web chat panel).
### 9. Connecting non-Claude-Code AI — DONE
Any process that speaks the WS protocol can join. The barrier isn't the protocol — it's the MCP tool surface that makes Claude Code sessions first-class. For other LLMs:
- **SDK approach:** `npm install claudemesh-sdk` — a JS/Python library that handles WS connection, crypto, and message parsing. Wrap any LLM's function-calling interface around it.
- **Push delivery:** The push system works over WS. Non-Claude clients receive pushes the same way. The challenge is injecting them into the LLM's context — each platform has a different mechanism (OpenAI function results, Gemini tool responses, etc.).
- **Adapter pattern:** `claudemesh-adapter-openai`, `claudemesh-adapter-cursor`, etc.
**Effort:** 1 day (SDK), 1 day per adapter.
> **Implemented:** 2026-04-07 · `7e102a2` · `@claudemesh/sdk` — standalone TypeScript SDK with libsodium crypto_box, EventEmitter API, auto-reconnect
### 10. Mesh skills catalog — DONE
Peers publish skills: `share_skill({ name: "pdf-generation", description: "...", instructions: "..." })`. Other peers `list_skills()` and `get_skill("pdf-generation")` to load instructions into their context. Broker stores skills like memory/state.
**Why:** A mesh becomes a capability marketplace. One session installs a skill, all peers benefit. Skills can include tool definitions, system prompts, reference docs, and example workflows.
**This is the killer feature.** It turns claudemesh from a messaging layer into a knowledge-sharing platform.
**Effort:** 1 day.
> **Implemented:** 2026-04-07 · `c8cb1e3` · Full CRUD (share/get/list/remove), upsert by name, ILIKE search, Drizzle schema
### 11. Shared project files across peers — DONE
When a peer connects, it registers accessible paths (opt-in per directory). Other peers request files: `get_peer_file(peer: "Alice", path: "src/auth.ts")`. The owning peer reads the file and returns it over the mesh.
**Security scoping options:**
- Opt-in per directory: `claudemesh launch --share-dir ./src`
- Same-machine only (detect via hostname/IP)
- Approval per request
**Effort:** 1 day.
> **Implemented:** 2026-04-07 · `504111c` · Broker relay (never reads content), CLI file serving with 1MB cap, path traversal rejection, hidden files excluded, 2-level dir listing. Plus hostname-based local/remote detection (`2c9c8c7`) and filesystem shortcut hint (`a92cf6b`).
### 12. Peer stats (context consumption, token usage) — DONE
Peers self-report: `set_status` extended with `contextUsed: 85000, contextMax: 200000, tokensIn: 12000, tokensOut: 8000`. Dashboard shows burn rate. Useful for load balancing — route work to the peer with the most context headroom.
**Limitation:** Claude Code doesn't expose context usage via API. Would need estimation from conversation length or `/cost` command parsing.
**Effort:** Half day (reporting infrastructure), unknown (accurate context measurement).
> **Implemented:** 2026-04-07 · `b3b9972` · Auto-reporting every 60s (messagesIn/Out, toolCalls, uptime, errors), mesh_stats MCP tool, stats in list_peers
### Persistent MCP registrations
MCP servers marked `persistent: true` survive peer disconnect. They are marked "offline" instead of being deleted, and are auto-restored on reconnect. Calls to offline servers return a descriptive error.
---
## Tier 3 — Big bets, needs careful thought
## Remaining — not yet built
### 13. Mesh blockchain / signed audit log — DONE (audit log)
**Honest assessment:** A full blockchain is overkill for a cooperative mesh. What's actually valuable is the useful parts:
- **Signed append-only log:** Immutable record of all decisions, state changes, and messages. Merkle tree integrity. Useful for compliance, debugging, and "who decided what."
- **Conflict resolution:** Vector clocks or CRDTs for state, instead of last-write-wins.
- **Reputation:** Track which peers deliver on tasks, respond promptly, produce quality work.
**Reframe as:** Signed audit trail with integrity proofs. Not a blockchain, but the valuable properties of one.
**Effort:** 3-5 days.
> **Implemented:** 2026-04-07 · `86a2583` · SHA-256 hash chain audit log, append-only, no message content logged, chain verification endpoint, paginated query
### 14. Mesh of meshes / bridge
A meta-broker that routes between meshes. Use case: `dev-team` mesh and `ops-team` mesh coordinate on deploys.
**Simple version:** A bridge peer joins both meshes and relays tagged messages. No broker changes needed. Already feasible with today's protocol.
**Federation version:** Broker-to-broker peering protocol. Brokers exchange presence and route ciphertext across organizations.
**Effort:** 1 day (bridge peer), 1-2 weeks (federation protocol).
### 15. Mesh templates on creation — DONE
Predefined mesh configurations: roles, groups, state keys, system prompts, skills, and governance rules. Examples:
- `dev-team`: @frontend, @backend, @devops groups; lead/member roles; state keys for sprint/deploy-frozen
- `research`: @analysis, @writing groups; shared memory focus; context-sharing optimized
- `ops-incident`: @oncall, @comms groups; high-urgency defaults; auto-escalation rules
Templates are JSON files. `claudemesh create --template dev-team` applies them at mesh creation. Templates are editable post-creation by mesh admin (or anyone, depending on governance).
**Effort:** Half day.
> **Implemented:** 2026-04-07 · `69e93d4` · 5 templates (dev-team, research, ops-incident, simulation, personal) + `claudemesh create` command
### 16. Default private mesh per user — DONE
On `claudemesh install`, auto-create a personal mesh with the user as sole member. All their Claude Code sessions join by default. Zero-config — instant value without understanding meshes.
**Effort:** Half day.
> **Implemented:** 2026-04-07 · `b0dc538` · Install detects empty meshes, shows join guidance. Local-only mesh deferred (requires broker enrollment).
### 17. Mesh MCP proxy (dynamic tools without session restart) — DONE
**Problem:** Claude Code loads MCP servers at startup. You can't inject new tool definitions into a running session.
**Solution:** Route through the existing claudemesh MCP connection. A generic `mesh_tool_call` tool proxies to MCP servers registered in the mesh at runtime — no restart needed.
**Flow:**
1. A peer registers an MCP server: `mesh_mcp_register(name: "github", transport: "stdio", command: "npx @github/mcp")`
2. Broker stores the registration
3. Any peer calls `mesh_tool_call(server: "github", tool: "list_repos", args: {...})`
4. Broker routes to the hosting peer or a shared sidecar process
5. That host invokes the actual MCP server, returns the result through the mesh
6. Calling peer gets the response — all through the existing claudemesh WS connection
**Two hosting models:**
- **Peer-hosted:** The registering peer runs the MCP server locally. Other peers proxy through them. If that peer disconnects, the MCP goes offline.
- **Broker-hosted:** The broker spawns the MCP server as a sidecar. Always available. Better for shared tools (database, GitHub, Jira).
**What AI sees:**
```
> mesh_mcp_list()
Available mesh MCP servers:
- github (hosted by: Alice) — tools: list_repos, create_issue, ...
- jira (hosted by: broker) — tools: search_issues, create_ticket, ...
- postgres-prod (hosted by: broker) — tools: query, execute
> mesh_tool_call(server: "github", tool: "create_issue", args: {repo: "...", title: "..."})
Issue #42 created.
```
**Limitation:** Claude Code won't see these as first-class tools in its tool list — AI needs to know to use `mesh_tool_call`. MCP server instructions document the proxy pattern.
**New MCP tools needed:** `mesh_mcp_register`, `mesh_mcp_list`, `mesh_tool_call`, `mesh_mcp_remove`
### Humans in the mesh
Web chat panel on claudemesh.com/dashboard. Humans connect via WS with `peerType: "human"`. Need: typing indicators, read receipts, message history UI.
**Effort:** 2-3 days.
> **Implemented:** 2026-04-07 · `08e289a` · Full round-trip: register → list → call → forward → execute → result. In-memory registry, 30s call timeout, auto-cleanup on disconnect.
### REST API + external WebSocket
Authenticated endpoints to send messages, read state, list peers from outside the mesh. API keys per mesh (not session keypairs). External WS: non-Claude clients connect with API key auth.
### 18. Sandbox for code execution
**Effort:** 2-3 days. (Webhooks already done.)
Each mesh gets optional compute sandboxes (Docker containers, Firecracker VMs, or E2B-style). Peers request: `execute_code(lang: "python", code: "...")`. Broker provisions a sandbox, runs the code, returns stdout/stderr. Resources scale on demand as peers need sandboxes.
### Bridge / federation
**Simple:** A bridge peer joins two meshes and relays tagged messages. Feasible now with the SDK.
**Federation:** Broker-to-broker peering protocol. Needs design.
**Build vs integrate:**
- **Build:** Docker-in-Docker on the broker host. Simple but security-sensitive.
- **Integrate:** E2B, Modal, or Fly Machines as the sandbox backend. claudemesh MCP tool is a thin client. Scales naturally.
**Effort:** 1 day (bridge), 1-2 weeks (federation).
**Effort:** 2-3 days (E2B integration), 1-2 weeks (self-hosted sandboxes).
### Sandboxes for code execution
Per-mesh compute sandboxes. Peers request: `execute_code(lang: "python", code: "...")`. Prefer third-party integration (E2B, Modal, Fly Machines) over self-hosted.
### 19. Mesh dashboard (real-time situational awareness) — DONE
**Effort:** 2-3 days (E2B), 1-2 weeks (self-hosted).
Live web UI at claudemesh.com/dashboard showing:
- **Peer graph:** Who's connected, status, groups, roles — nodes and edges
- **Message flow:** Animated edges showing real-time traffic between peers
- **State/memory timeline:** When values changed and who changed them
- **Resource panel:** Files shared, tasks active, skills available
- **Peer detail:** Click a peer → see summary, context usage, message history
### Spatial topology (proximity-based visibility)
Extend visibility with `(x, y)` coordinates and visibility radius. Peers only see others within range. Combined with sim clock, enables spatial simulations (customer walks into store zone, sees sales reps).
Broker already tracks everything needed. Dashboard subscribes via WS and renders with D3/React.
**Effort:** 1 day.
**Effort:** 2-3 days (functional), 1 week (polished).
> **Implemented:** 2026-04-07 · `59332dc` peer graph (radial SVG, animated edges, group rings) + `7d432b3` state timeline + resource panel. Peer detail view remains.
### 20. Peer visibility and spatial topology — DONE (visibility + profiles)
Control which peers can see each other. Instead of a flat mesh where everyone sees everyone, the broker filters `list_peers` responses and message routing based on visibility rules.
**Three visibility models:**
- **Proximity-based (simulation):** Each peer has coordinates `(x, y)` and a visibility radius. Only peers within range appear in `list_peers`. `set_position(x, y)` changes who you can see — spatial fog of war. Combined with the simulation clock, this creates emergent behavior: a "customer" peer walks into a "store zone", suddenly sees "sales rep" peers, initiates interaction.
- **Scope-based (organizational):** Visibility follows group membership. Peers in `@frontend` see each other and `@leads`, but not `@backend` internals. Org-chart visibility without exposing every department.
- **Manual/dynamic:** Peers or admins explicitly show/hide. `set_visible(false)` to go stealth (connected but invisible). Admin can force visibility/invisibility.
**Who controls visibility:**
- **Broker rules** — mesh-wide policy set at creation or via template (e.g., "proximity" mode for simulations, "scope" for orgs)
- **Peer self-control** — `set_visible(false)` to go stealth, `set_position(x, y)` to move in proximity mode
- **Admin override** — mesh admin force-shows or force-hides peers
- **Dynamic conditions** — broker changes visibility based on state keys, clock ticks, or events
**Notifications:** Peers receive `{ subtype: "system", event: "peer_visible" }` when a new peer enters their visibility and `peer_hidden` when one leaves. Different from join/leave — the peer is still connected, just not visible to you.
**Peer public profile (outside image):** Each peer has a public-facing profile that other peers see — a curated view separate from internal state. Fields: `avatar` (emoji or URL), `title` (short role label), `bio` (one-liner), `capabilities` (what I can help with). Set via `set_profile({ avatar: "🔧", title: "DevOps Lead", bio: "Infrastructure and deploys" })`. This is what appears on the peer graph node and in `list_peers`. Peers choose how they present themselves to the mesh.
**MCP tools:** `set_visible(visible)`, `set_position(x, y)`, `set_profile(profile)`, `get_visible_peers()`, `set_visibility_mode(mode)` (admin only)
**Effort:** 2-3 days.
> **Partial:** 2026-04-07 · Visibility toggle (set_visible), public profiles (set_profile), hidden peer filtering in list_peers, peer_visible/peer_hidden system events, direct messages still reach hidden peers. Remaining: proximity-based (x,y coordinates), scope-based (group visibility rules).
### 21. Semantic peer search
In large meshes (50+ peers), scanning `list_peers` output is noise. A `search_peers` tool that filters and ranks by multiple dimensions:
- **Structured filters:** name, group, role, status, peerType, channel, model, cwd
- **Free-text search:** matches against peer summaries, profile bios, capabilities, and shared skills
- **Capability matching:** "find a peer that knows about database migrations" searches across profile capabilities + skills catalog + recent summaries
- **Ranking:** peers with more matching dimensions rank higher; active (idle/working) peers rank above DND/offline
**MCP tool:** `search_peers(query, filters?)` — returns a ranked list of matching peers with relevance scores.
**Implementation:** Broker-side — accepts a `search_peers` message, runs multi-field matching against the in-memory peer list + skills table. No external search engine needed for <500 peers; for larger meshes, wire into the existing Qdrant vector store (already available via `vector_search`).
### Semantic peer search
`search_peers(query, filters?)` — multi-field matching across names, groups, roles, summaries, profile capabilities, skills. Ranked results. For meshes with 50+ peers.
**Effort:** Half day.
### 22. Mesh telemetry and debugging
A structured logging system where peers report errors, warnings, and debug info to the broker. Goes beyond the audit log (which tracks events) — this tracks operational health.
**What peers report:**
- Errors: tool failures, connection drops, unhandled exceptions
- Warnings: high context usage, slow responses, retry patterns
- Debug: decision traces, task reasoning, why a particular approach was chosen
- Performance: response latency per tool call, message round-trip times
**Broker storage:** Structured logs indexed by mesh, peer, timestamp, severity. Retained for N days (configurable). Queryable via WS messages.
**AI self-analysis:** Peers query their own logs to identify patterns: "I've hit this error 3 times in the last hour — what's common?" The mesh becomes self-diagnosing. Leads can query team-wide logs: "Which peers are seeing errors in the deploy flow?"
**Reporting:** Aggregated metrics per peer, per mesh, per time window. Error rates, common failure modes, response time percentiles. Surfaced in the dashboard or via `mesh_report(timeframe: "24h")`.
**MCP tools:**
- `mesh_log(level, message, data?)` — report a log entry
- `mesh_logs(query?, peer?, level?, last?)` — query logs
- `mesh_report(timeframe?)` — aggregated health report
### Mesh telemetry and debugging
Structured logging: `mesh_log(level, message, data?)`. Queryable: `mesh_logs(query?, peer?, level?, last?)`. Aggregated reports: `mesh_report(timeframe?)`. AI self-analysis for continuous improvement.
**Effort:** 1-2 days.
### 23. Peer session persistence ("welcome back")
When a peer disconnects, their state is lost (groups, profile, visibility, stats, summary). On reconnect they start blank. Persist peer state so returning peers resume where they left off.
**What persists (keyed by meshId + memberId):**
- Groups and roles
- Profile (avatar, title, bio, capabilities)
- Visibility setting
- Last summary
- Cumulative stats (messages, tool calls across all sessions)
- Last seen timestamp
**What resets:** status (always "idle" on connect), WebSocket/presenceId (ephemeral).
**Reconnect flow:**
1. Peer sends hello with same `memberId`
2. Broker looks up `peer_state` table for (meshId, memberId)
3. If found: restore groups, profile, visibility, stats — hello fields take precedence if explicitly set
4. Enriched `hello_ack` includes `restored: true` and previous summary
5. System notification: `"Welcome back, Alice! Last seen 2h ago. Restored: @frontend:lead, @devops:member"`
6. On disconnect: upsert current state to `peer_state`
**Why:** AI sessions restart often (context limits, crashes, new tasks). Without persistence, every reconnect requires manual group joins and profile setup. With it, the mesh remembers who you are.
**Effort:** Half day.
---
## Suggested build order
| # | Feature | Effort | Unlocks | Status |
|---|---------|--------|---------|--------|
| 1 | Session path sharing | 30 min | File referencing across sessions | **DONE** `810f372` |
| 2 | Peer metadata (type/channel/model) | 1 hour | Connectors, humans, smart routing | **DONE** `810f372` |
| 3 | System notifications | 2 hours | Reactive mesh, awareness | **DONE** `453705a` |
| 4 | Cron reminders | 2 hours | Persistent scheduling | **DONE** `e873807` |
| 5 | Mesh templates | Half day | Better onboarding | **DONE** `69e93d4` |
| 6 | Default personal mesh | Half day | Zero-config start | **DONE** `b0dc538` |
| 7 | Inbound webhooks | Half day | External integrations | **DONE** `b55cf26` |
| 8 | Skills catalog | 1 day | Knowledge marketplace | **DONE** `c8cb1e3` |
| 9 | Shared project files | 1 day | Cross-session file access | **DONE** `504111c` |
| 10 | Slack connector | 1-2 days | Reach beyond Claude Code | **DONE** `5563f90` |
| 11 | Mesh MCP proxy | 2-3 days | Dynamic tools without restart | **DONE** `08e289a` |
| 12 | Dashboard (real-time) | 2-3 days | Visual situational awareness | **DONE** `59332dc` + `7d432b3` |
| 13 | Human peers (web chat) | 2-3 days | Humans in the loop | |
| 14 | Simulation clock (heartbeat x1-x100) | 2 days | AI-driven load testing | **DONE** `05d9b56` |
| 15 | Sandboxes (E2B) | 2-3 days | Shared compute | |
| 16 | Signed audit log | 3-5 days | Trust, compliance | **DONE** `86a2583` |
| 17 | Bridge / federation | 1-2 weeks | Multi-mesh coordination | |
| 18 | Peer visibility + profiles | 2-3 days | Simulation fog-of-war, org scoping | **DONE** (types.ts/index.ts) |
| 19 | Semantic peer search | Half day | Discovery in large meshes | |
| 20 | Peer stats reporting | Half day | Resource awareness, load balancing | **DONE** `b3b9972` |
| 21 | SDK (@claudemesh/sdk) | 1 day | Non-Claude-Code clients | **DONE** `7e102a2` |
| 22 | Telegram connector | 1-2 days | Reach beyond Claude Code | **DONE** `fe92853` |
| 23 | Mesh telemetry + debugging | 1-2 days | Self-diagnosing mesh | |
| 24 | Peer session persistence | Half day | "Welcome back" on reconnect | |
---
*This document captures a brainstorming session. Items are not commitments. Priorities will shift as we build and learn.*
*Priorities shift as we build and learn. Bridge and humans are the highest-value remaining items.*

View File

@@ -0,0 +1,16 @@
-- Peer session persistence: save state on disconnect, restore on reconnect.
-- One row per (mesh_id, member_id); see the UNIQUE constraint at the bottom.
-- IF NOT EXISTS makes the statement a no-op when the table is already present.
CREATE TABLE IF NOT EXISTS mesh.peer_state (
id TEXT PRIMARY KEY NOT NULL,
-- Both foreign keys cascade, so deleting the mesh or the member removes the row.
mesh_id TEXT NOT NULL REFERENCES mesh.mesh(id) ON DELETE CASCADE ON UPDATE CASCADE,
member_id TEXT NOT NULL REFERENCES mesh.member(id) ON DELETE CASCADE ON UPDATE CASCADE,
-- JSONB columns are nullable; the defaults give an empty array / empty object.
groups JSONB DEFAULT '[]',
profile JSONB DEFAULT '{}',
visible BOOLEAN NOT NULL DEFAULT true,
last_summary TEXT,
last_display_name TEXT,
-- Counters default to zero for all four keys.
cumulative_stats JSONB DEFAULT '{"messagesIn":0,"messagesOut":0,"toolCalls":0,"errors":0}',
-- Nullable, no default. TIMESTAMP here is without time zone.
last_seen_at TIMESTAMP,
created_at TIMESTAMP NOT NULL DEFAULT now(),
-- NOTE(review): no trigger in this migration refreshes updated_at; presumably the writer sets it — confirm.
updated_at TIMESTAMP NOT NULL DEFAULT now(),
-- Enforces a single state row per member per mesh.
CONSTRAINT peer_state_mesh_member_idx UNIQUE (mesh_id, member_id)
);

View File

@@ -731,6 +731,53 @@ export const insertMeshStreamSchema = createInsertSchema(meshStream);
export type SelectMeshStream = typeof meshStream.$inferSelect;
export type InsertMeshStream = typeof meshStream.$inferInsert;
/**
 * Durable snapshot of a peer's session inside a mesh.
 *
 * The row outlives the connection: when the same member reconnects to the
 * same mesh, the broker reads it back to restore groups, profile,
 * visibility, summary, and cumulative stats.
 *
 * A unique index on (meshId, memberId) limits the table to one row per
 * member per mesh. Both foreign keys cascade on delete and on update.
 */
export const peerState = meshSchema.table(
  "peer_state",
  {
    id: text().notNull().primaryKey().$defaultFn(generateId),

    // Ownership: which mesh, and which member of that mesh.
    meshId: text()
      .notNull()
      .references(() => mesh.id, { onDelete: "cascade", onUpdate: "cascade" }),
    memberId: text()
      .notNull()
      .references(() => meshMember.id, { onDelete: "cascade", onUpdate: "cascade" }),

    // Restorable session state.
    groups: jsonb()
      .$type<Array<{ name: string; role?: string }>>()
      .default([]),
    profile: jsonb()
      .$type<{ avatar?: string; title?: string; bio?: string; capabilities?: string[] }>()
      .default({}),
    visible: boolean().default(true).notNull(),
    lastSummary: text(),
    lastDisplayName: text(),
    cumulativeStats: jsonb()
      .$type<{ messagesIn: number; messagesOut: number; toolCalls: number; errors: number }>()
      .default({
        messagesIn: 0,
        messagesOut: 0,
        toolCalls: 0,
        errors: 0,
      }),

    // Bookkeeping timestamps; lastSeenAt is nullable with no default.
    lastSeenAt: timestamp(),
    createdAt: timestamp().notNull().defaultNow(),
    updatedAt: timestamp().notNull().defaultNow(),
  },
  (columns) => [
    uniqueIndex("peer_state_mesh_member_idx").on(columns.meshId, columns.memberId),
  ],
);
/**
 * Relation map for `peerState`: each row points at exactly one mesh
 * (via `meshId`) and exactly one mesh member (via `memberId`).
 */
export const peerStateRelations = relations(peerState, ({ one }) => {
  const owningMesh = one(mesh, {
    fields: [peerState.meshId],
    references: [mesh.id],
  });

  const owningMember = one(meshMember, {
    fields: [peerState.memberId],
    references: [meshMember.id],
  });

  return { mesh: owningMesh, member: owningMember };
});
/** Schema built from the `peerState` table by `createSelectSchema` (rows as read). */
export const selectPeerStateSchema = createSelectSchema(peerState);
/** Schema built from the `peerState` table by `createInsertSchema` (rows as written). */
export const insertPeerStateSchema = createInsertSchema(peerState);
/** Row type of `peerState` as returned by a select. */
export type SelectPeerState = typeof peerState.$inferSelect;
/** Value type accepted when inserting into `peerState`. */
export type InsertPeerState = typeof peerState.$inferInsert;
export const meshSkillRelations = relations(meshSkill, ({ one }) => ({
mesh: one(mesh, {
fields: [meshSkill.meshId],