diff --git a/.artifacts/backlog/2026-04-10-cli-wizard-architecture-refactor.md b/.artifacts/backlog/2026-04-10-cli-wizard-architecture-refactor.md new file mode 100644 index 0000000..0ba513c --- /dev/null +++ b/.artifacts/backlog/2026-04-10-cli-wizard-architecture-refactor.md @@ -0,0 +1,243 @@ +# CLI Wizard Architecture Refactor + +**Status:** backlog +**Created:** 2026-04-10 +**Source:** Reverse-engineered from `@posthog/wizard` (npm cache), applied to `apps/cli/src/commands/launch.ts` + +## Why + +Launch wizard has three compounding problems: + +1. **Imperative branching** — `launch.ts` checks account → mesh → name → role → exec in hardcoded order. Adding a screen requires touching existing code. Hard to reason about `--resume`, `--non-interactive`, and skip conditions. +2. **Terminal bleed-through on handoff** — wizard→`claude` exec corrupts Ink's TUI state (garbled word wraps, tool labels overwritten, spinner fragments fused to paths). Root cause is spread across multiple exit paths instead of one choke point. +3. **Inconsistent visual design** — ad-hoc colors per file, no central palette, no shared icon set, no shared layout primitives. Every screen reinvents status rows, centering, and spacing. + +PostHog's wizard solves all three with one architectural pattern: **declarative flow pipelines + session-as-store + shared visual primitives**. This artifact captures the plan to port that pattern. 
+ +## What PostHog does (the reference) + +### Flow pipeline (`flows.ts` + `router.ts`) + +Each wizard flow is an array of screen entries: + +```ts +export const FLOWS = { + [Flow.Wizard]: [ + { screen: Screen.Intro, isComplete: s => s.setupConfirmed }, + { screen: Screen.HealthCheck, isComplete: s => s.readinessResult !== null }, + { screen: Screen.Setup, show: needsSetup, isComplete: s => !needsSetup(s) }, + { screen: Screen.Auth, isComplete: s => s.credentials !== null }, + { screen: Screen.Run, isComplete: s => s.runPhase === RunPhase.Completed }, + { screen: Screen.Outro, isComplete: s => s.outroDismissed }, + ], +}; +``` + +The router walks the array, skips entries where `show(s) === false` or `isComplete(s) === true`, and returns the first remaining entry. Zero switch statements. Zero hardcoded transitions. Adding a screen = appending an object. + +### Overlay stack + +Separate from the linear flow cursor. Interrupts (port conflict, auth expired, managed settings) are pushed onto `overlays[]` from anywhere and popped when dismissed. Active screen = top of overlay stack OR flow cursor. Flows never need to know about interrupts. + +### Session as single source of truth + +One `WizardStore` holds all session state. Screens subscribe via React 18 `useSyncExternalStore`. Completion predicates read session; imperative code writes session; the router re-resolves on every change. 
+ +### Visual primitives + +- `styles.ts` — 6-color palette (`Colors`), 9-icon set (`Icons`), alignment enums (`HAlign`, `VAlign`) +- `CardLayout` — semantic centering wrapper used by every screen +- `PickerMenu` — the only selection primitive, used for every choice +- `screen-registry.ts` — maps `Screen` enum → React component +- Brand mark: three colored `█` blocks next to the wizard name on every screen header + +## What claudemesh should do + +### Target file layout + +``` +apps/cli/src/ +├── commands/ +│ └── launch.ts # thin entrypoint: parse flags → start TUI +└── ui/ + ├── styles.ts # palette, icons, alignment enums + ├── store.ts # LaunchStore (session + subscribe) + ├── router.ts # flow cursor + overlay stack + ├── flows.ts # FLOWS = { Launch: [...], Join: [...] } + ├── screen-registry.ts # Screen enum → component + ├── primitives/ + │ ├── CardLayout.tsx + │ ├── PickerMenu.tsx + │ ├── StatusRows.tsx # new: "Directory ✓ /claudemesh" pattern + │ ├── BrandMark.tsx # new: 3 colored squares + label + │ └── LoadingBox.tsx + └── screens/ + ├── WelcomeScreen.tsx + ├── AccountScreen.tsx + ├── MeshPickerScreen.tsx + ├── NameRoleScreen.tsx + ├── ConfirmScreen.tsx + └── HandoffScreen.tsx # last screen; its unmount triggers exec claude +``` + +### Flow definition + +```ts +export const FLOWS = { + [Flow.Launch]: [ + { screen: Screen.Welcome, isComplete: s => s.welcomed }, + { screen: Screen.Account, show: s => !s.hasAccount, isComplete: s => s.hasAccount }, + { screen: Screen.MeshPicker, show: s => s.meshes.length > 1, isComplete: s => s.meshSlug !== null }, + { screen: Screen.NameRole, isComplete: s => s.displayName !== null && s.role !== null }, + { screen: Screen.Confirm, isComplete: s => s.confirmed }, + { screen: Screen.Handoff, isComplete: () => false }, // terminal screen + ], +}; +``` + +### `--resume` works for free + +`--resume <session-id>` populates the session from saved state; every satisfied predicate auto-skips. 
The wizard renders only the screens that still need input. No special `--resume` branches in screen code. + +### `--non-interactive` works for free + +Non-interactive mode: walk the flow, for each incomplete entry check if its required session fields can be sourced from CLI flags. If yes, populate and continue. If no, **fail fast with a clear message** naming the missing flag. Never silently guess defaults. + +``` +$ claudemesh launch --non-interactive --name Alexis +✗ Missing --mesh (required in non-interactive mode when >1 mesh joined) + Available meshes: alexis-mou, dev, staging +``` + +### Overlay interrupts claudemesh needs + +- `BrokerDisconnect` — WS dropped mid-wizard, retry countdown +- `InviteInvalid` — paste invite screen rejected token +- `MeshNotFound` — `--mesh foo` passed but not joined +- `RateLimit` — broker rate limited the CLI, backoff timer +- `UpdateAvailable` — newer CLI version on npm, non-blocking banner + +### Terminal handoff choke point + +The last flow entry (`Screen.Handoff`) renders a brief "Launching Claude Code…" card, then: + +```ts +// apps/cli/src/ui/screens/HandoffScreen.tsx (on mount) +useEffect(() => { + (async () => { + await inkApp.unmount(); + await inkApp.waitUntilExit(); + resetTerminal(); // single choke point for ANSI teardown + await flushStdout(); + execa('claude', claudeArgs, { stdio: 'inherit' }); + })(); +}, []); +``` + +`resetTerminal()` lives in `apps/cli/src/ui/terminal.ts`: + +```ts +export function resetTerminal() { + process.stdout.write( + '\x1b[0m' + // reset SGR + '\x1b[?25h' + // show cursor + '\x1b[?1049l' + // exit alt-screen + '\x1b[?1000l' + // disable mouse tracking + '\x1b[?1002l' + + '\x1b[?1003l' + + '\x1b[?1006l' + + '\x1b[?2004l' + // disable bracketed paste + '\x1b[2J' + // clear screen + '\x1b[H' // cursor home + ); + if (process.stdin.isTTY) process.stdin.setRawMode(false); +} +``` + +PostHog only does SGR reset + clear + home on unmount — they don't hand off to another full-screen app, so 
that's enough for them. Claudemesh needs the full mode-reset because Claude Code takes over the TTY. + +### Visual design system + +`apps/cli/src/ui/styles.ts`: + +```ts +export const Colors = { + primary: 'cyan', + accent: '#7C3AED', // claudemesh purple + title: '#4C1D95', + success: 'green', + error: 'red', + warning: 'yellow', + muted: 'gray', +} as const; + +export const Icons = { + check: '✔', + cross: '✘', + warning: '⚠', + arrow: '▶', + smallArrow: '▸', + bullet: '•', + diamond: '◆', + square: '█', +} as const; + +export enum HAlign { Left = 'flex-start', Center = 'center', Right = 'flex-end' } +export enum VAlign { Top = 'flex-start', Center = 'center', Bottom = 'flex-end' } +``` + +Every screen imports from here. No inline color strings allowed. + +### Status rows pattern + +Replaces the current plain-text banner: + +``` + ██ claudemesh launch + + Directory ✔ /claudemesh + Account ✔ agutierrez@mineryreport.com + Mesh ✔ alexis-mou (9 peers online) + Name ✔ Alexis + Role ▸ (pick one) + + ▸ Continue + Change mesh + Cancel +``` + +## Implementation order + +| # | Impact | Effort | Scope | +|---|---|---|---| +| 1 | High | S | `ui/styles.ts` — palette + icons + alignment enums; migrate existing screens | +| 2 | High | S | `ui/primitives/StatusRows.tsx` + `BrandMark.tsx` | +| 3 | High | M | `ui/store.ts` + `ui/router.ts` + `ui/flows.ts` (flow pipeline core) | +| 4 | High | M | Refactor `launch.ts` to render through router; port existing screens | +| 5 | High | S | `HandoffScreen` + `resetTerminal()` choke point — fixes TUI bleed bug | +| 6 | High | S | Preselect "Continue" on every confirmation screen (one-keypress happy path) | +| 7 | Med | M | Overlay stack + first two overlays (`BrokerDisconnect`, `InviteInvalid`) | +| 8 | Med | M | `--non-interactive` mode using flow walker + fail-fast flag check | +| 9 | Med | S | Per-mesh/per-role `preRunNotice` extension point | +| 10| Low | L | `DissolveTransition` / `ContentSequencer` polish primitives | + +Steps 1–5 
are the atomic unit of value: they fix the bleed-through bug, establish the visual system, and unblock everything else. Should ship as one PR. +Steps 6–9 can each ship independently. +Step 10 is polish — defer until after v0.2. + +## Open questions + +- **Ink version**: current CLI uses Ink 4.x? PostHog is on Ink 5 with `useSyncExternalStore`. Check `apps/cli/package.json` before porting the store pattern — Ink 4 needs a different subscription approach. +- **React version**: `useSyncExternalStore` is React 18+. Confirm. +- **Flow granularity**: should `Join` (paste invite) be a separate flow from `Launch`, or an overlay inside `Launch`? PostHog-style: separate flow triggered from the welcome screen. Simpler. +- **Resume semantics**: does `--resume <session-id>` resume the *Claude* session only, or also restore the wizard's last mesh/name/role choice? If the latter, need a `~/.claudemesh/sessions/<session-id>.json` alongside Claude's own session file. + +## References + +- PostHog wizard source: `~/.npm/_npx/b48b11b34a0cada0/node_modules/@posthog/wizard/dist/src/ui/tui/` + - `start-tui.js` — Ink bootstrap + cleanup + - `router.js` — flow cursor + overlay stack + - `flows.js` — declarative pipeline definition + - `styles.js` — palette + icons + - `screens/IntroScreen.js` — reference for status rows + picker + - `primitives/CardLayout.js` — semantic centering diff --git a/.artifacts/backlog/2026-04-11-v1-feature-inventory.md b/.artifacts/backlog/2026-04-11-v1-feature-inventory.md new file mode 100644 index 0000000..4a5581b --- /dev/null +++ b/.artifacts/backlog/2026-04-11-v1-feature-inventory.md @@ -0,0 +1,820 @@ +# claudemesh v1 — Feature Inventory + +**Status:** backlog reference +**Created:** 2026-04-11 +**Purpose:** Exhaustive audit of what v1 ships today. **Every row in this document must still work after v2 lands.** v2 is a refactor + CLI user flows, NOT a functional rewrite; this inventory is the regression checklist. 
+ +**Source of truth**: +- `apps/cli/src/` — 22 files, ~12 k LOC (v0.10.5) +- `apps/broker/src/` — 23 files, ~11 k LOC +- `packages/db/src/schema/mesh.ts` — 1,019 lines, 23 tables + +--- + +## 0. Summary counts + +| Surface | v1 count | +|---|---| +| CLI commands (subcommands in `index.ts`) | 23 | +| MCP tools (handlers in `mcp/server.ts`) | 79 | +| Broker WS message types (dispatched in `index.ts`) | 85 | +| Broker HTTP endpoints | 18 | +| Postgres tables in `mesh` schema | 23 | +| External backend services the broker manages | 5 (Postgres, Neo4j, Qdrant, MinIO, Docker) | +| Lines of source (CLI + broker, excluding tests) | ~23,450 | + +--- + +## 1. CLI commands + +All dispatched from `apps/cli/src/index.ts`. v1 ships 23 public subcommands plus the bare-command welcome wizard. + +| Command | File | Purpose | Flags / args | +|---|---|---|---| +| `claudemesh` (bare) | `commands/welcome.ts` | Interactive welcome wizard. Entry point for new users. | (none) | +| `launch` | `commands/launch.ts` (775 lines, biggest) | Spawn a Claude Code session with mesh connectivity + MCP tools | `--name`, `--role`, `--groups`, `--mesh`, `--join`, `--message-mode`, `--system-prompt`, `-y/--yes`, `-r/--resume`, `-c/--continue`, `--quiet`, + passthrough to `claude` after `--` | +| `create` | `commands/create.ts` | Create a new mesh from a template | `--template`, `--list-templates` | +| `install` | `commands/install.ts` (538 lines) | Register MCP server + status hooks with Claude Code (`~/.claude.json`, `~/.claude/settings.json`) | `--no-hooks` | +| `uninstall` | `commands/install.ts` | Remove MCP server + hooks from Claude Code config | (none) | +| `join` | `commands/join.ts` (193 lines) | Join a mesh via invite URL or token | positional `` | +| `list` | `commands/list.ts` | Show joined meshes, slugs, local identities | (none) | +| `leave` | `commands/leave.ts` | Leave a joined mesh + remove its local keypair | positional `` | +| `peers` | `commands/peers.ts` | List online peers with 
status, summary, groups | `--mesh`, `--json` | +| `send` | `commands/send.ts` | Send a message to a peer, group, or all peers | positional ` `, `--mesh`, `--priority` | +| `inbox` | `commands/inbox.ts` | Drain pending inbound messages | `--mesh`, `--json`, `--wait` | +| `state` | `commands/state.ts` | Get / set / list shared KV state in the mesh | positional ` [value]`, `--mesh`, `--json` | +| `info` | `commands/info.ts` | Mesh overview: slug, broker, peer count, state keys | `--mesh`, `--json` | +| `remember` | `commands/memory.ts` | Store a persistent memory visible to all peers | positional ``, `--mesh`, `--tags`, `--json` | +| `recall` | `commands/memory.ts` | Full-text search of mesh memories | positional ``, `--mesh`, `--json` | +| `remind` | `commands/remind.ts` (142 lines) | Schedule a delayed message. Also: `remind list`, `remind cancel ` | positional ``, `--in`, `--at`, `--cron`, `--to`, `--mesh`, `--json` | +| `sync` | `commands/sync.ts` | Sync meshes from the user's claudemesh.com dashboard account | `--force` | +| `profile` | `commands/profile.ts` | View or edit member profile (self or another member if admin) | `--mesh`, `--role-tag`, `--groups`, `--message-mode`, `--name`, `--member`, `--json` | +| `status` | `commands/status.ts` | Check broker connectivity for each joined mesh | (none) | +| `doctor` | `commands/doctor.ts` (212 lines) | Diagnose install, config, keypairs, PATH | 7 checks: Node >= 20, claude binary, MCP registered, hooks registered, config parses, file perms, keypairs valid | +| `mcp` | `mcp/server.ts` (2139 lines) | Start MCP server on stdio (internal — invoked by Claude Code) | (none) | +| `seed-test-mesh` | `commands/seed-test-mesh.ts` | Dev-only: inject a mesh into local config without invite flow | ``, `` | +| `hook` | `commands/hook.ts` | Internal: handle Claude Code hook events (status updates from session lifecycle) | stdin JSON from Claude Code | +| `connect telegram` | `commands/connect-telegram.ts` | Link a Telegram bot to 
a mesh | inline token prompts, calls broker `/tg/token` | +| `disconnect telegram` | `commands/disconnect-telegram.ts` | Unlink Telegram bot | (none) | + +### Flag-first invocation rewrite + +`apps/cli/src/index.ts` lines 339–355 implement a **friction reducer**: if the user types `claudemesh --resume xxx` or any flag-first invocation, the argv is rewritten to `claudemesh launch --resume xxx` before citty parses it. This lets users skip typing `launch` for common flag-only forms. + +**Must preserve in v2.** Users may depend on this. Applies to `--resume`, `--continue`, `-y`, `--mesh`, `--name`, etc. + +--- + +## 2. MCP tools (79 total) + +Defined in `apps/cli/src/mcp/tools.ts` with schemas, implemented in `apps/cli/src/mcp/server.ts` with per-tool case handlers. Each MCP tool is a RPC that the CLI's MCP server handles locally or forwards to the broker via WS. + +Grouped by domain family. Every tool listed here has a working handler in v1. + +### 2.1 Messaging (4) + +| Tool | v1 behavior | +|---|---| +| `send_message` | Send encrypted message to peer, group, or broadcast. Supports priorities: `now` (immediate), `next` (default), `low`. Broker queues if recipient offline. | +| `list_peers` | List connected peers in the mesh with `presenceId`, `displayName`, `status`, `summary`, `groups`, `roleTag`. | +| `message_status` | Query delivery state of a sent message by `messageId`. | +| `check_messages` | Drain pending inbox messages (push mode). | + +### 2.2 Profile + identity (4) + +| Tool | v1 behavior | +|---|---| +| `set_summary` | Set the current peer's work summary (visible to others). | +| `set_status` | Set status: `idle`, `working`, `dnd`. Priority-ranked by source (`hook` > `manual` > `jsonl`). | +| `set_visible` | Toggle visibility. Hidden peers skip `list_peers` and broadcasts but still receive direct messages. | +| `set_profile` | Update display name, role tag, groups, avatar, title, bio, capabilities. 
| + +### 2.3 Groups (2) + +| Tool | v1 behavior | +|---|---| +| `join_group` | Join a `@group` with optional role (`lead`, `member`, or free-form). | +| `leave_group` | Leave a `@group`. | + +### 2.4 State KV (3) + +| Tool | v1 behavior | +|---|---| +| `set_state` | Set a key-value pair in the mesh's shared state. Broadcasts `state_change` push to all peers. | +| `get_state` | Read a value by key. | +| `list_state` | List all state keys with values, authors, timestamps. | + +### 2.5 Memory (3) + +| Tool | v1 behavior | +|---|---| +| `remember` | Store a text memory with optional tags. Persists across sessions. | +| `recall` | Full-text search memories by query, ranked results. | +| `forget` | Delete a memory by ID. | + +### 2.6 Files (8) + +| Tool | v1 behavior | +|---|---| +| `share_file` | Upload a file to MinIO. Supports `to: ` for E2E encryption (symmetric key wrapped with peer pubkey), or mesh-wide sharing. Supports `persistent` vs `ephemeral` storage. | +| `get_file` | Download a file by `fileId`. Returns a presigned MinIO URL. | +| `list_files` | List files in the mesh by `scope`, `tags`, author. | +| `file_status` | Query status of a file: who downloaded, when. | +| `delete_file` | Delete a file (owner only). | +| `grant_file_access` | Add another peer as a recipient of an already-encrypted file (re-wraps symmetric key). | +| `read_peer_file` | Read a file from another peer's working directory (requires peer online + sharing). | +| `list_peer_files` | List files in a peer's shared directory (tree of names, not contents). | + +### 2.7 Vectors (Qdrant) (4) + +| Tool | v1 behavior | +|---|---| +| `vector_store` | Store embedding with metadata in a named collection. | +| `vector_search` | Nearest-neighbor search in a collection with `limit`. | +| `vector_delete` | Delete a vector by ID. | +| `list_collections` | List collections in the mesh's Qdrant namespace. 
| + +### 2.8 Graph (Neo4j) (2) + +| Tool | v1 behavior | +|---|---| +| `graph_query` | Read-only Cypher MATCH query on the per-mesh Neo4j database. | +| `graph_execute` | Write Cypher (CREATE/MERGE/DELETE). | + +### 2.9 Shared SQL (Postgres) (3) + +| Tool | v1 behavior | +|---|---| +| `mesh_query` | SELECT-only query on the per-mesh Postgres schema. | +| `mesh_execute` | DDL + DML (CREATE TABLE, INSERT, UPDATE, DELETE). | +| `mesh_schema` | List tables + columns in the mesh's schema. | + +### 2.10 Streams (4) + +| Tool | v1 behavior | +|---|---| +| `create_stream` | Create a named stream for live data pub-sub. | +| `publish` | Push data to a stream. Subscribers receive in real-time. | +| `subscribe` | Subscribe to a stream. Events arrive as channel notifications. | +| `list_streams` | List active streams. | + +### 2.11 Contexts (3) + +| Tool | v1 behavior | +|---|---| +| `share_context` | Share session understanding with the mesh (summary + files_read + key_findings + tags). | +| `get_context` | Search contexts by query (file path, topic, etc.). | +| `list_contexts` | Show what peers currently know about the codebase. | + +### 2.12 Tasks (4) + +| Tool | v1 behavior | +|---|---| +| `create_task` | Create a work item (title, assignee, priority, tags). | +| `claim_task` | Claim an unclaimed task. | +| `complete_task` | Mark done with optional result summary. | +| `list_tasks` | Filter by status and/or assignee. | + +### 2.13 Scheduling (3) + +| Tool | v1 behavior | +|---|---| +| `schedule_reminder` | One-shot (`deliver_at`, `in_seconds`) or recurring (`cron`). Delivered to self or `to`. Persists across broker restarts. | +| `list_scheduled` | List pending scheduled messages. | +| `cancel_scheduled` | Cancel by ID. | + +### 2.14 Mesh metadata — read (4) + +| Tool | v1 behavior | +|---|---| +| `mesh_info` | Overview: peers, groups, state, memory, files, tasks, streams, tables. | +| `mesh_stats` | Resource usage per peer: messages in/out, tool calls, uptime, errors. 
| +| `mesh_clock` | Simulation clock status: speed, tick count, simulated time. | +| `ping_mesh` | Test messages through the full pipeline, measure round-trip per priority. Diagnoses push delivery issues. | + +### 2.15 Mesh clock — write (3) + +| Tool | v1 behavior | +|---|---| +| `mesh_set_clock` | Set simulation clock speed (1–100x). Peers receive heartbeat ticks at the simulated rate. | +| `mesh_pause_clock` | Pause simulation clock. | +| `mesh_resume_clock` | Resume paused clock. | + +### 2.16 Skills (5) + +| Tool | v1 behavior | +|---|---| +| `share_skill` | Publish a reusable skill (name + description + instructions + tags + when_to_use + allowed_tools + model + context + agent + user_invocable + argument_hint). Exposed as MCP prompts and `skill://` resources. | +| `get_skill` | Load a skill's full instructions by name. | +| `list_skills` | Browse available skills, optionally filter by keyword. | +| `remove_skill` | Remove a shared skill. | +| `mesh_skill_deploy` | Deploy a multi-file skill bundle from zip or git repo. | + +### 2.17 MCP registry tier 1 — peer-hosted (4) + +| Tool | v1 behavior | +|---|---| +| `mesh_mcp_register` | Register a peer's local MCP server with the mesh (server_name, description, tools schema, persistent flag). Other peers can invoke via `mesh_tool_call`. | +| `mesh_mcp_list` | List MCP servers in the mesh with their tools + hosting peer. | +| `mesh_tool_call` | Call a tool on a mesh-registered MCP server. Routes: caller → broker → hosting peer → execute → result back. 30s timeout. | +| `mesh_mcp_remove` | Unregister a peer-hosted MCP server. | + +### 2.18 MCP registry tier 2 — broker-deployed (7) + +| Tool | v1 behavior | +|---|---| +| `mesh_mcp_deploy` | Deploy an MCP server from zip (via `file_id`), git URL, or npx package. Runs on broker VPS in Docker sandbox. Scope: `peer` (default), `mesh`, or `{group/groups/role/peers}`. Runtime: node / python / bun. Memory, network_allow, env with `$vault:` references. 
| +| `mesh_mcp_undeploy` | Stop and remove a managed MCP server. | +| `mesh_mcp_update` | Pull latest + restart a git-sourced server. | +| `mesh_mcp_logs` | Tail recent logs from a managed server. | +| `mesh_mcp_scope` | Get or set visibility scope. | +| `mesh_mcp_schema` | Inspect tool schemas for a deployed server. | +| `mesh_mcp_catalog` | List all deployed services with status, scope, tool count. | + +### 2.19 Vault (3) + +| Tool | v1 behavior | +|---|---| +| `vault_set` | Store encrypted credential. `type: env` (string, injected as env var via `$vault:`) or `type: file` (file written to `mount_path` in container). | +| `vault_list` | List vault entries (keys + metadata only, no values). | +| `vault_delete` | Remove a credential. | + +### 2.20 URL watch (3) + +| Tool | v1 behavior | +|---|---| +| `mesh_watch` | Watch a URL for changes. Modes: `hash` (SHA-256 body), `json` (jsonpath extract), `status` (HTTP code). Polling `interval` (min 5s). `notify_on: change \| match: \| not_match:`. Custom headers. | +| `mesh_unwatch` | Stop watching by `watch_id`. | +| `mesh_watches` | List active watches. | + +### 2.21 Webhooks (3) + +| Tool | v1 behavior | +|---|---| +| `create_webhook` | Create an inbound webhook. Returns a URL external services (GitHub, CI/CD, monitoring) can POST to. Payload becomes a mesh message to all peers. | +| `list_webhooks` | List active webhooks. | +| `delete_webhook` | Deactivate by name. | + +--- + +## 3. Broker WS protocol + +`apps/broker/src/index.ts` dispatches 85 message types over a single WebSocket endpoint (`WS_PATH`). Each WS message is a client-initiated RPC; most of the 79 MCP tools above map 1:1 to a WS message. Some additional WS messages exist for connection lifecycle + internal routing. + +### 3.1 Connection lifecycle (3) + +- `hello` — client authentication. Ed25519 signature over `{meshId, memberId, pubkey, timestamp}`. Broker verifies, creates presence row, replies with `hello_ack`. 
+- `hello_ack` — server → client, confirms authentication + sends restored peer state. +- `get_clock` — get current simulation clock state. + +### 3.2 Messaging (5 WS ops) + +- `send` — send a message. Envelope contains sender, recipient (peer/group/*), priority, nonce, ciphertext. +- `peer_dir_request` / `peer_dir_response` — peer-to-peer directory request (read_peer_file under the hood). +- `peer_file_request` / `peer_file_response` — peer-to-peer file read. + +### 3.3 Profile + presence (5) + +- `set_status`, `set_summary`, `set_visible`, `set_profile`, `set_stats` + +### 3.4 Groups (2) + +- `join_group`, `leave_group` + +### 3.5 State KV (3) + +- `set_state`, `get_state`, `list_state` + +### 3.6 Memory (3) + +- `remember`, `recall`, `forget` + +### 3.7 Files (5) + +- `get_file`, `list_files`, `file_status`, `grant_file_access`, `delete_file` + +### 3.8 Vectors (4) + +- `vector_store`, `vector_search`, `vector_delete`, `list_collections` + +### 3.9 Graph (2) + +- `graph_query`, `graph_execute` + +### 3.10 Shared SQL (3) + +- `mesh_query`, `mesh_execute`, `mesh_schema` + +### 3.11 Streams (5) + +- `create_stream`, `publish`, `subscribe`, `unsubscribe`, `list_streams` + +### 3.12 Contexts (3) + +- `share_context`, `get_context`, `list_contexts` + +### 3.13 Tasks (4) + +- `create_task`, `claim_task`, `complete_task`, `list_tasks` + +### 3.14 Scheduling (3) + +- `schedule`, `list_scheduled`, `cancel_scheduled` + +### 3.15 Mesh metadata (3) + +- `mesh_info`, `peers_list` (from `list_peers`), `message_status` + +### 3.16 Simulation clock (4) + +- `set_clock`, `pause_clock`, `resume_clock`, `get_clock` (also listed in 3.1; counted once toward the 85 total) + +### 3.17 Skills (5) + +- `share_skill`, `get_skill`, `list_skills`, `remove_skill`, `skill_deploy` + +### 3.18 MCP registry (12) + +- `mcp_register`, `mcp_unregister`, `mcp_list`, `mcp_call`, `mcp_call_response` (peer → peer relay) +- `mcp_deploy`, `mcp_undeploy`, `mcp_update`, `mcp_logs`, `mcp_scope`, `mcp_schema`, `mcp_catalog` + +### 3.19 Vault (4) + +- 
`vault_set`, `vault_get`, `vault_list`, `vault_delete` + +### 3.20 URL watch (3) + +- `watch`, `unwatch`, `watch_list` + +### 3.21 Webhooks (3) + +- `create_webhook`, `list_webhooks`, `delete_webhook` + +### 3.22 Audit (2) + +- `audit_query`, `audit_verify` + +--- + +## 4. Broker HTTP endpoints + +The broker serves both WS (`/ws`) and HTTP on the same port. HTTP endpoints are listed here by (method, path) with purpose. + +| Method | Path | Purpose | +|---|---|---| +| `GET` | `/health` | Health check: liveness probe | +| `GET` | `/metrics` | Prometheus metrics endpoint | +| `POST` | `/hook/set-status` | Receive hook status updates from CLI `hook` command (Claude Code session lifecycle) | +| `POST` | `/join` | Accept v1 invite join (legacy) | +| `POST` | `/invites/:code/claim` | v2 invite claim (public, unauthenticated) | +| `POST` | `/upload` | Upload a file (returns fileId, used by `share_file`) | +| `GET` | `/download/:id` | Download a file (returns content or presigned URL) | +| `POST` | `/cli-sync` | CLI sync endpoint — fetches user's meshes from `claudemesh.com` dashboard via JWT, returns mesh list | +| `POST` | `/tg/token` | Register a Telegram bot token for a mesh (connects via `connect telegram` CLI command) | +| `PATCH` | `/mesh/:id/member/:memberId` | Update a member's profile (admin or self) | +| `GET` | `/mesh/:id/members` | List mesh members | +| `PATCH` | `/mesh/:id/settings` | Update mesh-level settings (owner/admin) | +| `POST` | `/hook/:meshId/:webhookId` | Inbound webhook — external systems POST here to publish a mesh message | +| `GET` | `/test/clock` | Dev-only: simulation clock state | +| `GET` | `/test/flip` | Dev-only: test flip endpoint | +| `GET` | `/test/html` | Dev-only: test HTML endpoint | +| `WS` | `/ws` | WebSocket connection for mesh peers (all WS ops above) | + +--- + +## 5. Database schema — `mesh` Postgres schema + +23 tables in the `mesh` schema (managed via Drizzle). Defined in `packages/db/src/schema/mesh.ts`. 
+ +| Table | Purpose | +|---|---| +| `mesh.mesh` | Mesh identity. slug, name, ownerId, createdAt, settings. | +| `mesh.member` | Per-mesh member record. Stable, durable. pubkey, displayName, role, groups, joinedAt. | +| `mesh.invite` | Invite codes + metadata. | +| `mesh.pending_invite` | v2 invite handshake state (pending claim). | +| `mesh.audit_log` | Audit events per mesh. | +| `mesh.presence` | Ephemeral WS session — one row per active connection. Status, statusSource, statusUpdatedAt. | +| `mesh.message_queue` | Queued messages pending push delivery (priority ordered). | +| `mesh.pending_status` | In-flight status updates (10s TTL). | +| `mesh.state` (meshState) | Shared KV state per mesh. | +| `mesh.memory` (meshMemory) | Shared memories with full-text search. | +| `mesh.file` (meshFile) | File metadata (uploader, size, sha256, persistence, storage location). | +| `mesh.file_access` (meshFileAccess) | Per-recipient ACL on files. | +| `mesh.file_key` (meshFileKey) | Per-recipient wrapped symmetric keys for E2E encryption. | +| `mesh.context` (meshContext) | Shared context entries. | +| `mesh.task` (meshTask) | Tasks with lifecycle (open, claimed, completed, cancelled). | +| `mesh.stream` (meshStream) | Stream metadata. | +| `mesh.skill` (meshSkill) | Skill registrations (name, content, frontmatter, tags). | +| `mesh.webhook` (meshWebhook) | Inbound webhook registrations. | +| `mesh.service` (meshService) | Deployed MCP server state (container ID, scope, env, runtime, memory, logs). | +| `mesh.vault_entry` (meshVaultEntry) | Encrypted vault entries per (mesh, peer, key). | +| `mesh.scheduled_message` | Scheduled / recurring reminders (cron + one-shot). | +| `mesh.peer_state` | Per-peer state (groups, role, profile, message mode preference). | +| `mesh.telegram_bridge` | Telegram bot registration per mesh. | + +--- + +## 6. Broker backend services + +Five external services the broker manages at runtime. 
All currently work in v1 and ship in the default Docker Compose deployment. + +| Service | Purpose | File | Per-mesh model | +|---|---|---|---| +| **Postgres** (Drizzle) | Primary data store for mesh schema. Also used for `mesh_execute` / `mesh_query` / `mesh_schema` shared-SQL tools via per-mesh schemas. | `db.ts` | Schema-per-mesh for shared SQL tools | +| **Neo4j** | Graph queries (`graph_query`, `graph_execute`). | `neo4j-client.ts` | Database-per-mesh (Enterprise) or labeled-node fallback (Community) | +| **Qdrant** | Vector embeddings + nearest-neighbor search. | `qdrant.ts` | Collection naming: `mesh__`, 1536-dim default, cosine distance | +| **MinIO** | File storage for `share_file` / `get_file`. | `minio.ts` | Bucket-per-mesh: `mesh-`. Persistent + ephemeral key paths. | +| **Docker** | Runs deployed MCP servers in sandboxed containers. | `index.ts` (deploy handler) | Container-per-deployment. Read-only root, dropped caps, memory limits, network_allow. | + +--- + +## 7. Broker core subsystems + +### 7.1 Status engine (`broker.ts`, 2066 lines) + +**Battle-tested status model** ported from `claude-intercom`. Rules: + +- Status sources are ranked: `hook` (3) > `manual` (2) > `jsonl` (1) +- On status update: + - If status **changed** → bump everything, record new source + - If status **unchanged**, incoming source ≥ recorded → upgrade + - If status **unchanged**, incoming source < recorded: + - Recorded source still fresh → keep it (bump timestamp only) + - Recorded source stale → downgrade to honest attribution +- `HOOK_FRESHNESS_MS` window (default 60s) for "fresh" classification +- `WORKING_TTL_MS` after which `working` status reverts to `idle` +- `PENDING_TTL_MS = 10_000` for pending status cleanup +- `TTL_SWEEP_INTERVAL_MS = 15_000` for periodic cleanup + +**Must preserve** — this is the correctness engine for `set_status`, `list_peers`, and Claude Code's status line. 
+ +### 7.2 Message queue + priority delivery + +- Messages are stored in `mesh.message_queue` with priority (`now`, `next`, `low`) +- `now` messages bypass busy-gate and are pushed immediately +- `next` messages wait for idle peer +- `low` messages are pull-only (delivered when peer explicitly drains via `check_messages`) +- Queue is drained via `drainForMember(meshId, memberId)` on WS message arrival or manual `check_messages` +- Duplicate delivery prevention via `messageId` UUID tracking + +### 7.3 Scheduled message delivery (`index.ts` in-memory + DB persistence) + +- One-shot: `deliver_at` (timestamp) or `in_seconds` +- Recurring: standard 5-field cron expression +- Persists to `mesh.scheduled_message` table — survives broker restart +- On broker start, pending schedules are re-registered +- Delivery is via the normal `send_message` pipeline with `subtype: reminder` + +### 7.4 URL watch subsystem (`index.ts`) + +- Poller runs in-process (worker per watch) +- Modes: `hash` (SHA-256 of body), `json` (extract jsonpath value), `status` (HTTP status) +- `notify_on: change | match:<value> | not_match:<value>` +- Persists to DB so watches survive broker restart +- Min interval 5s, max 24h + +### 7.5 Telegram bridge (`telegram-bridge.ts`, 1711 lines) + +**Substantial subsystem.** Provides Telegram Bot API integration: + +- Bot token registration per mesh via `POST /tg/token` +- Long-polling or webhook mode +- `tg:<chatId>` peer identity registration in the mesh's member table +- Inbound Telegram messages → mesh `send_message` events with `subtype: telegram` +- Outbound `send_message(to: "tg:<chatId>")` → Telegram Bot API call +- Chat-to-mesh mapping (Telegram chat_id ↔ mesh peer) +- User discovery (`connectChat`) +- Bridge row persistence in `mesh.telegram_bridge` + +**This is ~18% of the broker's total source**. v2 must either: +1. Port the logic into a standalone MCP connector (`apps/mcp-telegram/`), or +2. 
Keep this file in the broker and wire it into the v2 architecture unchanged (my recommendation per the previous conversation — bundled into the broker image) + +Either way, **every behavior documented here must still work after v2 lands**. + +### 7.6 Auth + crypto (`crypto.ts`, `broker-crypto.ts`, `jwt.ts`) + +- **Hello signatures**: Ed25519 signed tuple of `(meshId, memberId, pubkey, timestamp)`. Verified on every WS connection. Replay protection via timestamp window. +- **Invite verification**: canonical invite payload (`canonicalInvite`) signed by mesh owner, Ed25519 verified on claim +- **JWT**: for `/cli-sync` endpoint — the CLI obtains a JWT from `claudemesh.com` via browser flow, passes it to the broker, broker verifies and returns the user's mesh list +- **File envelopes**: client-side AES-GCM + per-recipient key wrapping (file_key table) + +### 7.7 Rate limiting (`rate-limit.ts`) + +- Per-peer rate limits on expensive operations +- Currently in-process (not Redis-backed) +- Enforces limits on `send`, `vector_store`, `mesh_execute`, `mesh_mcp_deploy`, etc. + +### 7.8 Metrics (`metrics.ts`) + +Prometheus metrics exposed at `/metrics`: +- Request counts by op type +- Latencies p50/p99 +- Connection counts per mesh +- Message delivery counts by priority +- Error rates + +### 7.9 Audit log (`audit.ts`) + +- Every mutation is audited to `mesh.audit_log` +- Tamper-evidence via hash chaining +- Accessible via `audit_query` and `audit_verify` WS ops + +### 7.10 Member API (`member-api.ts`, 284 lines) + +Exports: +- `updateMemberProfile()` — used by `PATCH /mesh/:id/member/:memberId` +- `listMeshMembers()` — used by `GET /mesh/:id/members` +- `updateMeshSettings()` — used by `PATCH /mesh/:id/settings` + +### 7.11 CLI sync (`cli-sync.ts`, 133 lines) + +Exports `handleCliSync()` for `POST /cli-sync`. This is **already the "CLI sync meshes from dashboard" feature** — v2 will reuse this endpoint for its mesh-list refresh logic. 
+ +### 7.12 Webhook subsystem (`webhooks.ts`, 97 lines) + +Handles `POST /hook/:meshId/:webhookId` inbound. Signature verification (HMAC), payload normalization, mesh message emission. + +--- + +## 8. CLI core subsystems + +### 8.1 WS client (`ws/client.ts`, 2191 lines) + +**The biggest CLI file.** Implements the full WS protocol with: +- Connection management, reconnect with exponential backoff +- Message queue for offline buffering +- Request/response correlation via `_reqId` +- Ed25519 hello signature generation +- Crypto envelope wrapping for `send_message` payloads +- Push notification delivery (messages, state changes, system events) +- Per-mesh connection pooling (one WS per mesh) + +### 8.2 MCP server (`mcp/server.ts`, 2139 lines) + +Second biggest CLI file. Implements: +- MCP stdio transport (registered with Claude Code via `install.ts`) +- Tool registry from `mcp/tools.ts` +- Dispatch to 79 handlers (one per tool) +- WS client pooling (one connection per mesh) +- Crypto primitives for memory/state encryption +- Inline file-read helpers for `read_peer_file` +- Channel notification forwarding from broker → Claude Code via MCP elicitation + +### 8.3 Crypto (`crypto/*.ts`) + +- `keypair.ts` — Ed25519 keypair generation + persistence (`~/.claudemesh/keys/.key`) +- `envelope.ts` — NaCl `crypto_box` envelope wrapping +- `file-crypto.ts` — AES-GCM file encryption + per-recipient key wrapping +- `hello-sig.ts` — Hello signature generation/verification + +### 8.4 Auth + invite (`auth/*.ts`, `invite/*.ts`, `lib/invite-v2.ts`) + +- `callback-listener.ts` — local HTTP server that catches browser OAuth callback (for `sync` command) +- `open-browser.ts` — cross-platform browser launcher +- `pairing-code.ts` — pairing code display +- `sync-with-broker.ts` — JWT-based sync from dashboard +- `invite/parse.ts` — parse v1 invite URLs +- `invite/enroll.ts` — enroll into a mesh from an invite +- `lib/invite-v2.ts` — v2 invite format (short-code + signed payload) + +### 8.5 
State + config (`state/config.ts`) + +- `~/.claudemesh/config.json` read/write (mesh list, keypairs, profile defaults) +- 0600 permission enforcement +- Schema validation + +### 8.6 TUI primitives (`tui/*.ts`) + +- `colors.ts` — hard-coded ANSI colors +- `index.ts` — input helpers +- `screen.ts` — raw-mode screen control +- `spinner.ts` — simple spinner + +### 8.7 Templates (`templates/index.ts`) + +- `dev-team`, `research`, `ops-incident`, `simulation`, `personal` +- Each template seeds initial state + preset groups + +### 8.8 Tests + +- `__tests__/crypto-roundtrip.test.ts` — crypto round-trip verification +- `__tests__/invite-parse.test.ts` — invite URL parsing +- No integration tests against a real broker + +--- + +## 9. Infrastructure + deployment + +### 9.1 Broker runtime (`env.ts`) + +Environment variables the broker expects: +- `DATABASE_URL` — Postgres connection +- `NEO4J_URL`, `NEO4J_USER`, `NEO4J_PASSWORD` +- `QDRANT_URL` +- `MINIO_ENDPOINT`, `MINIO_ACCESS_KEY`, `MINIO_SECRET_KEY`, `MINIO_USE_SSL` +- `STATUS_TTL_SECONDS` — working status timeout +- `HOOK_FRESH_WINDOW_SECONDS` — hook source freshness window +- `TELEGRAM_BOT_TOKEN` — for bridge +- `DASHBOARD_JWT_SECRET` — for `/cli-sync` verification +- `PORT` (default 8787) +- Various feature flags + +### 9.2 CLI runtime + +- Node >= 20 required (checked in `doctor`) +- `claude` binary must be on PATH +- `~/.claudemesh/` directory with config + keys +- `~/.claude.json` MCP server registration +- `~/.claude/settings.json` status hooks registration + +### 9.3 Deployment (Coolify/Docker Compose) + +- Broker deployed via Coolify + Gitea CI on OVHcloud VPS (`ic.claudemesh.com`) +- WS endpoint: `wss://ic.claudemesh.com/ws` +- HTTP endpoint: `https://ic.claudemesh.com` +- Postgres, Neo4j, Qdrant, MinIO run as siblings in Docker Compose +- Deployed MCP sandboxes use the host Docker daemon via socket mount + +--- + +## 10. 
Features not in the tool/WS surface (behavioral) + +These are v1 behaviors that exist but aren't enumerated as tools. Each must still work after v2. + +| Feature | Location | Notes | +|---|---|---| +| Flag-first `claudemesh --resume xxx` routing | `cli/src/index.ts` §339 | Rewrites argv to `launch --resume xxx` | +| Bare `claudemesh` → welcome wizard | `cli/src/index.ts` §334 | Runs `runWelcome()` | +| Status hook auto-registration | `commands/install.ts` | Writes to `~/.claude/settings.json` | +| Claude Code session hook handling | `commands/hook.ts` | Receives stdin JSON, posts to `/hook/set-status` | +| Per-mesh keypair directory | `crypto/keypair.ts` | `~/.claudemesh/keys/.key` with 0600 perms | +| E2E file encryption with re-wrapping | `crypto/file-crypto.ts` + `mesh_file_key` table | `grant_file_access` re-wraps symmetric key for new recipient | +| Priority message delivery | `broker.ts` | `now` bypasses busy-gate, `next` waits for idle, `low` is pull-only | +| Hook > manual > jsonl status priority | `broker.ts` | Documented in §7.1 | +| Simulation clock for test time | `index.ts` (broker) | Peers receive heartbeat ticks at simulated rate | +| Audit log hash chaining | `audit.ts` | Tamper-evident — tools call `audit_verify` to check | +| Dashboard-CLI sync | `auth/sync-with-broker.ts` + `cli-sync.ts` | Browser JWT flow, fetches mesh list from dashboard | +| Telegram chat ↔ mesh peer mapping | `telegram-bridge.ts` | Bidirectional routing via `tg:` | +| Inbound webhook payload normalization | `webhooks.ts` | External systems POST, becomes a mesh message | +| Rate limiting per peer per operation | `rate-limit.ts` | In-memory token buckets | +| Prometheus metrics | `metrics.ts` | `/metrics` endpoint | + +--- + +## 11. 
Test coverage (v1) + +| Test | File | Notes | +|---|---|---| +| Crypto round-trip | `apps/cli/src/__tests__/crypto-roundtrip.test.ts` | Encrypt → decrypt verification | +| Invite URL parsing | `apps/cli/src/__tests__/invite-parse.test.ts` | v1 and v2 formats | +| Broker tests | `apps/broker/tests/*.test.ts` | broker.test.ts, invite-signature.test.ts, invite-v2.test.ts, hello-signature.test.ts, rate-limit.test.ts, encoding.test.ts, dup-delivery.test.ts, metrics.test.ts, logging.test.ts, integration/health.test.ts | + +**v1 test coverage is minimal for the CLI side.** 2 unit test files for 12k LOC. + +Broker has ~10 test files. They cover crypto primitives, invite flow, hello signatures, rate limiting, metrics — but **not** the 85 WS message handlers comprehensively. + +--- + +## 12. The "must preserve" list (high-priority regression checks) + +If v2 breaks any of these, it's a user-facing regression: + +### 12.1 First-run experience +- [ ] `claudemesh` bare command → welcome wizard +- [ ] `claudemesh install` registers MCP server + status hooks in Claude Code config +- [ ] `claudemesh join ` enrolls into a mesh from a v1 OR v2 invite URL +- [ ] `claudemesh launch` starts Claude Code with mesh connectivity + +### 12.2 Session lifecycle +- [ ] Status hooks fire correctly on Claude Code session start/stop/pause +- [ ] `set_status` honors priority (hook > manual > jsonl) +- [ ] `list_peers` shows live status with freshness gating +- [ ] Status TTL sweeper runs every 15s + +### 12.3 Messaging +- [ ] `send_message(to: peer, priority: "now")` delivers immediately +- [ ] `send_message(to: peer, priority: "next")` waits for idle +- [ ] `send_message(to: "@group")` broadcasts to group members +- [ ] `send_message(to: "*")` broadcasts to all mesh peers +- [ ] Offline recipients receive queued messages on reconnect +- [ ] Duplicate delivery is prevented by `messageId` tracking + +### 12.4 Cryptographic integrity +- [ ] Ed25519 keypair generation + persistence with 0600 perms +- 
[ ] Hello signature verification rejects replayed hellos whose timestamp falls outside the allowed window +- [ ] `send_message` envelopes are E2E encrypted (NaCl crypto_box) +- [ ] File uploads are AES-GCM encrypted with per-recipient key wrapping +- [ ] `grant_file_access` re-wraps symmetric key for a new recipient + +### 12.5 All 79 MCP tools +- [ ] Every tool in §2 dispatches correctly through the CLI's MCP server +- [ ] Every tool delegates to the broker WS protocol or local handler as appropriate +- [ ] No tool returns "not implemented" or throws an unexpected error + +### 12.6 Broker backends +- [ ] `mesh_query` / `mesh_execute` / `mesh_schema` work against per-mesh Postgres schema +- [ ] `graph_query` / `graph_execute` work against per-mesh Neo4j database +- [ ] `vector_store` / `vector_search` work against per-mesh Qdrant collection +- [ ] `share_file` / `get_file` work through per-mesh MinIO bucket +- [ ] `mesh_mcp_deploy` spawns a Docker container with correct scope + env + network_allow +- [ ] `vault_set` + `$vault:` env injection works end-to-end for deployed MCPs + +### 12.7 Scheduled + URL watch +- [ ] `schedule_reminder` with `cron` survives broker restart (persisted in DB) +- [ ] `mesh_watch` polls at the specified interval and notifies on change +- [ ] Watch state persists across broker restart + +### 12.8 Telegram bridge +- [ ] `connect telegram` registers bot token via `POST /tg/token` +- [ ] Bot token is stored in `mesh.telegram_bridge` +- [ ] Inbound Telegram messages are routed as mesh messages +- [ ] `send_message(to: "tg:")` routes via Telegram Bot API +- [ ] `disconnect telegram` tears down the bridge cleanly + +### 12.9 Dashboard sync +- [ ] `claudemesh sync` browser flow completes and fetches mesh list +- [ ] `POST /cli-sync` with valid JWT returns user's dashboard meshes + +### 12.10 Webhooks +- [ ] `create_webhook` returns a POST URL +- [ ] External POST to webhook URL becomes a mesh message +- [ ] HMAC signature validation rejects unsigned requests +- [ ] 
`list_webhooks` + `delete_webhook` work + +### 12.11 Doctor checks +- [ ] Node >= 20 check +- [ ] `claude` binary on PATH +- [ ] MCP server registered in `~/.claude.json` +- [ ] Status hooks registered in `~/.claude/settings.json` +- [ ] `~/.claudemesh/config.json` exists + parses + 0600 perms +- [ ] Mesh keypairs valid + +--- + +## 13. What v2 is adding (net new) + +Not part of the regression list, but tracked here so we don't lose sight of the forward-looking scope. + +### 13.1 New CLI features (from user's stated v2 intent) + +- [ ] `claudemesh login` — device-code OAuth against claudemesh.com's Better Auth backend +- [ ] `claudemesh register` — create a new account from the CLI (via browser handoff) +- [ ] `claudemesh new` — create a mesh from the CLI against `POST /api/my/meshes` (not via templates in the CLI — via dashboard API) +- [ ] `claudemesh invite` — generate an invite from the CLI via `POST /api/my/meshes/:slug/invites` +- [ ] `claudemesh whoami` — show current identity + token source +- [ ] `claudemesh logout` — revoke server-side session + clear local credentials + +### 13.2 Architecture improvements (from user's v2 intent) + +- [ ] Feature-folder `services/` layer with strict facade boundaries +- [ ] ESLint + dependency-cruiser boundary enforcement +- [ ] `cli/` vs `ui/` separation (non-Ink I/O vs Ink rendering) +- [ ] `entrypoints/` folder with cli + mcp entries +- [ ] Typed error classes per service with `toDomainError` helper +- [ ] Coverage threshold enforcement in CI + +### 13.3 Not in v1.0.0 scope (defer to v1.1+) + +Everything from the Composer 2 review rounds that isn't Pass 1: + +- Local-first SQLite source of truth (Lamport, sync daemon, publish transaction) +- Broker security hardening (role-per-mesh Postgres, Docker egress proxy, SSRF policy) +- ICU MessageFormat + per-locale budgets +- Accessibility token-signal matrix +- Tiered MCP catalog + audit process +- session_kind enum +- NFC peer_id normalization +- Write queue state machine + 
+These stay in the `.artifacts/specs/` as reference documents. They describe a good destination. They are NOT v1.0.0 requirements. + +--- + +## 14. Known v1 technical debt / gaps (worth noting) + +These aren't features — they're places where v1 is weaker than it could be. Document here so v2 doesn't blindly port the weaknesses. + +- **CLI auth is missing** — v1 has no `login` / `logout` command. All account-level operations require the web dashboard. This is what v2 is adding. +- **Imperative command branching** — `commands/launch.ts` is 775 lines with nested flag handling. Cleaner in v2's flow pipeline. +- **Minimal CLI test coverage** — 2 test files for 12k LOC. v2 should have colocated tests per service. +- **Rate limiting is in-memory only** — doesn't survive broker restart; not Redis-backed. +- **No CLI-side caching** — every `list_peers` / `mesh_info` call hits the broker. v2's local-first layer (Pass 2) addresses this. +- **Telegram bridge is a large monolithic file** (1711 lines) — legitimate complexity, but v2 may want to modularize if it touches it. +- **v1 wizard bleed-through** — `launch` → `claude` handoff leaves ANSI state dirty. v2's `resetTerminal()` choke point fixes this. + +None of these are regressions if v2 keeps them as-is. v2 should **not** prioritize fixing them — fix them when they become a problem, not speculatively. + +--- + +## 15. Reading this inventory + +**If you're implementing v2 Phase 1** (foundation layers): every tool in §2, every WS op in §3, every HTTP endpoint in §4, every DB table in §5 must have a place in the v2 folder structure. No new semantics, no improved algorithms — just move the working code. + +**If you're reviewing a v2 PR**: check it against §12 ("must preserve" list). If the PR changes the behavior of anything in that list, it's a regression and needs explicit sign-off. + +**If you're writing v2 docs**: reference this document. 
Every feature here is user-visible and documented in v1's README / slash-command help / tool descriptions. v2 docs should mention every feature from §2 as preserved. + +--- + +**End of inventory.** diff --git a/.artifacts/backlog/2026-04-11-v2-parity-test-plan.md b/.artifacts/backlog/2026-04-11-v2-parity-test-plan.md new file mode 100644 index 0000000..c3def94 --- /dev/null +++ b/.artifacts/backlog/2026-04-11-v2-parity-test-plan.md @@ -0,0 +1,1068 @@ +# claudemesh v2 — Pass 1 Parity Test Plan + +**Status:** backlog reference +**Created:** 2026-04-11 +**Purpose:** Concrete test-by-test plan that verifies v2 behaves identically to v1 for every feature in the `2026-04-11-v1-feature-inventory.md` regression list. Green parity suite = v2 Pass 1 ready to ship. Red parity suite = keep working. +**Companion to:** `2026-04-11-v1-feature-inventory.md` (this document's §12 is the input to every test below) + +--- + +## 1. Why this exists + +v2 Pass 1 is a refactor: new folder structure, facade pattern, CLI user flows. The broker is unchanged, the backend services are unchanged, every v1 **tool** (the 79 MCP tools + 85 broker WS message types) must keep working. The only safe way to prove "keep working" is to run v1 and v2 side by side and assert they produce identical behavior. + +**v2 deliberately drops some v1 CLI commands** (not tools — CLI subcommands exposed to end users). Because v2 has no users yet and no migration constraints, the v2 CLI picks the best command surface without backwards compatibility. Commands dropped from v2: + +| v1 command | Dropped in v2 because | Replacement | +|---|---|---| +| `claudemesh launch [args]` | Redundant with bare `claudemesh`. The subcommand word adds nothing. | `claudemesh` (bare, with flags) | +| `claudemesh disconnect telegram` | Bridge teardown is done inside Telegram (`/revoke`) or by leaving the mesh; CLI wrapper is cosmetic. 
| In-Telegram revoke; or leave the mesh | + +**Preserved with UX upgrade**: `claudemesh connect telegram` stays in v2 but is rewritten as an interactive wizard — mesh picker, QR code, `t.me` deep link, waits for bridge confirmation. See Pass 1 spec §5.7. v1's silent auto-pick of `config.meshes[0]` is a multi-mesh footgun and is replaced. + +The 79 MCP tools and 85 WS message types are **all preserved**. Only the thin wrapper CLI subcommands that translated them are dropped. This is a conscious v2 decision to simplify the CLI surface, not a regression. + +v1 has thin test coverage (2 CLI test files for ~12 k LOC, ~10 broker tests mostly covering crypto primitives). That's not enough to catch regressions during a refactor. We need a **parity suite** — a test layer that doesn't trust v2 to describe its own correctness, but compares it to v1 directly. + +The parity suite is the acceptance criteria. v2 Pass 1 ships when it's green. + +--- + +## 2. The seven test layers + +| # | Layer | Purpose | Compared against | +|---|---|---|---| +| 1 | **Parity tests** | Behavioral equivalence on the `inventory §12` regression list | v1 CLI subprocess | +| 2 | **WS contract tests** | Wire-format compatibility — v2 must speak the broker's v1 protocol | captured v1 envelopes | +| 3 | **MCP tool handler tests** | Every one of the 79 tools dispatches identically | v1 handler output | +| 4 | **End-to-end smoke tests** | Full user journeys against a real broker | self-consistent e2e flow | +| 5 | **JSON output golden tests** | `--json` output shape is stable for script consumers | v1 `--json` captures | +| 6 | **Facade unit tests** | Boundary invariants — no token leaks, Zod validation works | facade contract spec | +| 7 | **Port-forwarded v1 tests** | Existing crypto + broker tests still pass | unchanged | + +Tests run in parallel where possible. Layers 1 and 2 are the most load-bearing — they're the proof that v2 doesn't break existing users. + +--- + +## 3. 
Layer 1 — Parity tests (inventory §12 driven) + +One test file per regression check in the inventory's "must preserve" list. Every file spawns both v1 and v2 CLIs against the same mock broker, captures their behavior, and asserts match on the dimensions that matter (exit code, stdout JSON fields, broker-side DB state, WS messages sent). + +File layout: + +``` +apps/cli-v2/tests/parity/ +├── first-run/ +├── session-lifecycle/ +├── messaging/ +├── crypto/ +├── tools/ (this one is covered by layer 3) +├── backends/ +├── scheduled/ +├── telegram/ +├── dashboard-sync/ +├── webhooks/ +└── doctor/ +``` + +### 3.1 First-run parity (inventory §12.1) + +| Test file | Asserts | +|---|---| +| `first-run/install.test.ts` | `claudemesh install` writes identical entries to `~/.claude.json` + `~/.claude/settings.json` | +| `first-run/install-no-hooks.test.ts` | `claudemesh install --no-hooks` registers only the MCP server, not the status hooks | +| `first-run/uninstall.test.ts` | `claudemesh uninstall` removes everything `install` added, leaving other config entries untouched | +| `first-run/join-v1-invite.test.ts` | `claudemesh join ` enrolls using legacy invite format | +| `first-run/join-v2-invite.test.ts` | `claudemesh join ` enrolls using short-code + signed payload | +| `first-run/bare-first-run-welcome.test.ts` | `claudemesh` on a fresh machine (no config) shows the welcome wizard | +| `first-run/bare-returning-user-launches.test.ts` | `claudemesh` on a machine with config launches a session directly (no wizard) | +| `first-run/bare-with-flags-launches.test.ts` | `claudemesh --resume abc`, `claudemesh --mesh foo -y`, `claudemesh --name Alexis` all dispatch to the launch handler | +| `first-run/launch-word-unknown-command.test.ts` | `claudemesh launch` returns exit code 3 (invalid args) with a clear "Unknown command" error. The word `launch` is deliberately not a subcommand in v2. 
| + +### 3.2 Session lifecycle parity (inventory §12.2) + +| Test file | Asserts | +|---|---| +| `session-lifecycle/status-hook-start.test.ts` | Running `claudemesh hook` with a Claude Code `session_start` payload posts to `/hook/set-status` with `status: working`, source `hook` | +| `session-lifecycle/status-hook-stop.test.ts` | `session_stop` payload → `/hook/set-status` with `status: idle`, source `hook` | +| `session-lifecycle/status-priority.test.ts` | When a `hook` source status is fresh, a subsequent `manual` status is rejected (priority gating) | +| `session-lifecycle/status-ttl-sweep.test.ts` | After `WORKING_TTL_MS`, a stale `working` status decays to `idle` via the sweeper | +| `session-lifecycle/list-peers-freshness.test.ts` | `claudemesh peers` marks peers with stale hook source as degraded | +| `session-lifecycle/multi-mesh-status.test.ts` | Status updates in mesh A don't affect peer status in mesh B | + +### 3.3 Messaging parity (inventory §12.3) + +| Test file | Asserts | +|---|---| +| `messaging/send-priority-now.test.ts` | `send --priority now` delivers immediately, bypassing busy-gate | +| `messaging/send-priority-next.test.ts` | `send --priority next` waits for the recipient to be idle before delivery | +| `messaging/send-priority-low.test.ts` | `send --priority low` is pull-only (recipient must `check_messages` or `inbox`) | +| `messaging/send-to-group.test.ts` | `send @frontend ` fans out to all group members, not individual peers | +| `messaging/send-broadcast.test.ts` | `send "*" ` broadcasts to all connected peers in the mesh | +| `messaging/offline-queue-drain.test.ts` | Messages sent to an offline peer persist in `mesh.message_queue` and drain when the peer reconnects | +| `messaging/duplicate-delivery-prevention.test.ts` | Sending the same `messageId` twice does not double-deliver | +| `messaging/message-status-lookup.test.ts` | `message_status` returns correct delivery state: queued / delivered / acked | +| 
`messaging/inbox-drain.test.ts` | `claudemesh inbox` drains and prints pending messages; second run shows empty | +| `messaging/inbox-wait.test.ts` | `claudemesh inbox --wait 5` blocks for broker delivery up to 5s, returns early on arrival | + +### 3.4 Cryptographic integrity parity (inventory §12.4) + +| Test file | Asserts | +|---|---| +| `crypto/keypair-perms.test.ts` | Generated keypairs at `~/.claudemesh/keys/.key` are mode `0600`, parent dir `0700` | +| `crypto/keypair-roundtrip.test.ts` | Keypair generation + persistence + reload produces the same public key | +| `crypto/hello-sig-verification.test.ts` | Valid Ed25519 hello signatures pass; altered timestamps are rejected as replay | +| `crypto/envelope-roundtrip.test.ts` | `send_message` ciphertext decrypts back to original on the recipient side | +| `crypto/file-encrypt.test.ts` | `share_file` with `to: ` produces AES-GCM ciphertext + wrapped symmetric key in `mesh.file_key` | +| `crypto/file-decrypt.test.ts` | Recipient downloads + decrypts, content matches original | +| `crypto/grant-access-rewrap.test.ts` | `grant_file_access` adds a new `file_key` row for the additional recipient, re-wrapping the same symmetric key | +| `crypto/invite-v2-signature.test.ts` | v2 invite payloads pass Ed25519 signature verification; tampered payloads fail | + +### 3.5 Broker backends parity (inventory §12.6) + +| Test file | Asserts | +|---|---| +| `backends/postgres-mesh-execute.test.ts` | `mesh_execute "CREATE TABLE bugs..."` creates the table in the per-mesh schema | +| `backends/postgres-mesh-query.test.ts` | `mesh_query "SELECT * FROM bugs"` returns rows | +| `backends/postgres-mesh-schema.test.ts` | `mesh_schema` lists the newly-created table + columns | +| `backends/postgres-cross-mesh-isolation.test.ts` | Query in mesh A cannot see tables created in mesh B (schema-level isolation) | +| `backends/neo4j-graph-execute.test.ts` | `graph_execute "CREATE (n:Bug {id: 1})"` persists a node in the per-mesh Neo4j database | 
+| `backends/neo4j-graph-query.test.ts` | `graph_query "MATCH (n:Bug) RETURN n"` returns the created node | +| `backends/qdrant-vector-store.test.ts` | `vector_store collection=docs content=...` upserts into `mesh__docs` collection | +| `backends/qdrant-vector-search.test.ts` | `vector_search collection=docs query=...` returns nearest neighbors with metadata | +| `backends/qdrant-list-collections.test.ts` | `list_collections` enumerates the mesh's collections | +| `backends/minio-share-small-file.test.ts` | `share_file` with < 64 KB uploads and returns a fileId | +| `backends/minio-share-large-file.test.ts` | `share_file` with 10 MB uploads in chunks and returns a fileId | +| `backends/minio-get-file.test.ts` | `get_file` returns the content or presigned URL for download | +| `backends/minio-delete-file.test.ts` | `delete_file` removes the file from the bucket | +| `backends/docker-mcp-deploy.test.ts` | `mesh_mcp_deploy` with a catalog alias spawns a Docker container with the expected env + memory + network_allow | +| `backends/docker-mcp-logs.test.ts` | `mesh_mcp_logs` returns recent stdout/stderr from a running deployment | +| `backends/docker-mcp-undeploy.test.ts` | `mesh_mcp_undeploy` SIGTERMs the container cleanly | + +### 3.6 Scheduled messages + URL watch parity (inventory §12.7) + +| Test file | Asserts | +|---|---| +| `scheduled/one-shot-deliver-at.test.ts` | `schedule_reminder deliver_at=` fires at the target timestamp | +| `scheduled/one-shot-in-seconds.test.ts` | `schedule_reminder in_seconds=5` fires 5 seconds after submission | +| `scheduled/cron-recurring.test.ts` | `schedule_reminder cron="*/1 * * * *"` fires every minute | +| `scheduled/persist-across-restart.test.ts` | Pending reminders survive a broker restart (re-registered from `mesh.scheduled_message` table) | +| `scheduled/list-cancel.test.ts` | `list_scheduled` shows pending; `cancel_scheduled ` prevents delivery | +| `scheduled/url-watch-hash-mode.test.ts` | `mesh_watch mode=hash` detects 
body change via SHA-256 comparison | +| `scheduled/url-watch-json-mode.test.ts` | `mesh_watch mode=json extract=data.status` detects value change at the jsonpath | +| `scheduled/url-watch-status-mode.test.ts` | `mesh_watch mode=status` detects HTTP status code change | +| `scheduled/url-watch-notify-on-match.test.ts` | `notify_on="match:up"` fires only when value equals `"up"` | +| `scheduled/url-watch-persist.test.ts` | Active watches persist across broker restart | + +### 3.7 Telegram bridge parity (inventory §12.8) + +The Telegram bridge is a broker-side feature that continues to work in v2 Pass 1 because Pass 1 doesn't touch the broker. On the CLI side, `claudemesh connect telegram` is preserved but rewritten as an interactive wizard (see §1), while `claudemesh disconnect telegram` is dropped — it was v1-only CLI surface, and we drop it because (a) there are no users to migrate and (b) bridge teardown is done inside Telegram (`/revoke`) or by leaving the mesh. + +The connect wizard is covered by the parity tests in the table below. Broker-side bridge behavior is verified via e2e tests in §6 that connect directly to the broker's `POST /tg/token` endpoint, simulate inbound Telegram webhook payloads, and verify outbound routing via `send_message(to: "tg:<chatId>")`. 
+ +| Test file | Layer | Asserts | +|---|---|---| +| `telegram/connect-wizard-mesh-picker.test.ts` | parity | `claudemesh connect telegram` with >1 joined mesh shows the Ink mesh picker (v1 silently picked mesh[0] — v2 is explicit) | +| `telegram/connect-wizard-single-mesh.test.ts` | parity | With exactly one joined mesh, the wizard skips the picker and proceeds directly to token request | +| `telegram/connect-wizard-zero-mesh.test.ts` | parity | With zero joined meshes, exits with error code 5 and "run `claudemesh join` first" | +| `telegram/connect-wizard-happy-path.test.ts` | parity | Mock broker returns `{token, deepLink}`; wizard renders QR + link; simulated `telegram_bridge_connected` push triggers success message | +| `telegram/connect-wizard-poll-fallback.test.ts` | parity | When the broker does not emit a push event, the wizard falls back to polling `GET /mesh/:id/members` every 2s until a `tg:*` entry appears | +| `telegram/connect-wizard-rate-limited.test.ts` | parity | Broker 429 response is caught and rendered as "too many Telegram tokens in the last hour" instead of raw HTTP | +| `telegram/connect-wizard-link-flag.test.ts` | parity | `--link` flag prints only the deep link, no QR, no wait (scriptable) | +| `telegram/connect-wizard-status-flag.test.ts` | parity | `--status` flag checks existing bridge without generating a new token | +| `telegram/connect-wizard-ctrl-c.test.ts` | parity | Ctrl-C during the wait phase prints the "link stays valid" hint and exits 0 | +| (e2e) `tests/e2e/telegram/broker-token-register.test.ts` | e2e | `POST /tg/token` registers a bot token, writes to `mesh.telegram_bridge` | +| (e2e) `tests/e2e/telegram/broker-inbound-routing.test.ts` | e2e | Simulated inbound Telegram update is routed as a mesh `send_message` with `subtype: telegram` | +| (parity) `telegram/send-message-to-tg-peer.test.ts` | parity | `send_message(to: "tg:", ...)` via the v2 CLI calls the broker with the same WS envelope as v1 would | +| (parity) 
`telegram/list-peers-shows-tg-bridge.test.ts` | parity | When a Telegram bridge is registered on the broker, `claudemesh peers` includes `tg:` entries with `type: bridge` | + +### 3.8 Dashboard sync parity (inventory §12.9) + +| Test file | Asserts | +|---|---| +| `dashboard-sync/browser-flow.test.ts` | `claudemesh sync` opens browser, receives JWT via `callback-listener`, fetches mesh list | +| `dashboard-sync/cli-sync-endpoint.test.ts` | `POST /cli-sync` with valid JWT returns the user's dashboard meshes; invalid JWT is rejected | +| `dashboard-sync/force-resync.test.ts` | `claudemesh sync --force` re-links even if already linked | + +### 3.9 Webhooks parity (inventory §12.10) + +| Test file | Asserts | +|---|---| +| `webhooks/create-returns-url.test.ts` | `create_webhook name=github` returns a POST URL | +| `webhooks/external-post-becomes-mesh-message.test.ts` | External `POST /hook/:meshId/:webhookId` with a JSON payload emits a mesh message to all peers | +| `webhooks/hmac-signature-validation.test.ts` | HMAC-signed requests pass, unsigned requests are rejected | +| `webhooks/list-delete.test.ts` | `list_webhooks` + `delete_webhook` round-trip works | + +### 3.10 Doctor checks parity (inventory §12.11) + +| Test file | Asserts | +|---|---| +| `doctor/check-node-version.test.ts` | `doctor` reports Node ≥ 20 (or warns if < 20 in a mocked env) | +| `doctor/check-claude-on-path.test.ts` | `doctor` detects `claude` binary on PATH | +| `doctor/check-mcp-registered.test.ts` | `doctor` detects MCP server entry in `~/.claude.json` | +| `doctor/check-hooks-registered.test.ts` | `doctor` detects status hooks in `~/.claude/settings.json` | +| `doctor/check-config-perms.test.ts` | `doctor` validates `~/.claudemesh/config.json` is mode `0600` | +| `doctor/check-keypairs-valid.test.ts` | `doctor` validates each mesh keypair can sign + verify | + +**Parity layer total: ~70 test files.** Each file runs both v1 and v2 in the same environment and diffs the outputs. 
+ +--- + +## 4. Layer 2 — WS contract tests + +One contract test per broker WS message type (85 total from inventory §3). Each test captures what v1's WS client would send for a given input and asserts v2 sends the byte-identical envelope (modulo legitimate non-determinism like nonces and timestamps, which are normalized before comparison). + +File layout: + +``` +apps/cli-v2/tests/contract/ws/ +├── lifecycle/ (3 tests: hello, hello_ack, get_clock) +├── messaging/ (5 tests) +├── profile/ (5 tests) +├── groups/ (2 tests) +├── state/ (3 tests) +├── memory/ (3 tests) +├── files/ (5 tests) +├── vectors/ (4 tests) +├── graph/ (2 tests) +├── sql/ (3 tests) +├── streams/ (5 tests) +├── contexts/ (3 tests) +├── tasks/ (4 tests) +├── scheduling/ (3 tests) +├── metadata/ (3 tests) +├── clock/ (4 tests) +├── skills/ (5 tests) +├── mcp-registry/ (12 tests) +├── vault/ (4 tests) +├── url-watch/ (3 tests) +├── webhooks/ (3 tests) +└── audit/ (2 tests) +``` + +### 4.1 Contract test pattern + +```ts +// tests/contract/ws/state/set-state.test.ts +import { describe, it, expect } from 'bun:test'; +import { normalize, captureV1Envelope, captureV2Envelope } from '@/tests/helpers/wire-capture'; + +describe('WS contract: set_state', () => { + it('v2 envelope matches v1 for string value', async () => { + const input = { meshId: 'test-mesh', key: 'sprint', value: '2026-W15' }; + const v1 = await captureV1Envelope('set_state', input); + const v2 = await captureV2Envelope('set_state', input); + expect(normalize(v2)).toEqual(normalize(v1)); + }); + + it('v2 envelope matches v1 for JSON value', async () => { + const input = { meshId: 'test-mesh', key: 'deploy_freeze', value: { until: '2026-04-15' } }; + const v1 = await captureV1Envelope('set_state', input); + const v2 = await captureV2Envelope('set_state', input); + expect(normalize(v2)).toEqual(normalize(v1)); + }); + + it('v2 envelope matches v1 for null value (deletion)', async () => { + const input = { meshId: 'test-mesh', key: 'tmp', 
value: null }; + const v1 = await captureV1Envelope('set_state', input); + const v2 = await captureV2Envelope('set_state', input); + expect(normalize(v2)).toEqual(normalize(v1)); + }); +}); +``` + +### 4.2 The `normalize()` helper + +Strips fields that are legitimately non-deterministic between v1 and v2: + +- `nonce` — random per envelope +- `timestamp` — wall clock +- `messageId` — random UUID +- `_reqId` — random correlation ID +- `ciphertext` — depends on nonce + random keypair; instead of comparing ciphertext directly, both envelopes are decrypted and the plaintext is compared + +Everything else (message type, meshId, priority, sender pubkey, recipient, flags) must match byte-for-byte. + +### 4.3 Full contract test manifest (85 tests) + +Every WS message type from inventory §3 gets a file: + +| Family | WS messages | Test files | +|---|---|---| +| Lifecycle | `hello`, `hello_ack`, `get_clock` | 3 | +| Messaging | `send`, `peer_dir_request`, `peer_dir_response`, `peer_file_request`, `peer_file_response` | 5 | +| Profile | `set_status`, `set_summary`, `set_visible`, `set_profile`, `set_stats` | 5 | +| Groups | `join_group`, `leave_group` | 2 | +| State | `set_state`, `get_state`, `list_state` | 3 | +| Memory | `remember`, `recall`, `forget` | 3 | +| Files | `get_file`, `list_files`, `file_status`, `grant_file_access`, `delete_file` | 5 | +| Vectors | `vector_store`, `vector_search`, `vector_delete`, `list_collections` | 4 | +| Graph | `graph_query`, `graph_execute` | 2 | +| SQL | `mesh_query`, `mesh_execute`, `mesh_schema` | 3 | +| Streams | `create_stream`, `publish`, `subscribe`, `unsubscribe`, `list_streams` | 5 | +| Contexts | `share_context`, `get_context`, `list_contexts` | 3 | +| Tasks | `create_task`, `claim_task`, `complete_task`, `list_tasks` | 4 | +| Scheduling | `schedule`, `list_scheduled`, `cancel_scheduled` | 3 | +| Metadata | `mesh_info`, `list_peers`, `message_status` | 3 | +| Clock | `set_clock`, `pause_clock`, `resume_clock`, `get_clock` | 4 | 
+| Skills | `share_skill`, `get_skill`, `list_skills`, `remove_skill`, `skill_deploy` | 5 | +| MCP registry | `mcp_register`, `mcp_unregister`, `mcp_list`, `mcp_call`, `mcp_call_response`, `mcp_deploy`, `mcp_undeploy`, `mcp_update`, `mcp_logs`, `mcp_scope`, `mcp_schema`, `mcp_catalog` | 12 | +| Vault | `vault_set`, `vault_get`, `vault_list`, `vault_delete` | 4 | +| URL watch | `watch`, `unwatch`, `watch_list` | 3 | +| Webhooks | `create_webhook`, `list_webhooks`, `delete_webhook` | 3 | +| Audit | `audit_query`, `audit_verify` | 2 | + +**Contract layer total: ~85 test files.** + +This layer is the load-bearing proof that v2's WS client speaks the broker's v1 protocol unchanged. If any of these tests fail, v1 users running v2 against production brokers will experience silent misbehavior. + +--- + +## 5. Layer 3 — MCP tool handler tests + +One test file per MCP tool from inventory §2 (79 tools). Each file: + +1. Invokes the tool through v2's MCP server with a fixture input +2. Captures the WS message v2 sends to the broker +3. Captures the same request through v1's MCP server +4. 
Asserts both produce identical WS envelopes and identical return values + +File layout mirrors v2's `src/mcp/tools/`: + +``` +apps/cli-v2/tests/mcp-tools/ +├── memory/ +│ ├── remember.test.ts +│ ├── recall.test.ts +│ └── forget.test.ts +├── state/ +│ ├── set-state.test.ts +│ ├── get-state.test.ts +│ └── list-state.test.ts +├── messaging/ +│ ├── send-message.test.ts +│ ├── list-peers.test.ts +│ ├── check-messages.test.ts +│ └── message-status.test.ts +├── profile/ +│ ├── set-profile.test.ts +│ ├── set-status.test.ts +│ ├── set-summary.test.ts +│ └── set-visible.test.ts +├── groups/ +│ ├── join-group.test.ts +│ └── leave-group.test.ts +├── files/ +│ ├── share-file.test.ts +│ ├── get-file.test.ts +│ ├── list-files.test.ts +│ ├── file-status.test.ts +│ ├── delete-file.test.ts +│ ├── grant-file-access.test.ts +│ ├── read-peer-file.test.ts +│ └── list-peer-files.test.ts +├── vectors/ +│ ├── vector-store.test.ts +│ ├── vector-search.test.ts +│ ├── vector-delete.test.ts +│ └── list-collections.test.ts +├── graph/ +│ ├── graph-query.test.ts +│ └── graph-execute.test.ts +├── sql/ +│ ├── mesh-query.test.ts +│ ├── mesh-execute.test.ts +│ └── mesh-schema.test.ts +├── streams/ +│ ├── create-stream.test.ts +│ ├── publish.test.ts +│ ├── subscribe.test.ts +│ └── list-streams.test.ts +├── contexts/ +│ ├── share-context.test.ts +│ ├── get-context.test.ts +│ └── list-contexts.test.ts +├── tasks/ +│ ├── create-task.test.ts +│ ├── claim-task.test.ts +│ ├── complete-task.test.ts +│ └── list-tasks.test.ts +├── scheduling/ +│ ├── schedule-reminder.test.ts +│ ├── list-scheduled.test.ts +│ └── cancel-scheduled.test.ts +├── metadata/ +│ ├── mesh-info.test.ts +│ ├── mesh-stats.test.ts +│ ├── mesh-clock.test.ts +│ └── ping-mesh.test.ts +├── clock-write/ +│ ├── mesh-set-clock.test.ts +│ ├── mesh-pause-clock.test.ts +│ └── mesh-resume-clock.test.ts +├── skills/ +│ ├── share-skill.test.ts +│ ├── get-skill.test.ts +│ ├── list-skills.test.ts +│ ├── remove-skill.test.ts +│ └── 
mesh-skill-deploy.test.ts +├── mcp-registry-tier1/ +│ ├── mesh-mcp-register.test.ts +│ ├── mesh-mcp-list.test.ts +│ ├── mesh-tool-call.test.ts +│ └── mesh-mcp-remove.test.ts +├── mcp-registry-tier2/ +│ ├── mesh-mcp-deploy.test.ts +│ ├── mesh-mcp-undeploy.test.ts +│ ├── mesh-mcp-update.test.ts +│ ├── mesh-mcp-logs.test.ts +│ ├── mesh-mcp-scope.test.ts +│ ├── mesh-mcp-schema.test.ts +│ └── mesh-mcp-catalog.test.ts +├── vault/ +│ ├── vault-set.test.ts +│ ├── vault-list.test.ts +│ └── vault-delete.test.ts +├── url-watch/ +│ ├── mesh-watch.test.ts +│ ├── mesh-unwatch.test.ts +│ └── mesh-watches.test.ts +└── webhooks/ + ├── create-webhook.test.ts + ├── list-webhooks.test.ts + └── delete-webhook.test.ts +``` + +**MCP layer total: 79 test files.** + +### 5.1 MCP handler test pattern + +```ts +// tests/mcp-tools/memory/remember.test.ts +import { describe, it, expect } from 'bun:test'; +import { v1McpServer, v2McpServer, mockBroker } from '@/tests/helpers'; + +describe('MCP tool: remember (parity)', () => { + it('v1 and v2 produce identical WS envelopes', async () => { + const input = { + content: 'Payments API rate-limits at 100 req/s after March incident', + tags: ['payments', 'rate-limit'], + }; + + const v1Sent = await v1McpServer.invokeAndCapture('remember', input); + const v2Sent = await v2McpServer.invokeAndCapture('remember', input); + + expect(v2Sent).toEqual(v1Sent); + }); + + it('v1 and v2 return identical tool results', async () => { + const input = { content: 'test memory', tags: [] }; + const v1Result = await v1McpServer.invoke('remember', input); + const v2Result = await v2McpServer.invoke('remember', input); + expect(v2Result).toEqual(v1Result); + }); + + it('v2 rejects invalid input via Zod with same error shape as v1', async () => { + const invalidInput = { content: '', tags: 'not-an-array' }; + await expect(v2McpServer.invoke('remember', invalidInput)).rejects.toThrow(); + }); +}); +``` + +--- + +## 6. 
Layer 4 — End-to-end smoke tests + +Full journey tests against a real broker in a Docker sandbox. Each test spins up: + +1. A fresh Postgres + Neo4j + Qdrant + MinIO + broker stack (via `testcontainers`) +2. One or more v2 CLI instances as subprocesses +3. Optionally a Claude Code mock to simulate MCP client interactions + +These are slower than layers 1–3 but catch integration bugs that unit-level parity tests miss. + +File layout: + +``` +apps/cli-v2/tests/e2e/ +├── first-run/ +│ ├── install-and-join.test.ts +│ ├── fresh-mesh-create.test.ts +│ └── launch-with-mesh.test.ts +├── messaging/ +│ ├── two-peer-send-receive.test.ts +│ ├── broadcast-to-group.test.ts +│ └── offline-queue-drain.test.ts +├── files/ +│ ├── upload-download-small.test.ts +│ ├── upload-download-large.test.ts +│ └── e2e-encrypted-share.test.ts +├── state-and-memory/ +│ ├── state-across-peers.test.ts +│ ├── memory-full-text-search.test.ts +│ └── context-share-and-query.test.ts +├── tasks/ +│ ├── create-claim-complete.test.ts +│ └── list-by-status.test.ts +├── backends/ +│ ├── shared-sql-roundtrip.test.ts +│ ├── neo4j-graph-roundtrip.test.ts +│ ├── qdrant-vector-roundtrip.test.ts +│ └── minio-file-roundtrip.test.ts +├── mcp-registry/ +│ ├── deploy-catalog-entry.test.ts +│ ├── call-deployed-tool.test.ts +│ └── undeploy-cleanup.test.ts +├── telegram/ +│ ├── connect-and-route.test.ts +│ └── disconnect-cleanup.test.ts +├── dashboard-sync/ +│ └── browser-flow.test.ts +├── scheduled/ +│ ├── cron-reminder-fires.test.ts +│ ├── url-watch-detects-change.test.ts +│ └── persist-across-restart.test.ts +├── webhooks/ +│ └── inbound-post-becomes-message.test.ts +└── journey/ + ├── full-user-journey.test.ts (install → join → send → file → logout) + ├── connector-journey.test.ts (deploy mcp → call tool → undeploy) + └── skill-sharing-journey.test.ts (share_skill → teammate loads → invoke) +``` + +**E2E layer total: ~28 test files.** + +### 6.1 E2E test harness + +Each e2e file uses a shared harness: + +```ts 
+import { startFreshBroker, stopBroker, spawnCli } from '@/tests/helpers/e2e'; + +describe('e2e: two-peer send-receive', () => { + let broker: BrokerHandle; + let alice: CliHandle; + let bob: CliHandle; + + beforeAll(async () => { + broker = await startFreshBroker(); + alice = await spawnCli({ broker, displayName: 'Alice' }); + bob = await spawnCli({ broker, displayName: 'Bob' }); + await alice.join(broker.seedInvite); + await bob.join(broker.seedInvite); + }); + + afterAll(async () => { + await alice.shutdown(); + await bob.shutdown(); + await stopBroker(broker); + }); + + it('alice sends to bob; bob receives via inbox', async () => { + await alice.send({ to: 'Bob', message: 'hello' }); + const inbox = await bob.inbox(); + expect(inbox).toHaveLength(1); + expect(inbox[0].plaintext).toBe('hello'); + }); +}); +``` + +### 6.2 Testcontainers vs local broker + +Two modes, switchable via env var: + +- `E2E_BROKER=docker` — spins up a fresh broker + all backends via `testcontainers`. Slow (~30s per test) but hermetic. +- `E2E_BROKER=local` — connects to an already-running broker (`ic.claudemesh.com` or `localhost:8787`) instead of starting one. Fast but requires manual setup. + +CI uses `docker` mode. Dev iteration uses `local` mode. + +--- + +## 7. Layer 5 — JSON output golden tests + +`--json` output is the stable contract for script consumers. These tests lock the shape and fields. 
+ +File layout: + +``` +apps/cli-v2/tests/golden/ +├── list-json.test.ts +├── peers-json.test.ts +├── info-json.test.ts +├── inbox-json.test.ts +├── state-get-json.test.ts +├── state-list-json.test.ts +├── remember-json.test.ts +├── recall-json.test.ts +├── remind-json.test.ts +├── profile-json.test.ts +├── mcp-info-json.test.ts +└── mcp-stats-json.test.ts +``` + +### 7.1 Golden test pattern + +```ts +// tests/golden/list-json.test.ts +import { describe, it, expect } from 'bun:test'; +import { runV2Cli } from '@/tests/helpers'; +import { listJsonShape } from '@/tests/fixtures/golden/list.json'; + +describe('golden: claudemesh list --json', () => { + it('output shape matches locked schema', async () => { + const output = JSON.parse(await runV2Cli(['list', '--json'])); + expect(output).toMatchObject(listJsonShape); + }); + + it('includes schema_version field', async () => { + const output = JSON.parse(await runV2Cli(['list', '--json'])); + expect(output.schema_version).toMatch(/^\d+\.\d+$/); + }); + + it('mesh entries have all v1 fields', async () => { + const output = JSON.parse(await runV2Cli(['list', '--json'])); + for (const mesh of output.meshes) { + expect(mesh).toHaveProperty('slug'); + expect(mesh).toHaveProperty('name'); + expect(mesh).toHaveProperty('kind'); + expect(mesh).toHaveProperty('brokerUrl'); + expect(mesh).toHaveProperty('memberCount'); + } + }); +}); +``` + +Golden fixtures live in `tests/fixtures/golden/*.json` and are captured from v1 CLI runs the first time the test is written, then locked. + +**Golden layer total: ~12 test files.** + +--- + +## 8. 
Layer 6 — Facade unit tests + +Per-service colocated tests that verify the facade contract: + +- Every facade function validates input with Zod +- Every output type passes the boundary scanner (no `token`, `api_key`, `password`, path-like patterns) +- Error mapping via `toDomainError` preserves cause + logs unmapped errors +- Never exposes class instances, DB connections, or raw HTTP responses + +File layout (colocated with services): + +``` +apps/cli-v2/src/services/ +├── auth/ +│ └── facade.test.ts +├── mesh/ +│ └── facade.test.ts +├── invite/ +│ └── facade.test.ts +├── broker/ +│ └── facade.test.ts +├── api/ +│ └── facade.test.ts +├── crypto/ +│ └── facade.test.ts +├── store/ +│ └── facade.test.ts +├── config/ +│ └── facade.test.ts +├── state/ +│ └── facade.test.ts +├── device/ +│ └── facade.test.ts +├── clipboard/ +│ └── facade.test.ts +├── spawn/ +│ └── facade.test.ts +├── telemetry/ +│ └── facade.test.ts +├── health/ +│ └── facade.test.ts +├── update/ +│ └── facade.test.ts +├── i18n/ +│ └── facade.test.ts +└── lifecycle/ + └── facade.test.ts +``` + +Plus one global test at `tests/unit/facade-boundary-scan.test.ts` that walks every facade and asserts no output type contains forbidden keys (AST-based via ts-morph, per facade-pattern spec §10.2). 
+ +**Facade layer total: ~17 facade-specific test files + 1 global scanner.** + +### 8.1 Facade test pattern + +```ts +// services/auth/facade.test.ts +import { describe, it, expect, vi } from 'vitest'; +import * as facade from './facade'; +import { getAuthService } from './index'; + +vi.mock('./index'); + +describe('auth facade contract', () => { + it('loginWithDeviceCode rejects leaked token in output', async () => { + vi.mocked(getAuthService).mockReturnValue({ + startDeviceCodeFlow: vi.fn().mockResolvedValue({ + user: { id: 'u1', display_name: 'Alejandro', email: 'a@b.c' }, + token: 'cm_session_SECRET', + raw_response: { headers: {} }, + }), + } as any); + + const result = await facade.loginWithDeviceCode(); + const serialized = JSON.stringify(result); + + expect(serialized).not.toContain('cm_session_'); + expect(serialized).not.toContain('SECRET'); + expect(serialized).not.toContain('raw_response'); + }); + + it('loginWithToken validates input with Zod', async () => { + await expect(facade.loginWithToken({ token: 'malformed' })).rejects.toMatchObject({ + code: 'AUTH_INVALID_TOKEN', + }); + }); + + it('whoAmI never throws', async () => { + vi.mocked(getAuthService).mockReturnValue({ + getCurrentState: vi.fn().mockRejectedValue(new Error('boom')), + } as any); + await expect(facade.whoAmI()).resolves.toBeDefined(); + }); + + it('toDomainError logs unmapped errors', async () => { + const logSpy = vi.fn(); + vi.mocked(getAuthService).mockReturnValue({ + logout: vi.fn().mockRejectedValue(new TypeError('null pointer')), + logger: { error: logSpy }, + } as any); + + await facade.logout(); + expect(logSpy).toHaveBeenCalledWith( + expect.stringContaining('unmapped error'), + expect.any(Object), + ); + }); +}); +``` + +--- + +## 9. Layer 7 — Port-forwarded v1 tests + +v1's existing tests cover crypto primitives and broker correctness. 
They all apply to v2 as-is because: + +- v2 uses the same crypto primitives (Ed25519, NaCl crypto_box, AES-GCM) +- v2 talks to the same broker, so broker tests run unchanged +- v2 parses the same invite formats + +### 9.1 Tests to port forward + +| v1 test | Forward-port path | Notes | +|---|---|---| +| `apps/cli/src/__tests__/crypto-roundtrip.test.ts` | `apps/cli-v2/tests/unit/crypto-roundtrip.test.ts` | Direct copy; uses shared crypto primitives | +| `apps/cli/src/__tests__/invite-parse.test.ts` | `apps/cli-v2/tests/unit/invite-parse.test.ts` | Direct copy; v2 parses same v1 + v2 invite formats | +| `apps/broker/tests/broker.test.ts` | N/A — stays in broker | Broker unchanged | +| `apps/broker/tests/invite-signature.test.ts` | N/A — stays in broker | Broker unchanged | +| `apps/broker/tests/invite-v2.test.ts` | N/A — stays in broker | Broker unchanged | +| `apps/broker/tests/hello-signature.test.ts` | N/A — stays in broker | Broker unchanged | +| `apps/broker/tests/rate-limit.test.ts` | N/A — stays in broker | Broker unchanged | +| `apps/broker/tests/encoding.test.ts` | N/A — stays in broker | Broker unchanged | +| `apps/broker/tests/dup-delivery.test.ts` | N/A — stays in broker | Broker unchanged | +| `apps/broker/tests/metrics.test.ts` | N/A — stays in broker | Broker unchanged | +| `apps/broker/tests/logging.test.ts` | N/A — stays in broker | Broker unchanged | +| `apps/broker/tests/integration/health.test.ts` | N/A — stays in broker | Broker unchanged | + +**Port-forward layer: 2 CLI tests copied, ~10 broker tests remain in place.** + +--- + +## 10. 
Test helper infrastructure + +Shared helpers under `apps/cli-v2/tests/helpers/`: + +### 10.1 `v1-runner.ts` + +Spawns the v1 CLI as a subprocess and captures its output: + +```ts +export async function runV1Cli(args: string[], opts?: RunOpts): Promise<string>; +export async function v1Send(args: SendArgs): Promise<string>; // returns messageId +export async function v1Join(inviteUrl: string): Promise<void>; +export async function v1Install(): Promise<void>; +// ... one helper per v1 command +``` + +Uses the installed `claudemesh` binary from `apps/cli/` (v1). Tests assume v1 is available at `../../cli/dist/index.js` or via `npx claudemesh@0.10.5`. + +### 10.2 `v2-runner.ts` + +Same interface, but spawns v2: + +```ts +export async function runV2Cli(args: string[], opts?: RunOpts): Promise<string>; +// ... matching v1 helper surface +``` + +Uses `apps/cli-v2/dist/entrypoints/cli.js`. + +### 10.3 `wire-capture.ts` + +Intercepts WS messages by routing both v1 and v2 CLIs through a test proxy that records envelopes: + +```ts +export async function captureV1Envelope(op: string, input: any): Promise<WsEnvelope>; +export async function captureV2Envelope(op: string, input: any): Promise<WsEnvelope>; +export function normalize(env: WsEnvelope): WsEnvelope; +``` + +The proxy runs on a local port, the CLI's broker URL is set to `ws://localhost:<port>/ws`, and the proxy logs every message before forwarding to a real test broker. + +### 10.4 `mock-broker.ts` + +In-memory broker for unit tests. Implements enough of the WS protocol to test CLI-side behavior without a real database stack. 
+ +Handles: +- Hello + authentication (signature check skipped in test mode) +- Echo back `ack` for every client message +- In-memory state for state_kv, memory, tasks +- Configurable response fixtures for WS ops + +### 10.5 `real-broker.ts` (for e2e) + +Spins up a real broker + Postgres + Neo4j + Qdrant + MinIO stack via `testcontainers`: + +```ts +export async function startFreshBroker(opts?: BrokerOpts): Promise<BrokerHandle>; +export async function stopBroker(handle: BrokerHandle): Promise<void>; +``` + +The stack is pre-configured with a seed mesh + fixture users + a known invite URL. Tests use these as starting state. + +### 10.6 `temp-home.ts` + +Creates an isolated `~/.claudemesh/` for each test: + +```ts +export async function tempHome<T>(fn: (homeDir: string) => Promise<T>): Promise<T>; +``` + +Cleans up on completion. Prevents tests from interfering with the developer's real claudemesh config. + +### 10.7 `ink-render.ts` + +Snapshots Ink screens for UI tests (per `cli-v2-ux-design.md` §12.1): + +```ts +export async function renderScreen(Component: any, props?: any): Promise; +export async function waitForText(frame: () => string, text: string, timeoutMs?: number): Promise; +``` + +### 10.8 `sqlite-fixture.ts` + +(Pass 1 only needs this if we add any local caching — most of Pass 1 won't touch SQLite since the local-first work is Pass 2.) + +--- + +## 11. Shared fixtures + +Under `apps/cli-v2/tests/fixtures/`: + +``` +fixtures/ +├── auth/ +│ ├── valid-session-token.json # sample cm_session_... token +│ ├── valid-pat.json # sample cm_pat_... token +│ ├── expired-token.json +│ └── malformed-token.json +├── meshes/ +│ ├── sample-personal-mesh.json +│ ├── sample-shared-mesh.json +│ └── sample-guest-mesh.json +├── invites/ +│ ├── v1-invite-url.txt +│ ├── v2-invite-url.txt +│ ├── expired-invite-url.txt +│ └── malformed-invite-url.txt +├── wire/ +│ ├── v1-envelopes/ # captured v1 WS envelopes, one JSON file per op +│ │ ├── send.json +│ │ ├── set_state.json +│ │ ├── remember.json +│ │ └── ... 
(85 files) +│ └── broker-responses/ # captured v1 broker responses +│ ├── hello_ack.json +│ ├── peers_list.json +│ └── ... +├── golden/ +│ ├── list-json.json # expected JSON output shape +│ ├── peers-json.json +│ ├── info-json.json +│ └── ... (12 files) +├── telegram/ +│ ├── sample-bot-token.json +│ ├── sample-inbound-update.json # Telegram webhook payload +│ └── expected-routed-message.json +└── mcp-tool-inputs/ + ├── memory-remember.json # one fixture input per tool + ├── memory-recall.json + └── ... (79 files, one per tool) +``` + +Total fixture count: ~180 files. Most are small JSON snippets captured once from v1 runs and locked. + +--- + +## 12. Execution order + dependencies + +Tests run in parallel where possible, but some layers depend on others: + +``` +┌─────────────────────────────────────┐ +│ Layer 7: port-forwarded v1 tests │ ← no dependencies, runs first +│ Layer 6: facade unit tests │ ← depends on v2 services existing +└─────────────────────────────────────┘ + ↓ +┌─────────────────────────────────────┐ +│ Layer 2: WS contract tests │ ← depends on wire fixtures captured +│ Layer 3: MCP tool handler tests │ ← depends on v1 + v2 MCP servers +│ Layer 5: JSON golden tests │ ← depends on v1 + v2 CLI built +└─────────────────────────────────────┘ + ↓ +┌─────────────────────────────────────┐ +│ Layer 1: parity tests │ ← depends on mock-broker + helpers +└─────────────────────────────────────┘ + ↓ +┌─────────────────────────────────────┐ +│ Layer 4: e2e smoke tests │ ← depends on real broker + testcontainers +└─────────────────────────────────────┘ +``` + +Layers 1, 2, 3, 5, 6, 7 run on every PR. Layer 4 runs on `main` merges + release candidates (slower). + +--- + +## 13. 
CI integration + +### 13.1 PR-level pipeline + +```yaml +jobs: + lint-and-typecheck: + - biome check + - eslint (boundaries + 3 custom rules) + - tsc --noEmit + - dependency-cruiser + + unit-tests: + - bun test tests/unit/ # facade layer + - bun test src/services/**/*.test.ts # colocated facade tests + - bun test tests/golden/ # JSON shape + + parity-tests: + - bun test tests/parity/ + needs: [v1-cli-available, v2-cli-built] + + contract-tests: + - bun test tests/contract/ + needs: [wire-fixtures-available] + + mcp-tool-tests: + - bun test tests/mcp-tools/ + + port-forward-tests: + - bun test tests/unit/crypto-roundtrip.test.ts + - bun test tests/unit/invite-parse.test.ts +``` + +### 13.2 Release-candidate pipeline + +Adds layer 4: + +```yaml +jobs: + e2e-docker: + - E2E_BROKER=docker bun test tests/e2e/ + timeout-minutes: 60 +``` + +### 13.3 Coverage gates + +- Unit tests: ≥ 80% branch coverage on `src/services/**/*.ts` (excluding `services/broker/*`) +- Parity tests: 100% of inventory §12 checks mapped to at least one passing test +- Contract tests: 100% of 85 WS message types have at least one passing test +- MCP tool tests: 100% of 79 tools have at least one passing test +- E2E tests: all 28 journey tests passing on `main` + +If any gate fails, the PR cannot merge. + +--- + +## 14. 
Success criteria (the ship checklist) + +v2 Pass 1 ships when ALL of these are green: + +- [ ] **Layer 1 parity**: 70 test files, every inventory §12 regression check has at least one passing parity test +- [ ] **Layer 2 contract**: 85 test files, every broker WS message type has a contract test passing against captured v1 envelopes +- [ ] **Layer 3 MCP tools**: 79 test files, every MCP tool handler produces identical WS output between v1 and v2 +- [ ] **Layer 4 e2e**: 28 journey tests pass against a real broker in Docker +- [ ] **Layer 5 golden**: 12 JSON output tests pass, `schema_version` field present and stable +- [ ] **Layer 6 facade**: 17 service facade test files pass + 1 global boundary scanner (AST-based, no false positives) +- [ ] **Layer 7 port-forward**: 2 forwarded v1 CLI tests pass + broker test suite unchanged and green +- [ ] **Coverage gates**: all thresholds met +- [ ] **Zero new regressions**: any previously-passing test that starts failing must be fixed before merge (no skipping, no `.todo`) + +**Total test files in v2 Pass 1: ~295** (70 + 85 + 79 + 28 + 12 + 18 + 2). + +That's a lot. Most are template-driven — one helper + one fixture + one pattern = many tests. The scaffolding pass creates the files with `NotImplementedError` stubs, and the implementation pass fills them in. + +**No time estimate.** It's done when the checklist is green. + +--- + +## 15. 
Scaffolding implications + +When v2 is re-scaffolded, the test infrastructure ships alongside the source: + +- `tests/helpers/` with `v1-runner`, `v2-runner`, `wire-capture`, `mock-broker`, `real-broker`, `temp-home`, `ink-render` as stubs with `NotImplementedError` +- `tests/fixtures/` with directory structure and placeholder JSON files +- Every test file in layers 1–6 scaffolded with its describe blocks, imports, and fixture references — but the body is `throw new NotImplementedError('')` +- A CI job that counts test files vs expected counts and fails if any are missing +- A script `tests/helpers/capture-v1-fixtures.ts` that runs v1 once against a test broker to generate the wire fixtures + +The scaffold pass adds ~300 test files. The implementation pass replaces the `NotImplementedError` bodies with real assertions, one file at a time, driven by the priority order below. + +The implementation pass is organized by test layer, not by feature: + +1. Scaffold everything (layers 1–6) with stubs +2. Implement layer 7 (port-forwarded tests) — fastest wins, establishes test harness +3. Implement layer 6 (facade units) — validates each service as it's written +4. Implement layer 5 (golden JSON) — locks output shapes early +5. Implement layer 2 (WS contract) — proves wire compatibility +6. Implement layer 3 (MCP tool handlers) — proves tool dispatch +7. Implement layer 1 (parity) — full behavioral equivalence +8. Implement layer 4 (e2e) — end-to-end sanity + +When every layer is green (the full §14 ship checklist, not layer 1 alone), v2 Pass 1 is shippable. + +--- + +## 16. 
What this plan does NOT cover + +Explicitly out of scope for Pass 1 testing: + +- **Broker-side tests** — broker is unchanged in Pass 1; broker's own test suite runs unchanged +- **Performance regression tests** — v2 shouldn't be slower than v1, but quantifying that is Pass 2 work with bench tests +- **Security audit** — the spec-level security improvements (role-per-mesh Postgres, egress proxies, SSRF policies) are Pass 2 +- **Accessibility audit** — the testable a11y matrix is Pass 2 (requires the VoiceOver shim which is Pass 2) +- **Load tests** — 10k concurrent peers, sustained message throughput — deferred +- **Chaos tests** — broker restart mid-operation, network partition recovery — deferred to local-first Pass 2 +- **Cross-platform tests on Windows** — v2 Pass 1 targets macOS + Linux; Windows support is best-effort, tested on release candidates only + +Pass 2 adds those layers when the corresponding features ship. + +--- + +**End of plan.** diff --git a/.artifacts/hero-animation/clawd-apple-zoom.png b/.artifacts/hero-animation/clawd-apple-zoom.png new file mode 100644 index 0000000..a547fe3 Binary files /dev/null and b/.artifacts/hero-animation/clawd-apple-zoom.png differ diff --git a/.artifacts/hero-animation/clawd-zoom-v2.png b/.artifacts/hero-animation/clawd-zoom-v2.png new file mode 100644 index 0000000..61b99e5 Binary files /dev/null and b/.artifacts/hero-animation/clawd-zoom-v2.png differ diff --git a/.artifacts/hero-animation/clawd-zoom.png b/.artifacts/hero-animation/clawd-zoom.png new file mode 100644 index 0000000..a2f2e5a Binary files /dev/null and b/.artifacts/hero-animation/clawd-zoom.png differ diff --git a/.artifacts/hero-animation/fcc-preview-v1.png b/.artifacts/hero-animation/fcc-preview-v1.png new file mode 100644 index 0000000..9b59886 Binary files /dev/null and b/.artifacts/hero-animation/fcc-preview-v1.png differ diff --git a/.artifacts/hero-animation/fcc-preview-v2.png b/.artifacts/hero-animation/fcc-preview-v2.png new file mode 100644 
index 0000000..5542f34 Binary files /dev/null and b/.artifacts/hero-animation/fcc-preview-v2.png differ diff --git a/.artifacts/hero-animation/fcc-preview-v3.png b/.artifacts/hero-animation/fcc-preview-v3.png new file mode 100644 index 0000000..51cff13 Binary files /dev/null and b/.artifacts/hero-animation/fcc-preview-v3.png differ diff --git a/.artifacts/hero-animation/features-section.png b/.artifacts/hero-animation/features-section.png new file mode 100644 index 0000000..fc96955 Binary files /dev/null and b/.artifacts/hero-animation/features-section.png differ diff --git a/.artifacts/hero-animation/features-with-skills.png b/.artifacts/hero-animation/features-with-skills.png new file mode 100644 index 0000000..2b623ac Binary files /dev/null and b/.artifacts/hero-animation/features-with-skills.png differ diff --git a/.artifacts/hero-animation/frame-01-alone.png b/.artifacts/hero-animation/frame-01-alone.png new file mode 100644 index 0000000..b0e3ffa Binary files /dev/null and b/.artifacts/hero-animation/frame-01-alone.png differ diff --git a/.artifacts/hero-animation/hero-with-mesh-v1.png b/.artifacts/hero-animation/hero-with-mesh-v1.png new file mode 100644 index 0000000..02093a4 Binary files /dev/null and b/.artifacts/hero-animation/hero-with-mesh-v1.png differ diff --git a/.artifacts/hero-animation/landing-cover.png b/.artifacts/hero-animation/landing-cover.png new file mode 100644 index 0000000..5d484a0 Binary files /dev/null and b/.artifacts/hero-animation/landing-cover.png differ diff --git a/.artifacts/hero-animation/landing-live.png b/.artifacts/hero-animation/landing-live.png new file mode 100644 index 0000000..3f27751 Binary files /dev/null and b/.artifacts/hero-animation/landing-live.png differ diff --git a/.artifacts/hero-animation/mesh-constellation-v1.png b/.artifacts/hero-animation/mesh-constellation-v1.png new file mode 100644 index 0000000..13fdd44 Binary files /dev/null and b/.artifacts/hero-animation/mesh-constellation-v1.png differ diff --git 
a/.artifacts/hero-animation/mesh-constellation-v2.png b/.artifacts/hero-animation/mesh-constellation-v2.png new file mode 100644 index 0000000..abe7aaa Binary files /dev/null and b/.artifacts/hero-animation/mesh-constellation-v2.png differ diff --git a/.artifacts/hero-animation/mesh-constellation-v3.png b/.artifacts/hero-animation/mesh-constellation-v3.png new file mode 100644 index 0000000..acff492 Binary files /dev/null and b/.artifacts/hero-animation/mesh-constellation-v3.png differ diff --git a/.artifacts/hero-animation/mesh-hero-apple-clawd.png b/.artifacts/hero-animation/mesh-hero-apple-clawd.png new file mode 100644 index 0000000..f531926 Binary files /dev/null and b/.artifacts/hero-animation/mesh-hero-apple-clawd.png differ diff --git a/.artifacts/hero-animation/mesh-hero-clip.png b/.artifacts/hero-animation/mesh-hero-clip.png new file mode 100644 index 0000000..6f88df0 Binary files /dev/null and b/.artifacts/hero-animation/mesh-hero-clip.png differ diff --git a/.artifacts/hero-animation/mesh-hero-full.png b/.artifacts/hero-animation/mesh-hero-full.png new file mode 100644 index 0000000..fef27c7 Binary files /dev/null and b/.artifacts/hero-animation/mesh-hero-full.png differ diff --git a/.artifacts/hero-animation/mesh-hero-v1.png b/.artifacts/hero-animation/mesh-hero-v1.png new file mode 100644 index 0000000..9520dcc Binary files /dev/null and b/.artifacts/hero-animation/mesh-hero-v1.png differ diff --git a/.artifacts/hero-animation/mesh-icon-big.png b/.artifacts/hero-animation/mesh-icon-big.png new file mode 100644 index 0000000..8258890 Binary files /dev/null and b/.artifacts/hero-animation/mesh-icon-big.png differ diff --git a/.artifacts/hero-animation/mesh-no-overlap.png b/.artifacts/hero-animation/mesh-no-overlap.png new file mode 100644 index 0000000..8514cf3 Binary files /dev/null and b/.artifacts/hero-animation/mesh-no-overlap.png differ diff --git a/.artifacts/hero-animation/mesh-peers-equal.png b/.artifacts/hero-animation/mesh-peers-equal.png new 
file mode 100644 index 0000000..c67ecd6 Binary files /dev/null and b/.artifacts/hero-animation/mesh-peers-equal.png differ diff --git a/.artifacts/hero-animation/mesh-trail-5700.png b/.artifacts/hero-animation/mesh-trail-5700.png new file mode 100644 index 0000000..82b6a49 Binary files /dev/null and b/.artifacts/hero-animation/mesh-trail-5700.png differ diff --git a/.artifacts/hero-animation/mesh-trail-inflight.png b/.artifacts/hero-animation/mesh-trail-inflight.png new file mode 100644 index 0000000..59baa58 Binary files /dev/null and b/.artifacts/hero-animation/mesh-trail-inflight.png differ diff --git a/.artifacts/hero-animation/mesh-trail-top.png b/.artifacts/hero-animation/mesh-trail-top.png new file mode 100644 index 0000000..1e1a771 Binary files /dev/null and b/.artifacts/hero-animation/mesh-trail-top.png differ diff --git a/.artifacts/hero-animation/mesh-trail-v1.png b/.artifacts/hero-animation/mesh-trail-v1.png new file mode 100644 index 0000000..fcb67c8 Binary files /dev/null and b/.artifacts/hero-animation/mesh-trail-v1.png differ diff --git a/.artifacts/hero-animation/mesh-trail-v2.png b/.artifacts/hero-animation/mesh-trail-v2.png new file mode 100644 index 0000000..f299411 Binary files /dev/null and b/.artifacts/hero-animation/mesh-trail-v2.png differ diff --git a/.artifacts/hero-animation/mesh-triangle.png b/.artifacts/hero-animation/mesh-triangle.png new file mode 100644 index 0000000..001d15b Binary files /dev/null and b/.artifacts/hero-animation/mesh-triangle.png differ diff --git a/.artifacts/hero-animation/mesh-zoom-mid.png b/.artifacts/hero-animation/mesh-zoom-mid.png new file mode 100644 index 0000000..2c4bbf9 Binary files /dev/null and b/.artifacts/hero-animation/mesh-zoom-mid.png differ diff --git a/.artifacts/hero-animation/prompt-box-early.png b/.artifacts/hero-animation/prompt-box-early.png new file mode 100644 index 0000000..7705cc4 Binary files /dev/null and b/.artifacts/hero-animation/prompt-box-early.png differ diff --git 
a/.artifacts/hero-animation/prompt-input-live.png b/.artifacts/hero-animation/prompt-input-live.png new file mode 100644 index 0000000..a908165 Binary files /dev/null and b/.artifacts/hero-animation/prompt-input-live.png differ diff --git a/.artifacts/hero-animation/reference.png b/.artifacts/hero-animation/reference.png new file mode 100644 index 0000000..a0bc2cb Binary files /dev/null and b/.artifacts/hero-animation/reference.png differ diff --git a/.artifacts/hero-animation/responsive-1200.png b/.artifacts/hero-animation/responsive-1200.png new file mode 100644 index 0000000..7d3d9c0 Binary files /dev/null and b/.artifacts/hero-animation/responsive-1200.png differ diff --git a/.artifacts/hero-animation/responsive-1700.png b/.artifacts/hero-animation/responsive-1700.png new file mode 100644 index 0000000..da8a9e9 Binary files /dev/null and b/.artifacts/hero-animation/responsive-1700.png differ diff --git a/.artifacts/hero-animation/responsive-800.png b/.artifacts/hero-animation/responsive-800.png new file mode 100644 index 0000000..2d19f1e Binary files /dev/null and b/.artifacts/hero-animation/responsive-800.png differ diff --git a/.artifacts/hero-animation/session-mid-2.png b/.artifacts/hero-animation/session-mid-2.png new file mode 100644 index 0000000..3ae1d11 Binary files /dev/null and b/.artifacts/hero-animation/session-mid-2.png differ diff --git a/.artifacts/hero-animation/session-mid-3.png b/.artifacts/hero-animation/session-mid-3.png new file mode 100644 index 0000000..ade5ddd Binary files /dev/null and b/.artifacts/hero-animation/session-mid-3.png differ diff --git a/.artifacts/hero-animation/session-mid.png b/.artifacts/hero-animation/session-mid.png new file mode 100644 index 0000000..163ff3d Binary files /dev/null and b/.artifacts/hero-animation/session-mid.png differ diff --git a/.artifacts/hero-animation/where-mesh-fits-v2.png b/.artifacts/hero-animation/where-mesh-fits-v2.png new file mode 100644 index 0000000..f8e88d0 Binary files /dev/null and 
b/.artifacts/hero-animation/where-mesh-fits-v2.png differ diff --git a/.artifacts/hero-animation/where-mesh-fits.png b/.artifacts/hero-animation/where-mesh-fits.png new file mode 100644 index 0000000..93f9210 Binary files /dev/null and b/.artifacts/hero-animation/where-mesh-fits.png differ diff --git a/.artifacts/prompts/claudemesh-prompts.rtf b/.artifacts/prompts/claudemesh-prompts.rtf new file mode 100644 index 0000000..c886085 --- /dev/null +++ b/.artifacts/prompts/claudemesh-prompts.rtf @@ -0,0 +1,29 @@ +{\rtf1\ansi\ansicpg1252\cocoartf2867 +\cocoatextscaling0\cocoaplatform0{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +{\*\expandedcolortbl;;} +\margl1440\margr1440\vieww11180\viewh8060\viewkind0 +\pard\tx720\tx1440\tx2160\tx2880\tx3600\tx4320\tx5040\tx5760\tx6480\tx7200\tx7920\tx8640\pardirnatural\partightenfactor0 + +\f0\fs24 \cf0 Mesh templates for predefined roles, groups\'85\ +Mesh blockchain, can it be a good addition? For what?\ +Mesh webhooks, external web sockets, restful apis to be connected to the mesh (mcp)\ +Mesh skills available for all ai? Like a mesh catalog of skills for sessions to get and use them?\ +Inicial private mesh by default for every new user\ +Mesh dashboard for situational awareness of mesh, to illustrate the peers connected, their activity, status, mesh structure\ +Mesh of meshes? bridge?\ +Mesh Connectors: slack, telegram, they can appear as peers? Or sth different?\ +Connect humans to the mesh? Peer info to know about if human, type of channel (telegram or whatever) or llm model if ai?\ +How to connect others than just claude code? The problem will be the push system I suppose\ +\ +Add path (pwd) where each session is being executed for them to understand how to reference files if same computer? Maybe only visible for peers on same computer?\ +What if a peer on connection can make available all the project files, folders and subfolders? Direct access? 
So other ai can read files if needed from connected projects?\ +Can we have peer stats for example about context consumption?\ +Mesh notifications about new peers, new connectors, new resources? Broadcast?\ +Allow group or role changes dynamically not only on mesh connection?\ +Dynamic mcp that can be connected or disconnected on realtime without resetting the claude code sessions?\ +Mesh templates on creation, with a predefined structure that it can be changed as well by mesh admin role? Or any? Or what idea?\ +What if reminders can be just cron so ai knows exactly how to configure crons for the mesh? So broker can handle the cron creation? What about mesh heartbeats to keep ai alive?\ +Sandbox for code execution, python, node, chromium, etc so any peer can connect to resources, and resources being scalable on real time if a new peer needs a sandbox?\ +\ +} \ No newline at end of file diff --git a/.artifacts/specs/2026-04-10-cli-auth-device-code-pat.md b/.artifacts/specs/2026-04-10-cli-auth-device-code-pat.md new file mode 100644 index 0000000..e9bbe8a --- /dev/null +++ b/.artifacts/specs/2026-04-10-cli-auth-device-code-pat.md @@ -0,0 +1,593 @@ +# CLI Auth — Device Code Flow + Personal Access Tokens + +**Status:** spec +**Created:** 2026-04-10 +**Owner:** CLI-Dev (implementation), Orchestrator (spec) +**Target version:** v0.11.0 +**Related:** `2026-04-10-anthropic-vision-meshes-invites.md`, `2026-04-10-cli-wizard-architecture-refactor.md` + +## Goal + +The CLI is a first-class client. 
From a fresh terminal, with zero prior browser interaction, a user can: + +``` +claudemesh login # device-code OAuth, browser handshake +claudemesh create "Platform team" # creates real mesh via /api/my/meshes +claudemesh invite --email alice@x.com # generates invite, sends email +claudemesh launch --mesh platform-team -y # spawns Claude Code in the mesh +``` + +For CI / scripting / non-interactive contexts, PAT works too: + +``` +claudemesh login --token cm_pat_abc123 +claudemesh create "CI test mesh" --json | jq .id +``` + +This is the auth substrate that unblocks the "Anthropic vision" — every other dashboard-only feature (meshes, invites, members, billing) becomes CLI-accessible after this lands. + +## Non-goals + +- SSO / SAML / enterprise IdP integration (later, post-1.0) +- Refresh tokens with rotation (long-lived API keys are sufficient for v1) +- Multi-account switching (one logged-in identity per `~/.claudemesh/auth.json`) +- Device fleet management UI (single "revoke" button per token is enough for v1) + +## Auth model overview + +Two coexisting credential types, both backed by **Better Auth's `apiKey` plugin**: + +| Type | Created via | Lifetime | Use case | Storage | +|---|---|---|---|---| +| **Device-code session token** | `claudemesh login` (OAuth-style browser handshake) | 90 days, auto-renew on use | Interactive humans on their workstation | `~/.claudemesh/auth.json` | +| **Personal access token (PAT)** | Dashboard → Settings → CLI tokens → Generate | User-chosen (30d / 90d / 1y / never), explicit revocation | CI, scripts, automation, server-side cron | Anywhere the user puts it; CLI reads from `--token` flag, env var, or `auth.json` | + +Both flow through the same `Authorization: Bearer cm_…` header. The API doesn't care which one it gets — it just validates against the `api_key` table. 
+ +**Token format:** +- `cm_session_<32-byte base32>` — device-code sessions +- `cm_pat_<32-byte base32>` — personal access tokens + +The `cm_` prefix lets us scan for leaked tokens with regex (e.g. GitHub secret scanning, internal scripts). The middle segment (`session` / `pat`) is for human readability in token lists, not for security. + +## User flows + +### 1. First-time login (interactive happy path) + +``` +$ claudemesh login + + ██ claudemesh login + + Opening browser for authentication… + + If your browser didn't open, visit: + https://claudemesh.com/cli-auth?code=ABCD-EFGH + + Enter this code: + ABCD-EFGH + + Waiting for confirmation… ⠋ +``` + +In the browser: +1. User lands on `/cli-auth?code=ABCD-EFGH` +2. If not signed in, Better Auth login screen appears, then redirects back +3. User sees a confirmation card: + ``` + Link this CLI session? + Code: ABCD-EFGH + Device: Alejandro's MacBook Pro · darwin · arm64 + Expires in 9:47 + [Approve] [Deny] + ``` +4. User clicks Approve + +CLI polls every 1.5s, sees `approved`, receives token, writes `~/.claudemesh/auth.json` with `0600`, prints: + +``` + ✔ Authenticated as Alejandro Gutiérrez + ✔ Token saved to ~/.claudemesh/auth.json + ✔ Synced 3 meshes: alexis-mou, dev, claudefarm + + Run claudemesh --help to get started. +``` + +### 2. First-time login (PAT, non-interactive) + +``` +$ claudemesh login --token cm_pat_abc123def456... + ✔ Authenticated as Alejandro Gutiérrez (via PAT "ci-deploy") + ✔ Token saved to ~/.claudemesh/auth.json +``` + +Or one-shot, no save: + +``` +$ CLAUDEMESH_TOKEN=cm_pat_abc123 claudemesh create "test" +``` + +### 3. Already logged in, runs a command + +``` +$ claudemesh create "Platform team" + ✔ Created mesh platform-team (id: q5RI89Fl…) + ✔ Joined locally + ▸ Invite peers: claudemesh invite --mesh platform-team +``` + +No auth prompt — token in `auth.json` is used silently. + +### 4. 
Token expired or revoked + +``` +$ claudemesh peers + ✘ Authentication failed (token expired or revoked) + + Run claudemesh login to re-authenticate. +``` + +Exit code `2`. The `auth.json` is **not** auto-deleted (user might be debugging) but the next `claudemesh login` overwrites it cleanly. + +### 5. Wizard launch flow with auth integration + +When `claudemesh` (bare, no auth) is run: + +``` + ██ claudemesh + + ▸ Sign in (opens browser) + Paste a personal access token + Join a mesh via invite URL + Exit +``` + +After auth completes, the wizard transitions naturally into the launch flow (mesh picker → name → role → confirm → handoff). One uninterrupted experience from "fresh install" to "Claude Code in a mesh." + +### 6. CI / non-interactive + +``` +# .github/workflows/test.yml +- run: | + claudemesh login --token ${{ secrets.CLAUDEMESH_PAT }} + claudemesh create "CI run $GITHUB_RUN_ID" --json > mesh.json +``` + +Or zero-state: + +``` +- env: + CLAUDEMESH_TOKEN: ${{ secrets.CLAUDEMESH_PAT }} + run: claudemesh create "CI run $GITHUB_RUN_ID" --json +``` + +Token resolution order: `--token` flag > `CLAUDEMESH_TOKEN` env var > `~/.claudemesh/auth.json`. + +### 7. Logout + +``` +$ claudemesh logout + ✔ Token revoked on server + ✔ Removed ~/.claudemesh/auth.json +``` + +`logout` calls `DELETE /api/my/cli/sessions/current` to revoke server-side, then unlinks the local file. Best-effort: if the server call fails, still delete locally and warn. + +## Architecture + +### Backend — Better Auth `apiKey` plugin + +Better Auth ships an `apiKey` plugin that handles: +- Token generation (cryptographically random) +- Hashed storage (only the hash hits the DB; raw token never persisted) +- Verification middleware (validates `Authorization: Bearer …`) +- Per-token metadata (name, scopes, expiry, last-used) +- Per-token revocation + +We use it for both PAT and device-code sessions. Device-code sessions just have a marker in metadata distinguishing them from user-generated PATs. 
+ +**Wire-up:** `apps/web/src/lib/auth/index.ts` (or wherever Better Auth is initialized) adds: + +```ts +import { apiKey } from "better-auth/plugins"; + +export const auth = betterAuth({ + // …existing config + plugins: [ + // … + apiKey({ + enableMetadata: true, + apiKeyHeaders: ["x-api-key", "authorization"], + defaultPrefix: "cm_", + rateLimit: { enabled: true, timeWindow: 60_000, maxRequests: 100 }, + }), + ], +}); +``` + +### Backend — device-code table + +The `apiKey` plugin doesn't ship device-code flow out of the box. We add a small table + 4 endpoints on top. + +```sql +-- packages/db/migrations/0020_cli-device-code.sql +CREATE TABLE cli_device_code ( + device_code text PRIMARY KEY, -- opaque random, sent to CLI + user_code text UNIQUE NOT NULL, -- short human code: "ABCD-EFGH" + user_id text REFERENCES "user"(id), -- nullable until approved + api_key_id text REFERENCES api_key(id), -- the issued token, set on approve + device_name text NOT NULL, -- "Alejandro's MacBook Pro" + device_os text NOT NULL, -- "darwin" + device_arch text NOT NULL, -- "arm64" + ip_address text, -- for audit + user_agent text, + status text NOT NULL DEFAULT 'pending', -- 'pending' | 'approved' | 'denied' | 'expired' + created_at timestamptz NOT NULL DEFAULT now(), + expires_at timestamptz NOT NULL, -- created_at + 10 min + approved_at timestamptz +); + +CREATE INDEX cli_device_code_user_code_idx ON cli_device_code(user_code); +CREATE INDEX cli_device_code_status_expires_idx ON cli_device_code(status, expires_at); +``` + +A scheduled job (or lazy cleanup on insert) deletes rows where `status='expired'` AND `expires_at < now() - interval '7 days'`. + +### Backend — endpoints + +All under `apps/web/src/app/api/auth/cli/` (or wherever you keep public auth routes — these need to be **unauthed** since the CLI has no token yet). + +| Method | Path | Auth | Purpose | +|---|---|---|---| +| `POST` | `/api/auth/cli/device-code` | none | CLI requests a new device code. 
Body: `{ device_name, device_os, device_arch }`. Returns `{ device_code, user_code, expires_at, verification_url }`. | +| `GET` | `/api/auth/cli/device-code/:device_code` | none | CLI polls for status. Returns `{ status: 'pending'|'approved'|'denied'|'expired', token?: string, user?: { id, name, email } }`. Token only present when status=approved, and only **once** (subsequent polls return approved without token). | +| `POST` | `/api/auth/cli/device-code/:user_code/approve` | session | Browser confirms. Creates an `api_key` row with metadata `{ kind: 'session', device_name, device_code }`, sets `cli_device_code.api_key_id`, status=approved. | +| `POST` | `/api/auth/cli/device-code/:user_code/deny` | session | Browser denies. Sets status=denied. | + +Authed endpoints (under `/api/my/cli/`): + +| Method | Path | Purpose | +|---|---|---| +| `GET` | `/api/my/cli/sessions` | List active CLI sessions for the user (devices, last seen, created). | +| `DELETE` | `/api/my/cli/sessions/:id` | Revoke a specific session. | +| `POST` | `/api/my/cli/tokens` | Create a PAT. Body: `{ name, expires_in_days?, scopes? }`. Returns the raw token **once**. | +| `GET` | `/api/my/cli/tokens` | List PATs (no raw values, just metadata). | +| `DELETE` | `/api/my/cli/tokens/:id` | Revoke a PAT. | + +### Backend — middleware + +Existing `enforceAuth` (in `packages/api/src/utils/`) currently reads cookies. 
Extend it to also accept `Authorization: Bearer cm_…`: + +```ts +export async function enforceAuth(ctx) { + const bearer = ctx.req.headers.get("authorization")?.replace(/^Bearer /, ""); + if (bearer?.startsWith("cm_")) { + const result = await auth.api.verifyApiKey({ key: bearer }); + if (result.valid) { + // record last_used_at, increment usage counter + return { user: result.user, via: "apiKey", apiKey: result.apiKey }; + } + throw new TRPCError({ code: "UNAUTHORIZED", message: "Invalid token" }); + } + // …existing cookie-based auth +} +``` + +The `apiKey` plugin handles `last_used_at` updates automatically. + +### Backend — web route + +`apps/web/src/app/[locale]/cli-auth/page.tsx`: + +- Reads `?code=ABCD-EFGH` from query string +- If no session, redirects to `/login?next=%2Fcli-auth%3Fcode%3DABCD-EFGH` (the `next` value is URL-encoded so the nested `?code=` query survives the redirect) +- If session, fetches device code metadata via server component, renders confirmation card +- Approve button → `POST /api/auth/cli/device-code/:user_code/approve` +- Deny button → `POST /api/auth/cli/device-code/:user_code/deny` +- After approve, shows: "✓ CLI authenticated. Return to your terminal." + +Mobile-friendly. Confirmation card shows device fingerprint so the user can verify they're approving the right session. + +### Backend — dashboard PAT UI + +`apps/web/src/app/[locale]/dashboard/settings/cli-tokens/page.tsx`: + +- List of existing PATs (name, created, last used, expires) +- "Generate new token" button → modal with name + expiry picker +- After creation, show raw token once with copy button + warning ("This token will not be shown again") +- Per-row revoke button + +Reuses existing dashboard layout. Should be ~150 lines including the modal. 
+ +### CLI — file layout + +``` +apps/cli/src/ +├── commands/ +│ ├── login.ts # NEW +│ ├── logout.ts # NEW +│ ├── whoami.ts # NEW +│ ├── create.ts # rewrite to call API +│ ├── invite.ts # NEW +│ ├── sync.ts # rewrite to call API +│ └── …existing +└── lib/ + ├── auth-store.ts # NEW: read/write ~/.claudemesh/auth.json + ├── api-client.ts # NEW: typed fetch wrapper + ├── device-info.ts # NEW: collect hostname, os, arch for device-code request + └── …existing +``` + +### CLI — `auth-store.ts` + +```ts +// ~/.claudemesh/auth.json +type AuthFile = { + version: 1; + token: string; // cm_session_… or cm_pat_… + user: { id: string; name: string; email: string }; + created_at: string; // ISO + source: "device-code" | "pat" | "env"; +}; +``` + +Read priority: `--token` flag > `CLAUDEMESH_TOKEN` env > `auth.json`. +Write only on `login` success. File mode `0600`. Parent dir `0700`. +On read, if file mode is too permissive, log a warning and continue. + +### CLI — `api-client.ts` + +Thin wrapper over `fetch`: + +```ts +export class ClaudemeshApi { + constructor(private opts: { baseUrl: string; token: string }) {} + + async createMesh(input: { name: string; slug?: string }) { … } + async listMeshes() { … } + async createInvite(input: { meshId: string; email?: string; role?: string }) { … } + async listSessions() { … } + async revokeSession(id: string) { … } + async whoami() { … } +} +``` + +Type definitions live in `packages/api/src/contracts/cli.ts` (new file) — generated from the existing tRPC routers as plain types so the CLI doesn't need to import the whole tRPC client. + +Base URL from `CLAUDEMESH_API_URL` env var, defaults to `https://claudemesh.com`. Allows local dev against `http://localhost:3000`. 
+ +### CLI — device-code login flow + +```ts +// commands/login.ts +async function deviceCodeLogin() { + const device = collectDeviceInfo(); + const { device_code, user_code, expires_at, verification_url } = + await api.requestDeviceCode(device); + + console.log(` Opening ${verification_url}…`); + console.log(` Code: ${user_code}`); + + await openBrowser(`${verification_url}?code=${user_code}`); + + const spinner = ora("Waiting for confirmation").start(); + const deadline = new Date(expires_at).getTime(); + + while (Date.now() < deadline) { + await sleep(1500); + const result = await api.pollDeviceCode(device_code); + if (result.status === "approved") { + spinner.succeed("Authenticated"); + await authStore.write({ token: result.token, user: result.user, source: "device-code" }); + await syncMeshes(); + return; + } + if (result.status === "denied") { + spinner.fail("Denied in browser"); + process.exit(1); + } + } + spinner.fail("Timed out"); + process.exit(1); +} +``` + +Polls every 1.5s. Server returns `{ slow_down: true }` if polled too fast (rate limit at 1/sec). + +## Security + +1. **Tokens are hashed at rest** (Better Auth `apiKey` plugin handles this with bcrypt or argon2). +2. **Raw tokens shown to user once.** PATs in dashboard, device-code tokens via `claudemesh login` output. Never logged, never re-displayable. +3. **`auth.json` is `0600`.** CLI refuses to write if parent dir can't be made `0700`. Warns on read if mode is wider. +4. **Token prefix `cm_` enables secret scanning.** Document the regex `cm_(session|pat)_[a-z0-9]{32,}` in security docs so GitHub secret scanning, GitGuardian, etc. can detect leaks. +5. **`/api/auth/cli/device-code/:device_code` polling is rate-limited** to 1 req/sec per IP per device_code. Returns `429` with `slow_down: true` body. +6. **Device codes expire in 10 minutes.** Approved-but-unclaimed tokens stay valid (the polling endpoint still returns the token for 60 seconds after approval, then the device_code row is GC'd). +7. 
**Audit logging.** Every device-code approval, PAT creation, and PAT revocation emits an audit event (`auth.cli.session.created`, `auth.cli.pat.created`, etc.). Stored in existing audit log if there is one, otherwise new `audit_log` table. +8. **Session invalidation on password change.** When a user changes their password via Better Auth, all of that user's device-code session tokens (`api_key` rows with metadata `kind: 'session'`) are revoked. PATs are NOT auto-revoked (they're explicitly user-managed). +9. **Token revocation is immediate.** `auth.api.verifyApiKey` checks DB on every request — no in-memory cache. +10. **No CSRF concern** for device-code endpoints — the unauthed ones don't act on user state, the authed ones use Better Auth's existing CSRF protection. + +## Wizard UX integration + +The current welcome wizard already has: +``` +▸ Create account (new to claudemesh) + Sign in (existing account) + Paste an invite URL + Exit +``` + +After this spec lands, the welcome screen becomes: +``` + ██ claudemesh + + ▸ Sign in ← device-code OAuth + Paste an access token ← PAT path + Join via invite URL ← unchanged + Create account ← opens /register, then back to login + Exit +``` + +"Sign in" becomes the headline option. The current "Create account" still opens browser to `/register` but flows back through the device-code handshake instead of a custom callback. + +Once authenticated, the wizard transitions to: +``` + ██ claudemesh launch + + Account ✔ Alejandro Gutiérrez + Mesh ▸ (pick one — 3 available) + Name ✔ Alexis (from --name) + Role ▸ (pick one) + + ▸ Continue + Cancel +``` + +Status rows show what's filled and what's left. Mesh picker fetches from `GET /api/my/meshes` via the freshly minted token. + +This integrates cleanly with the wizard architecture refactor in `2026-04-10-cli-wizard-architecture-refactor.md`: auth becomes one screen in the launch flow with `isComplete: s => s.user !== null`. On a fresh machine the auth screen runs; on a returning machine it's auto-skipped. 
+ +## Error handling + +| Scenario | Behavior | +|---|---| +| Browser doesn't open | Print URL prominently, keep polling | +| Network down during poll | Retry with exponential backoff (1.5s → 3s → 6s, max 30s) | +| Device code expires | Print "Login timed out, run `claudemesh login` to retry", exit 1 | +| Token rejected by API | Print "Authentication failed", suggest `claudemesh login`, exit 2 | +| `auth.json` corrupted | Print "Auth file corrupted, run `claudemesh login`", exit 2 | +| `auth.json` permissions wrong | Warn, fix to `0600`, continue | +| PAT pasted to `--token` is malformed | Print "Invalid token format (expected `cm_pat_…`)", exit 1 | +| PAT pasted to `--token` is valid format but unknown | API returns 401, print "Token rejected", exit 2 | +| Two CLI instances poll simultaneously | Both get the same approved status; first to read gets the token, second gets `{ status: 'approved', token: null }` (already_claimed). Document this. | +| User clicks Approve in browser, then closes tab | CLI's poll catches it, login succeeds. The browser tab closure is irrelevant. | +| User completes login on machine A, then runs `claudemesh login` on machine B with same account | Both sessions coexist as separate `api_key` rows. `claudemesh whoami --sessions` shows both. | + +## Implementation phases + +Each phase ships independently and is independently testable. 
+ +### Phase 1 — Backend foundation (4–6 hours) + +- [ ] Wire Better Auth `apiKey` plugin in `apps/web/src/lib/auth/` +- [ ] Migration `0020_cli-device-code.sql` +- [ ] Drizzle schema for `cli_device_code` in `packages/db/src/schema/auth.ts` +- [ ] Endpoints: `POST /api/auth/cli/device-code`, `GET /api/auth/cli/device-code/:device_code`, `POST /api/auth/cli/device-code/:user_code/approve`, `POST /api/auth/cli/device-code/:user_code/deny` +- [ ] Extend `enforceAuth` middleware to accept `Authorization: Bearer cm_…` +- [ ] Endpoints: `POST /api/my/cli/tokens`, `GET /api/my/cli/tokens`, `DELETE /api/my/cli/tokens/:id`, `GET /api/my/cli/sessions`, `DELETE /api/my/cli/sessions/:id` +- [ ] Unit tests for token verification and device-code state machine + +### Phase 2 — Web routes (3–4 hours) + +- [ ] `/cli-auth?code=...` page (server component + approve/deny client component) +- [ ] `/dashboard/settings/cli-tokens` page (list + create modal + revoke) +- [ ] Translations for both pages (en, es) +- [ ] E2E test: full device-code happy path with Playwright + +### Phase 3 — CLI auth core (4–5 hours) + +- [ ] `lib/device-info.ts` — collect hostname, os, arch +- [ ] `lib/auth-store.ts` — read/write `~/.claudemesh/auth.json` with mode checks +- [ ] `lib/api-client.ts` — typed fetch wrapper with bearer header +- [ ] `commands/login.ts` — device-code flow + `--token` PAT path +- [ ] `commands/logout.ts` — revoke + delete local +- [ ] `commands/whoami.ts` — print current identity + token source +- [ ] Token resolution helper (`--token` > `CLAUDEMESH_TOKEN` > `auth.json`) +- [ ] Unit tests for auth-store and token resolution + +### Phase 4 — CLI commands wired to API (3–4 hours) + +- [ ] Rewrite `commands/create.ts` to call `POST /api/my/meshes` +- [ ] New `commands/invite.ts` with `--email`, `--mesh`, `--role`, `--expires-in` +- [ ] Rewrite `commands/sync.ts` to call `GET /api/my/meshes` and reconcile local config +- [ ] Update `commands/list.ts` to show server-side meshes too +- 
[ ] Integration tests against staging broker + web + +### Phase 5 — Wizard integration (3–4 hours) + +- [ ] Welcome screen new options (Sign in / Paste token / Create account / Join invite) +- [ ] Auth screen as a flow step with `isComplete: s => s.user !== null` +- [ ] Status rows pattern showing auth state during launch +- [ ] First-run detection (no `auth.json`) → auto-route to login + +### Phase 6 — Polish, docs, ship (2–3 hours) + +- [ ] Update `README.md`, `apps/cli/README.md`, `docs/quickstart.md` +- [ ] CHANGELOG entry for v0.11.0 +- [ ] Telemetry events for `cli.login.{started,succeeded,failed}` +- [ ] Bump `apps/cli/package.json` to `0.11.0` +- [ ] Publish to npm +- [ ] Deploy broker / web (no broker changes, web for new routes) + +**Total estimate:** 19–26 hours of focused work. Realistic: 3–4 days with testing and review. + +## Dependencies between phases + +``` +Phase 1 (backend) ──┬─→ Phase 2 (web routes) + └─→ Phase 3 (CLI auth core) + │ + └─→ Phase 4 (commands) + │ + └─→ Phase 5 (wizard) + │ + └─→ Phase 6 (ship) +``` + +Phase 1 and 2 can be parallelized after the schema lands. Phase 3 needs Phase 1 endpoints live (even if on staging). Phase 4 onwards is strictly serial. + +## Telemetry + +Emit these events (PostHog or whatever the existing analytics are): + +- `cli.login.started` — properties: `{ method: 'device-code' | 'pat' }` +- `cli.login.succeeded` — properties: `{ method, user_id }` +- `cli.login.failed` — properties: `{ method, reason }` +- `cli.logout` — properties: `{ user_id }` +- `cli.command.executed` — properties: `{ command, exit_code, duration_ms, authenticated: boolean }` +- `cli.api.error` — properties: `{ endpoint, status, error_code }` + +Telemetry is **opt-out**. First run shows a one-line notice: "claudemesh collects anonymized usage telemetry. Disable with `claudemesh telemetry off`." + +## Open questions + +1. **Better Auth `apiKey` plugin version** — confirm it's installed and at a version that supports `enableMetadata`. 
Check `pnpm why better-auth` in `apps/web`. +2. **Audit log table** — does one already exist? If not, this spec adds three rows of log; not worth a new table for that. Use `console.error` with structured JSON (it writes to stderr; `console.log` writes to stdout) and let the platform's log collector handle it. +3. **Email sending** — `claudemesh invite --email` requires a transactional email path. Does the web app already have one (Resend, Postmark)? If yes, reuse. If no, defer the email send to a follow-up; the invite command can still create the invite and print the URL. +4. **Token scopes** — v1 ships with no scopes; every token has full account access. Should we add `mesh:read`, `mesh:write`, `invite:create` scopes from day one, or wait? **Recommendation:** wait. YAGNI. Add when a user actually wants a read-only CI token. +5. **PAT expiry default** — 90 days? 1 year? Never? Better Auth supports all three. **Recommendation:** 1 year default, user can pick "never" with explicit warning. +6. **Mesh slug uniqueness in `claudemesh create`** — what happens if two users try to create meshes with the same slug? Existing API behavior should be tested. If it errors, the CLI should suggest `--slug platform-team-2`. +7. **`claudemesh login` when already logged in** — re-authenticate (overwrite) or error ("already logged in, run logout first")? **Recommendation:** re-authenticate silently with a one-line notice ("Replacing existing session for Alejandro"). 
+ +## Acceptance criteria + +For v0.11.0 to ship, all of these must be true: + +- [ ] `claudemesh login` on a fresh machine (no `auth.json`) opens browser, completes device-code flow, writes `auth.json`, runs in <30 seconds end-to-end +- [ ] `claudemesh login --token cm_pat_…` works without browser +- [ ] `claudemesh logout` revokes server-side and deletes local file +- [ ] `claudemesh whoami` prints user identity and token source +- [ ] `claudemesh create "Test mesh"` creates a real mesh on the server, joins it locally, and the user can see it on the dashboard +- [ ] `claudemesh invite --email a@b.c --mesh test` creates an invite and prints the URL +- [ ] `claudemesh launch` (bare) on a fresh machine walks login → mesh picker → name/role → Claude Code, all in one wizard +- [ ] Dashboard `/dashboard/settings/cli-tokens` lists, creates, and revokes PATs +- [ ] All flows work in `en` and `es` +- [ ] Existing `claudemesh launch` invocations (with token already in `auth.json`) still work without prompting +- [ ] Token in `auth.json` survives an hour of idle and continues to work (no aggressive expiry) +- [ ] Revoking a token in the dashboard makes the next CLI call fail with a clear error +- [ ] Documentation updated in `README.md`, `apps/cli/README.md`, `docs/quickstart.md` +- [ ] CHANGELOG entry written +- [ ] Published to npm as `claudemesh-cli@0.11.0` + +## What this unlocks + +Once this lands, every dashboard-only feature becomes one CLI command away. Future specs that depend on this: + +- `claudemesh members list` / `claudemesh members add` +- `claudemesh billing usage` +- `claudemesh mesh archive` +- `claudemesh stream subscribe` (live broker events) +- `claudemesh skill publish` (publish a skill to mesh registry) +- `claudemesh log tail` (mesh-wide audit log) + +This is the foundational unlock. Everything else is incremental on top. 
diff --git a/.artifacts/specs/2026-04-10-cli-v2-pass2-facade-pattern.md b/.artifacts/specs/2026-04-10-cli-v2-pass2-facade-pattern.md new file mode 100644 index 0000000..6959408 --- /dev/null +++ b/.artifacts/specs/2026-04-10-cli-v2-pass2-facade-pattern.md @@ -0,0 +1,1490 @@ +# claudemesh-cli v2 Pass 2 — Facade Pattern (referenced by Pass 1) + +> ⚠️ **This document is a special case: it applies to BOTH Pass 1 and Pass 2.** +> +> The facade pattern is the main architectural improvement of v2 Pass 1 — it's the scalability + code distribution win you asked for. Pass 1 implements the full facade structure described here. +> +> However, some concrete examples in this document reference services that only exist in Pass 2 (`services/store`, `services/broker/sync-daemon`, etc.). When an example mentions a Pass 2-only service, treat the pattern as authoritative and substitute the Pass 1 service names. +> +> **Pass 1 services** (use the facade pattern here): +> auth, mesh, invite, broker, api, crypto, config, state (last-used cache only, not mesh state), device, clipboard, spawn, telemetry, health, update, i18n, lifecycle, logger. +> +> **Pass 2 services** (deferred): +> store (local SQLite source of truth), broker/sync-daemon (outbox/inbox), broker/peer-crypto (extended per-mesh long-term keys), anything in the shared-infrastructure spec. +> +> For the Pass 1 implementation target that lists exactly which services ship in Pass 1, see **`2026-04-11-cli-v2-pass1.md`**. + +**Status:** Boundary canonical (applies to both Pass 1 and Pass 2) +**Created:** 2026-04-10 +**Consolidated:** 2026-04-10 (post-reviews, no appendices) +**Companion to:** `2026-04-10-cli-v2-final-vision.md` (§3.2 defers to this document) +**Purpose:** Single source of truth for the UI↔services boundary. Specifies how facades work, what they contain, and how the ESLint + dependency-cruiser config enforces them. When a developer asks "can I import X from Y?", the answer is in here. 
+ +--- + +## Table of contents + +1. The problem +2. The principle +3. Facade contract +4. Import policy (the hard rules) +5. Example facades (TypeScript, verified) +6. Directory structure +7. ESLint boundaries configuration +8. dependency-cruiser configuration +9. Type-only imports, dynamic imports, and re-exports +10. Testing facades and contract drift +11. What facades never expose +12. Async streams and cancellation +13. Errors and validation +14. FAQ + +--- + +## 1. The problem + +Without a facade, UI components end up importing whatever files happen to exist in a service folder: + +```ts +// ui/screens/AuthScreen.tsx — bad +import { deviceCodeLogin } from '@/services/auth/device-code'; +import { writeTokenFile } from '@/services/auth/token-store'; +import { apiClient } from '@/services/api/client'; +``` + +Three problems: + +1. `AuthScreen` couples to specific implementation files — rename one and the UI breaks +2. `AuthScreen` has access to low-level operations (`writeTokenFile`) it should never call directly +3. `AuthScreen` has the raw `apiClient` and can make any API call anywhere + +The fix is **one narrow door per service**, enforced by tooling, not naming conventions. UI and non-service consumers import from `services/<name>/facade.ts` only. Every other file in the service is private. + +## 2. The principle + +> **A facade is a narrow, Promise-returning, plain-data interface that hides every implementation detail of a service. Consumers orchestrate business logic through facades. Services implement the logic behind them.** + +Consequences we actively want: + +- UI components are trivially testable with mock facades (no SQLite, no network, no filesystem) +- Services can be refactored freely without touching any consumer code +- The "what can UI do" surface is auditable by reading every `facade.ts` file +- Circular imports between UI and services become structurally impossible +- Boundary drift is a lint failure at CI, not a code review issue + +## 3. 
Facade contract + +### 3.1 What a facade MUST be + +1. **A single `facade.ts` file per service**, at `services/<name>/facade.ts`. Not a folder, not a subdirectory, not multiple files. +2. **Named exports only** — no default export, no namespace re-export +3. **Each export is either an async function or a pure data constant** (never a class, never a factory, never a singleton handle) +4. **Every parameter is a plain object** (`{ ... }`) — never a class instance, never a service handle +5. **Every return type is a plain object** constructed inline — never a pass-through of a service response +6. **Every input is Zod-validated** at the facade boundary before touching the service +7. **Every output is Zod-validated** before returning to the caller — the facade literally builds a new object and runs `.parse()` on it +8. **Errors are typed** — facades throw instances of domain-specific error classes from `services/<name>/errors.ts`, never raw strings, never `ZodError` from input validation + +### 3.2 What a facade MUST NOT do + +1. **Never return a class instance** (even one defined in the same service) +2. **Never expose filesystem paths, URLs, or tokens** in return values — not even masked ones +3. **Never expose a database handle, HTTP client, or socket** — these are service-internal +4. **Never take a callback** for state — async/await only; for progress streams use async iterators with `AbortSignal` +5. **Never depend on React, Ink, or any UI library** — facades are framework-agnostic +6. **Never import from `ui/`, `cli/`, `commands/`, `mcp/`, or `entrypoints/`** +7. **Never use globals** — every dependency is injected at service boot time +8. **Never `export *` from another file** — every symbol is explicitly named (prevents accidental internal leakage) +9. **Never pass a service response through `...spread`** to the return object — every field is picked by name + +### 3.3 Facade lifecycle + +Facades themselves are stateless. They're free functions that call into the service. 
The service holds state (database connection, HTTP client, lifecycle); the facade is the stateless adapter. + +```ts +// services/auth/index.ts — service boot (called once from entrypoints) +import { createAuthService } from './implementation'; +import { getTokenStore } from './token-store'; +import { getApiClient } from '@/services/api'; + +let instance: AuthService | null = null; + +export function getAuthService(): AuthService { + if (!instance) { + instance = createAuthService({ + tokenStore: getTokenStore(), + apiClient: getApiClient(), + }); + } + return instance; +} + +// NOTE: no `export * from './device-code'`, no `export { ... } from './internal'`. +// Only `getAuthService` is public via index.ts, and only services/* can import it. +``` + +```ts +// services/auth/facade.ts +import { z } from 'zod'; +import { getAuthService } from './index'; +import { InvalidTokenError, DeviceCodeTimeoutError, AuthNetworkError } from './errors'; + +// ...schemas and facade functions below +``` + +UI never imports `getAuthService` — only the facade. + +## 4. Import policy (the hard rules) + +This is the stack of allow-lists. Every rule is enforced by ESLint boundaries + dependency-cruiser. Any violation fails CI. 
+ +| Consumer | May import from | +|---|---| +| `entrypoints/` | `cli/`, `commands/`, `ui/`, `mcp/`, `service-facade/`, `service-index` (DI wiring only), `utils/`, `types/`, `constants/`, `locales/`, `templates/`, `migrations/` | +| `commands/` | `cli/`, `ui/`, **`service-facade/` only**, `utils/`, `types/`, `constants/`, `locales/` | +| `ui/` | `ui/`, **`service-facade/` only**, `utils/`, `types/`, `constants/`, `locales/` | +| `cli/` (non-Ink I/O) | **`service-facade/` only**, `utils/`, `types/`, `constants/`, `locales/` | +| `mcp/` | **`service-facade/` only**, `templates/`, `utils/`, `types/`, `constants/`, `locales/` | +| `service-facade` (a service's own facade) | its own `service-internal`, other services' `service-facade`, `utils/`, `types/`, `constants/`, `locales/` | +| `service-internal` (service implementation files) | its own `service-internal`, its own `service-facade` (rare), other services' `service-facade`, `templates/`, `utils/`, `types/`, `constants/`, `locales/` | +| `service-index` (factory barrel) | its own `service-internal`, `utils/`, `types/`, `constants/`, `locales/` | +| `service-test` | its own `service-internal`, its own `service-facade`, any `service-facade` (for integration), `utils/`, `types/`, `constants/`, `templates/` | +| `templates/` | `utils/`, `types/`, `constants/` | +| `locales/` | `types/` | +| `utils/` | `types/` | +| `constants/` | (nothing) | +| `types/` | (nothing, except other `types/`) | +| `migrations/` | own `service-index`, `utils/`, `types/`, `constants/` | + +**Key tightenings from review:** + +- **`commands` uses facades only.** Commands go through facades like UI does. If a command needs deeper access, the facade is missing a function — extend the facade, don't bypass it. +- **`entrypoints` no longer gets `service-internal` access.** Entrypoints call `commands`, which call facades. +- **`mcp` now uses facades only** (no `service-index`). MCP tool handlers are not magic — they go through the same narrow interfaces as commands. 
If a tool needs cross-service composition beyond what a single facade exposes, it composes multiple facades. +- **`service-facade` can import from other services' `service-facade`** (cross-service facade composition). Service-to-service calls go facade→facade, not through `index.ts`. This removes the `service-index` cross-service path entirely. +- **`service-index` is downgraded** to a factory barrel that only exposes `getXxxService()` and is imported only by `entrypoints/cli.ts` for DI wiring and by its own service's internals. No other layer imports `service-index`. +- **`migrations/` no longer gets `service-internal` access.** Migrations work against `service-index` (its own service only, through the factory) and the raw database connection. Deep data surgery happens through typed helpers in the service's internals, not directly. + +**The only consumers that touch `service-internal` are**: +- The service itself (its own folder) +- The service's tests +- The service's `facade.ts` and `index.ts` files (by definition) + +## 5. 
Example facades (TypeScript, verified) + +### 5.1 Auth facade — with input AND output validation + +```ts +// services/auth/facade.ts + +import { z } from 'zod'; +import { getAuthService } from './index'; +import { + InvalidTokenError, + DeviceCodeTimeoutError, + AuthNetworkError, + toDomainError, +} from './errors'; + +// ---- Input schemas ---- + +const LoginWithTokenInputSchema = z.object({ + token: z.string().regex(/^cm_(session|pat)_[a-z0-9]{32,}$/, 'malformed token'), +}); + +// ---- Output schemas ---- + +const UserSchema = z.object({ + id: z.string(), + display_name: z.string(), + email: z.string().email(), +}).strict(); + +const LoginResultSchema = z.object({ + user: UserSchema, +}).strict(); + +const WhoAmIResultSchema = z.object({ + signed_in: z.boolean(), + user: UserSchema.nullable(), + token_source: z.enum(['device_code', 'pat', 'env']).nullable(), +}).strict(); + +const LogoutResultSchema = z.object({ + server_revoked: z.boolean(), +}).strict(); + +// ---- Exported types ---- + +export type LoginResult = z.infer<typeof LoginResultSchema>; +export type WhoAmIResult = z.infer<typeof WhoAmIResultSchema>; +export type LogoutResult = z.infer<typeof LogoutResultSchema>; + +// ---- Facade functions ---- + +/** + * Start the device-code flow. Opens browser, polls until user approves or denies. + * @throws DeviceCodeTimeoutError if the user doesn't respond in 10 minutes + * @throws AuthNetworkError if claudemesh.com is unreachable + */ +export async function loginWithDeviceCode(): Promise<LoginResult> { + try { + const result = await getAuthService().startDeviceCodeFlow(); + // Build output explicitly — no spread, no pass-through + return LoginResultSchema.parse({ + user: { + id: result.user.id, + display_name: result.user.display_name, + email: result.user.email, + }, + }); + } catch (err) { + throw toDomainError(err); + } +} + +/** + * Login using a PAT or session token from explicit input. 
+ * @throws InvalidTokenError if the token format is wrong + * @throws AuthNetworkError if the server rejects or is unreachable + */ +export async function loginWithToken(input: unknown): Promise<LoginResult> { + // Validate input BEFORE touching the service + const parsed = LoginWithTokenInputSchema.safeParse(input); + if (!parsed.success) { + throw new InvalidTokenError('malformed token format'); + } + try { + const result = await getAuthService().loginWithToken(parsed.data.token); + return LoginResultSchema.parse({ + user: { + id: result.user.id, + display_name: result.user.display_name, + email: result.user.email, + }, + }); + } catch (err) { + throw toDomainError(err); + } +} + +/** + * Check current auth state. Never throws. + */ +export async function whoAmI(): Promise<WhoAmIResult> { + try { + const state = await getAuthService().getCurrentState(); + return WhoAmIResultSchema.parse({ + signed_in: state.signed_in, + user: state.user + ? { + id: state.user.id, + display_name: state.user.display_name, + email: state.user.email, + } + : null, + token_source: state.token_source, + }); + } catch { + return WhoAmIResultSchema.parse({ + signed_in: false, + user: null, + token_source: null, + }); + } +} + +/** + * Revoke the current session server-side and clear local credentials. + * Best-effort on server; always clears local. 
+ */ +export async function logout(): Promise<LogoutResult> { + try { + const result = await getAuthService().logout(); + return LogoutResultSchema.parse({ + server_revoked: result.server_revoked, + }); + } catch { + return LogoutResultSchema.parse({ server_revoked: false }); + } +} +``` + +**Key properties verified in this example**: +- `.strict()` on every schema prevents extra fields from passing through (eliminates the "class instance with matching fields" bypass) +- Input is validated BEFORE the service call +- Output is built explicitly, field by field — no spread, no pass-through +- Every branch catches errors: throwing functions map them via `toDomainError`; the never-throw functions (`whoAmI`, `logout`) return a safe default instead +- Zod errors never escape the facade — they're mapped to domain errors + +### 5.2 Error mapping helper — preserves cause + logs unmapped bugs + +```ts +// services/auth/errors.ts + +import { ZodError } from 'zod'; +import { logger } from '@/services/logger/facade'; // structured logger + +export class InvalidTokenError extends Error { + readonly code = 'AUTH_INVALID_TOKEN'; + constructor(message: string, options?: { cause?: unknown }) { + super(message); + if (options?.cause) this.cause = options.cause; + } +} + +export class DeviceCodeTimeoutError extends Error { + readonly code = 'AUTH_DEVICE_CODE_TIMEOUT'; + constructor(cause?: unknown) { + super('device code flow timed out'); + if (cause) this.cause = cause; + } +} + +export class AuthNetworkError extends Error { + readonly code = 'AUTH_NETWORK'; + constructor(cause?: unknown) { + super('auth network error'); + if (cause) this.cause = cause; + } +} + +/** + * Unmapped errors are real bugs. They land in the UnmappedError class and + * get logged with full stack for telemetry, so they don't disappear into + * generic AuthNetworkError (which would hide root causes). 
+ */ +export class UnmappedError extends Error { + readonly code = 'UNMAPPED'; + constructor(cause: unknown) { + super('unmapped internal error'); + this.cause = cause; + } +} + +/** + * Map any thrown value into a typed domain error. + * + * Contract: + * - Domain errors pass through unchanged + * - ZodError → InvalidTokenError with original cause attached + * - Node network errors (ENOTFOUND, ECONNREFUSED, etc.) → AuthNetworkError + * - EVERYTHING ELSE → UnmappedError (explicitly logged as a bug) + * + * Unmapped errors are logged at ERROR level with the full stack trace so + * they surface in telemetry instead of being silently categorized as + * network errors. This fixes the observability gap where a null pointer + * bug would appear as "network error" in logs. + */ +export function toDomainError(err: unknown): Error { + // Domain errors pass through unchanged + if (err instanceof InvalidTokenError) return err; + if (err instanceof DeviceCodeTimeoutError) return err; + if (err instanceof AuthNetworkError) return err; + if (err instanceof UnmappedError) return err; + + // Zod validation failures → typed input error, preserving the original for logs + if (err instanceof ZodError) { + const mapped = new InvalidTokenError('schema validation failed', { cause: err }); + logger.warn('facade: zod validation failed', { errors: err.errors, mapped: mapped.code }); + return mapped; + } + + // Node network errors → AuthNetworkError, preserving the original + if (err && typeof err === 'object' && 'code' in err) { + const code = (err as { code: unknown }).code; + if ( + code === 'ENOTFOUND' || + code === 'ECONNREFUSED' || + code === 'ETIMEDOUT' || + code === 'ECONNRESET' || + code === 'EAI_AGAIN' + ) { + return new AuthNetworkError(err); + } + } + + // Anything else is a bug. Log it with full context and return an UnmappedError. + // This prevents programmer bugs from being silently miscategorized. 
+ logger.error('facade: unmapped error (likely a bug)', { + error: err instanceof Error ? { message: err.message, stack: err.stack, name: err.name } : err, + facade: 'auth', + }); + return new UnmappedError(err); +} +``` + +**Key change from v1**: the previous implementation collapsed everything unknown into `AuthNetworkError`. A null pointer exception in the service would surface to the UI as "network error" and telemetry would never flag it as a bug. The new `UnmappedError` class catches these with explicit logging at ERROR level, so real bugs show up in logs and can be tracked by telemetry. + +Every service's `errors.ts` follows this pattern: domain errors + `UnmappedError` + `toDomainError` helper that logs unmapped cases. + +### 5.3 Mesh facade (summary form) + +```ts +// services/mesh/facade.ts + +import { z } from 'zod'; +import { getMeshService } from './index'; +import { MeshNotFoundError, SlugCollisionError, PermissionDeniedError, toDomainError } from './errors'; + +const MeshSummarySchema = z.object({ + slug: z.string(), + name: z.string(), + kind: z.enum(['personal', 'shared_owner', 'shared_guest']), + peer_count: z.number().int().nonnegative(), + peers_online: z.number().int().nonnegative(), + last_used_at: z.number().int().nullable(), +}).strict(); + +const MeshListResultSchema = z.object({ + meshes: z.array(MeshSummarySchema), + last_used_slug: z.string().nullable(), +}).strict(); + +const PublishMeshResultSchema = z.object({ + slug: z.string(), + invite_url: z.string().url(), +}).strict(); + +export type MeshSummary = z.infer<typeof MeshSummarySchema>; +export type MeshListResult = z.infer<typeof MeshListResultSchema>; +export type PublishMeshResult = z.infer<typeof PublishMeshResultSchema>; + +export async function createMesh(input: unknown): Promise<MeshSummary> { + const parsed = z.object({ + name: z.string().min(1).max(128), + slug: z.string().regex(/^[a-z0-9-]+$/).optional(), + }).safeParse(input); + if (!parsed.success) throw new MeshNotFoundError('invalid create input'); + try { + const r = await getMeshService().create(parsed.data); + 
return MeshSummarySchema.parse({ + slug: r.slug, name: r.name, kind: r.kind, + peer_count: r.peer_count, peers_online: r.peers_online, + last_used_at: r.last_used_at, + }); + } catch (err) { throw toDomainError(err); } +} + +export async function listMeshes(): Promise<MeshListResult> { + try { + const r = await getMeshService().list(); + return MeshListResultSchema.parse({ + meshes: r.meshes.map(m => ({ + slug: m.slug, name: m.name, kind: m.kind, + peer_count: m.peer_count, peers_online: m.peers_online, + last_used_at: m.last_used_at, + })), + last_used_slug: r.last_used_slug, + }); + } catch (err) { throw toDomainError(err); } +} + +export async function publishMesh(input: unknown): Promise<PublishMeshResult> { + const parsed = z.object({ slug: z.string() }).safeParse(input); + if (!parsed.success) throw new MeshNotFoundError('invalid publish input'); + try { + const r = await getMeshService().publish(parsed.data); + return PublishMeshResultSchema.parse({ slug: r.slug, invite_url: r.invite_url }); + } catch (err) { throw toDomainError(err); } +} + +// ...joinMeshByInvite, renameMesh, leaveMesh, resolveLaunchTarget follow same pattern +``` + +### 5.4 Clipboard facade (even trivial services get a facade) + +```ts +// services/clipboard/facade.ts + +import { z } from 'zod'; +import { getClipboardService } from './index'; + +const DetectInviteResultSchema = z.object({ + has_invite: z.boolean(), + mesh_slug: z.string().nullable(), + url: z.string().url().nullable(), +}).strict(); + +export type DetectInviteResult = z.infer<typeof DetectInviteResultSchema>; + +/** + * Never returns raw clipboard content — only the detected invite metadata. + * Prevents arbitrary clipboard content from flowing into UI state (privacy). + */ +export async function detectInviteInClipboard(): Promise<DetectInviteResult> { + try { + const r = await getClipboardService().detectInvite(); + return DetectInviteResultSchema.parse({ + has_invite: r.has_invite, + mesh_slug: r.mesh_slug ?? null, + url: r.url ?? 
null, + }); + } catch { + return DetectInviteResultSchema.parse({ + has_invite: false, + mesh_slug: null, + url: null, + }); + } +} +``` + +Yes, this is more code than `getClipboardService().detectInvite()` would be. That's the point: the facade prevents UI from ever learning that `clipboardService` exists, and prevents the implementation from leaking raw clipboard content through the boundary. + +## 6. Directory structure + +``` +apps/cli-v2/src/services/auth/ +├── client.ts # private — HTTP calls to /api/auth/cli/* +├── device-code.ts # private — device-code flow orchestration +├── pat.ts # private — PAT parsing and validation +├── token-store.ts # private — ~/.claudemesh/auth.json R/W +├── refresh.ts # private — silent re-auth +├── implementation.ts # private — assembles the service from parts +├── schemas.ts # private — internal Zod schemas +├── errors.ts # private — domain error classes + toDomainError +├── types.ts # private — internal types +├── index.ts # DI factory (entrypoints + own service only) — exports getAuthService() +├── facade.ts # PUBLIC (for ui/commands/cli/mcp and other services) — narrow facade +├── facade.test.ts # facade contract test (verifies no tokens leak) +└── auth.test.ts # internal unit tests +``` + +**Rules for this tree**: + +- `index.ts` contains only **named exports of the service factory/getter**. It must not `export *`, must not re-export internal files, must not expose types for internal implementation details. +- `facade.ts` is a single file. It is never a folder. It never has siblings named `facade.*.ts`. +- Internal files (`client.ts`, `device-code.ts`, etc.) can import each other freely within the service. +- Cross-service access is through the other service's `facade.ts` — never through its `index.ts` or its internals (see §4). +- Nested folders inside a service (e.g. `services/mesh/subsystem/`) are allowed, but every file in them is classified as `service-internal` regardless of depth. + +## 7. ESLint boundaries configuration + +This is the enforced config. 
It has been reviewed for all the bypass paths found in the initial draft: shallow globs, nested files, test overlap, re-exports, dynamic imports, and type-only imports. + +```js +// apps/cli-v2/.eslintrc.cjs +module.exports = { + plugins: ['boundaries', 'claudemesh-custom'], // claudemesh-custom is an in-repo ESLint plugin + settings: { + 'boundaries/elements': [ + // Entry points — process entry + { type: 'entrypoints', pattern: 'src/entrypoints/*.{ts,tsx,mts,cts}' }, + + // Top-level layers + { type: 'cli', pattern: 'src/cli/**/*.{ts,tsx,mts,cts}' }, + { type: 'commands', pattern: 'src/commands/**/*.{ts,tsx,mts,cts}' }, + { type: 'ui', pattern: 'src/ui/**/*.{ts,tsx,mts,cts}' }, + { type: 'mcp', pattern: 'src/mcp/**/*.{ts,tsx,mts,cts}' }, + + // Service layers — order matters: test > facade > index > internal + // Test pattern MUST come first so *.test.ts files classify as service-test, + // not service-internal. + // Facade pattern MUST cover both facade.ts (single file) AND facade/*.ts + // (folder form) to prevent the facade-as-folder bypass. 
+ { type: 'service-test', pattern: 'src/services/*/**/*.test.{ts,tsx,mts,cts}' }, + { type: 'service-facade', pattern: [ + 'src/services/*/facade.{ts,tsx,mts,cts}', + 'src/services/*/facade/**/*.{ts,tsx,mts,cts}', // fallback if someone uses facade/ folder + ]}, + { type: 'service-index', pattern: 'src/services/*/index.{ts,tsx,mts,cts}' }, + { type: 'service-internal', pattern: 'src/services/*/**/*.{ts,tsx,mts,cts}' }, + + // Pure / data layers + { type: 'templates', pattern: 'src/templates/**/*.{ts,tsx,mts,cts}' }, + { type: 'locales', pattern: 'src/locales/**/*.{ts,tsx,mts,cts}' }, + { type: 'utils', pattern: 'src/utils/**/*.{ts,tsx,mts,cts}' }, + { type: 'types', pattern: 'src/types/**/*.{ts,tsx,mts,cts}' }, + { type: 'constants', pattern: 'src/constants/**/*.{ts,tsx,mts,cts}' }, + { type: 'migrations', pattern: 'src/migrations/**/*.{ts,tsx,mts,cts}' }, + ], + 'boundaries/include': ['src/**/*.{ts,tsx,mts,cts}'], + }, + rules: { + // Hard boundary rule — facades-only for all consumer layers + 'boundaries/element-types': ['error', { + default: 'disallow', + rules: [ + // entrypoints compose the application; they need service-index for DI wiring + // but never touch service-internal. + { from: 'entrypoints', allow: [ + 'cli', 'commands', 'ui', 'mcp', + 'service-facade', 'service-index', + 'templates', 'locales', 'utils', 'types', 'constants', 'migrations', + ] }, + + // UI: facades only. + { from: 'ui', allow: [ + 'ui', 'service-facade', + 'locales', 'utils', 'types', 'constants', + ] }, + + // Commands: facades only (no service-index, no service-internal). + { from: 'commands', allow: [ + 'cli', 'ui', 'service-facade', + 'locales', 'utils', 'types', 'constants', + ] }, + + // CLI (non-Ink I/O plumbing): facades only. + { from: 'cli', allow: [ + 'service-facade', + 'locales', 'utils', 'types', 'constants', + ] }, + + // MCP: facades only (TIGHTENED — no longer gets service-index). + // Cross-service composition happens by importing from other services' facades. 
+ { from: 'mcp', allow: [ + 'service-facade', + 'templates', 'locales', 'utils', 'types', 'constants', + ] }, + + // A service's facade can use its own internals + OTHER services' facades + // (cross-service facade composition). No longer uses service-index for + // cross-service calls. + { from: 'service-facade', allow: [ + 'service-internal', 'service-facade', + 'locales', 'utils', 'types', 'constants', + ] }, + + // A service's internals can freely use each other + other services' facades. + { from: 'service-internal', allow: [ + 'service-internal', 'service-facade', + 'templates', 'locales', 'utils', 'types', 'constants', + ] }, + + // A service's index.ts is a factory barrel. It imports its own internals + // and exposes getXxxService(). It does NOT re-export anything else. + // Other services do not import from this — use service-facade for cross- + // service calls. + { from: 'service-index', allow: [ + 'service-internal', + 'locales', 'utils', 'types', 'constants', + ] }, + + // Tests can import their own service freely; may also import OTHER services' + // facades for integration tests (not their internals). + { from: 'service-test', allow: [ + 'service-internal', 'service-facade', 'service-index', + 'service-test', + 'templates', 'locales', 'utils', 'types', 'constants', + ] }, + + // Pure layers + { from: 'templates', allow: ['utils', 'types', 'constants'] }, + { from: 'locales', allow: ['types'] }, + { from: 'utils', allow: ['types'] }, + { from: 'constants', allow: [] }, + { from: 'types', allow: ['types'] }, + + // Migrations: only their own service's index (for the DI factory) and + // internal (for deep data surgery). No cross-service internals. + { from: 'migrations', allow: [ + 'service-index', 'service-internal', + 'utils', 'types', 'constants', + ] }, + ], + }], + + // Ban `export *` globally — closes the bulk re-export loophole. + 'no-restricted-syntax': [ + 'error', + { + selector: "ExportAllDeclaration", + message: "`export *` is forbidden. 
Use named exports.", + }, + ], + + // Custom in-repo rule: ban named re-exports from `./internal` paths in + // service index.ts files. Only `getXxxService` getters can be exported. + 'claudemesh-custom/no-index-reexport-internal': 'error', + + // Custom in-repo rule: ban `import type` and value imports from internal + // service files across layer boundaries. Complements boundaries plugin + // which by default doesn't distinguish value vs type imports. + 'claudemesh-custom/type-imports-count-as-edges': 'error', + + // Custom in-repo rule: use ts-morph AST to find all dynamic `import()` + // calls with non-literal arguments targeting the services/ path. Blocks + // `await import(var)` as well as string literals. + 'claudemesh-custom/no-dynamic-service-imports': 'error', + + // Invert no-restricted-imports to an allowlist: from consumer layers, + // block EVERYTHING under services/*/ except facade.*. + 'no-restricted-imports': [ + 'error', + { + patterns: [ + { + group: [ + // Block direct imports to any internal service file from consumer layers + '**/services/*/!(facade)**', + '**/services/*/!(facade).*', + // Block imports from index.ts unless explicitly from entrypoints + '**/services/*/index', + '**/services/*/index.*', + ], + message: 'Import from services//facade.ts only. These files are internal.', + }, + ], + }, + ], + }, + overrides: [ + // Service internals, facades, and tests bypass the blocklist (they need each other) + { + files: [ + 'src/services/*/**', + ], + rules: { + 'no-restricted-imports': 'off', + }, + }, + // Entrypoints can import service-index for DI wiring + { + files: ['src/entrypoints/*'], + rules: { + 'no-restricted-imports': ['error', { + patterns: [ + { group: ['**/services/*/!(facade|index)**'], message: 'Entrypoints use facade or index only.' 
}, + ], + }], + }, + }, + // Tests can mock internals + { + files: ['tests/**/*.{ts,tsx}'], + rules: { + 'no-restricted-imports': 'off', + }, + }, + ], +}; +``` + +### Custom in-repo ESLint rules + +The three `claudemesh-custom/*` rules above are implemented in `tools/eslint-plugin-claudemesh/`: + +#### `no-index-reexport-internal` + +Parses each `services/*/index.ts` file and rejects any `ExportNamedDeclaration` that references a local file (starting with `./`) unless the exported symbol is a factory getter matching the pattern `get*Service`. + +```ts +// services/auth/index.ts — ALLOWED +export { getAuthService } from './implementation'; + +// services/auth/index.ts — REJECTED +export { deviceCodeLogin } from './device-code'; // leaks internal +export { tokenStore } from './token-store'; // leaks internal +``` + +This closes the named-re-export loophole that `no-restricted-syntax: ExportAllDeclaration` alone didn't catch. + +#### `type-imports-count-as-edges` + +By default, `eslint-plugin-boundaries` may treat `import type { Foo } from '...'` as a free pass because TypeScript erases type-only imports at compile time. But type imports create source-level coupling — renaming an internal type breaks UI code that imported it. This rule marks type imports as full dependency edges for boundary enforcement. + +Implementation: a simple AST walker that reports any `ImportDeclaration` with `importKind === 'type'` where the source path crosses a layer boundary, just like a value import would. + +#### `no-dynamic-service-imports` + +Uses `ts-morph` or the TypeScript compiler API to walk every `ImportExpression` (dynamic `import()` call) in the source tree. 
Rejects any call whose argument is: + +- Not a string literal +- A string literal matching `services/*/[^f]` (anything other than `facade.*`) +- A template literal +- A function call returning a string + +```ts +// REJECTED — non-literal argument +const p = 'services/auth/client'; +await import(p); + +// REJECTED — template literal +await import(`services/${name}/client`); + +// REJECTED — string literal pointing to internal file +await import('@/services/auth/client'); + +// ALLOWED — string literal pointing to facade +await import('@/services/auth/facade'); +``` + +The rule runs as part of the ESLint lint pass. No separate build-time scanner is needed — everything is enforced at CI via ESLint. + +### Key fixes from review (second round) + +- **Pattern order is verified** by a classification test that asserts `services/auth/facade.ts` → `service-facade`, `services/auth/client.ts` → `service-internal`, `services/auth/foo.test.ts` → `service-test` before the rules run. +- **`facade/` folder bypass closed**: the `service-facade` pattern is an array of two globs, the second catches `services/*/facade/**/*.ts`. +- **`commands`, `mcp` no longer reach `service-index`**: both use facades only. MCP cross-service composition goes through other services' facades. +- **`service-facade` imports other services' `service-facade`, not `service-index`**: this makes `service-index` a pure DI factory consumed only by entrypoints. +- **Named re-export loophole closed** via `claudemesh-custom/no-index-reexport-internal`. +- **Dynamic import loophole closed** via `claudemesh-custom/no-dynamic-service-imports` using AST walking, not regex. +- **Type-only imports count as edges** via `claudemesh-custom/type-imports-count-as-edges`. +- **`no-restricted-imports` inverted to allowlist**: consumer layers are blocked from importing ANY file under `services/*/` except `facade.*`. Overrides for entrypoints (which can also use index) and service-internal files (which can import each other). 
+- **`migrations` tightened** to only their own service (not cross-service internals). + +## 8. dependency-cruiser configuration + +Belt-and-suspenders folder-level rules for anything the ESLint config might miss. + +```js +// apps/cli-v2/dependency-cruiser.config.js +module.exports = { + forbidden: [ + { + name: 'ui-only-facades', + comment: 'UI may only import from services/<name>/facade.ts, never internals or index.', + severity: 'error', + from: { path: '^src/ui' }, + to: { + path: '^src/services/[^/]+/', + pathNot: '^src/services/[^/]+/facade\\.(ts|tsx|mts|cts)$', + }, + }, + { + name: 'commands-only-facades', + comment: 'Commands may only import from services/<name>/facade.ts.', + severity: 'error', + from: { path: '^src/commands' }, + to: { + path: '^src/services/[^/]+/', + pathNot: '^src/services/[^/]+/facade\\.(ts|tsx|mts|cts)$', + }, + }, + { + name: 'cli-only-facades', + comment: 'CLI I/O layer may only import from services/<name>/facade.ts.', + severity: 'error', + from: { path: '^src/cli' }, + to: { + path: '^src/services/[^/]+/', + pathNot: '^src/services/[^/]+/facade\\.(ts|tsx|mts|cts)$', + }, + }, + { + name: 'mcp-only-facades', + comment: 'MCP tools may only import from services/<name>/facade.ts, never internals or index.', + severity: 'error', + from: { path: '^src/mcp' }, + to: { + path: '^src/services/[^/]+/', + pathNot: '^src/services/[^/]+/facade\\.(ts|tsx|mts|cts)$', + }, + }, + { + name: 'cli-no-ui', + comment: 'Non-Ink I/O plumbing must not depend on Ink.', + severity: 'error', + from: { path: '^src/cli' }, + to: { path: '^src/ui' }, + }, + { + name: 'services-no-ui', + comment: 'Services must not depend on Ink or commands.', + severity: 'error', + from: { path: '^src/services' }, + to: { path: '^src/(ui|commands|cli)' }, + }, + { + name: 'utils-pure', + comment: 'Utils must not import from effectful layers.', + severity: 'error', + from: { path: '^src/utils' }, + to: { path: '^src/(services|ui|commands|cli|mcp|entrypoints)' }, + }, + { + name: 'types-pure', + 
comment: 'Types must not import from anything except other types.', + severity: 'error', + from: { path: '^src/types' }, + to: { pathNot: '^src/types' }, + }, + { + name: 'no-circular', + comment: 'No circular dependencies anywhere.', + severity: 'error', + from: {}, + to: { circular: true }, + }, + ], + options: { + tsPreCompilationDeps: true, + tsConfig: { fileName: './tsconfig.json' }, + includeOnly: '^src', + }, +}; +``` + +## 9. Type-only imports, dynamic imports, and re-exports + +### 9.1 Type-only imports + +Type-only imports (`import type { X } from '...'`) **do count as dependency edges** for boundary purposes. The rationale: a type import creates coupling. If the internal file's types change, the UI breaks. That's exactly what facades exist to prevent. + +ESLint boundaries treats `import type` as equivalent to `import` for classification purposes. dependency-cruiser is configured with `tsPreCompilationDeps: true` which includes type-only edges. + +### 9.2 Dynamic imports — AST-based enforcement + +`await import('computed-path')` cannot be statically analyzed with regex. Variable arguments (`const p = ...; await import(p)`) and template literals (`` await import(`services/${name}/client`) ``) would escape any regex-based check. + +**Enforcement**: the custom ESLint rule `claudemesh-custom/no-dynamic-service-imports` uses the TypeScript compiler API (via `ts-morph` or `@typescript-eslint/parser`) to walk every `CallExpression` whose callee is the `import` keyword (the `ImportExpression` node type). + +```ts +// tools/eslint-plugin-claudemesh/rules/no-dynamic-service-imports.ts +import type { TSESLint, TSESTree } from '@typescript-eslint/utils'; + +export const noDynamicServiceImports: TSESLint.RuleModule<'illegalDynamicImport', []> = { + meta: { + type: 'problem', + messages: { + illegalDynamicImport: + 'Dynamic import of a service path is forbidden. 
Use the facade directly: {{hint}}', + }, + schema: [], + }, + defaultOptions: [], + create(context) { + return { + ImportExpression(node: TSESTree.ImportExpression) { + const arg = node.source; + + // Case 1: non-literal argument (variable, function call, template with expressions) + if (arg.type !== 'Literal' || typeof arg.value !== 'string') { + // Check if this file is inside src/services/ — those are allowed + // to use dynamic imports freely within their own service + const filename = context.filename; + if (filename.includes('/src/services/')) return; + + context.report({ + node, + messageId: 'illegalDynamicImport', + data: { hint: 'dynamic import arguments must be string literals' }, + }); + return; + } + + // Case 2: literal pointing to a service internal + const path = arg.value; + const match = path.match(/services\/([^/]+)\/(.+)$/); + if (!match) return; + + const [, serviceName, rest] = match; + // Allow only imports of the facade or types + if (rest === 'facade' || rest.startsWith('facade.') || rest.startsWith('facade/')) return; + if (rest === 'types' || rest.startsWith('types.')) return; + + context.report({ + node, + messageId: 'illegalDynamicImport', + data: { + hint: `use '@/services/${serviceName}/facade' instead`, + }, + }); + }, + }; + }, +}; +``` + +This catches: +- `await import('services/auth/client')` — literal pointing to internal +- `await import(computedVar)` — non-literal argument from outside the service +- `` await import(`services/${x}/y`) `` — template literal with expression + +Consumers inside `src/services/` can still use dynamic imports freely within their own service (lazy loading, plugin patterns). The rule only fires when a non-service file tries to reach into a service folder dynamically. + +**No regex in build.ts** — the entire enforcement is via ESLint's CI pass, using TypeScript's AST. 
+ +### 9.3 Re-exports — named re-exports also banned + +`export *` is banned project-wide via `no-restricted-syntax: ExportAllDeclaration`. But that only closes the bulk-leak path — **named re-exports from sibling internals are also banned** via the custom rule `claudemesh-custom/no-index-reexport-internal`. + +**Rules for `services/<name>/index.ts`**: + +- ALLOWED: `export { getAuthService } from './implementation';` — but only if the exported name matches `/^get\w+Service$/`, confirming it's a factory getter +- REJECTED: `export { deviceCodeLogin } from './device-code';` — leaks an internal function +- REJECTED: `export { writeTokenFile } from './token-store';` — leaks a low-level helper +- REJECTED: `export { AuthService } from './types';` — leaks an implementation type (use explicit imports from `./types` if needed by internals, and expose via the facade) + +**Rules for `services/<name>/facade.ts`**: + +- Exports are named only — no `export *`, no namespace re-export +- Every exported symbol is either an `async function` or a `const` data value +- No re-exports at all — the facade BUILDS output from scratch via Zod parsing, never passes through + +**Rules for internal files** (`services/<name>/*.ts` except `facade.ts` and `index.ts`): + +- Can import from each other freely within the same service folder +- Can `export` named symbols to sibling files in the same service +- Cannot `export` to non-sibling consumers except via the facade or the factory getter in `index.ts` + +The custom rule enforces these at CI. Any violation fails the PR with a clear error message pointing to the offending export. + +## 10. 
Testing facades and contract drift + +### 10.1 Facade contract test + +Every service's `facade.test.ts` verifies the contract holds: + +```ts +// services/auth/facade.test.ts + +import { describe, it, expect, vi } from 'vitest'; +import * as facade from './facade'; +import { getAuthService } from './index'; + +vi.mock('./index', () => ({ + getAuthService: vi.fn(), +})); + +describe('auth facade contract', () => { + it('loginWithDeviceCode returns only user info — no token leaks', async () => { + vi.mocked(getAuthService).mockReturnValue({ + startDeviceCodeFlow: vi.fn().mockResolvedValue({ + user: { id: 'u1', display_name: 'Alejandro', email: 'a@b.c' }, + token: 'cm_session_SECRETSECRETSECRETSECRETSECRETSE', + raw_response: { headers: {} }, + }), + } as any); + + const result = await facade.loginWithDeviceCode(); + const serialized = JSON.stringify(result); + + expect(serialized).not.toContain('cm_session_'); + expect(serialized).not.toContain('SECRET'); + expect(serialized).not.toContain('raw_response'); + expect(result).toEqual({ + user: { id: 'u1', display_name: 'Alejandro', email: 'a@b.c' }, + }); + }); + + it('whoAmI never throws even when service fails', async () => { + vi.mocked(getAuthService).mockReturnValue({ + getCurrentState: vi.fn().mockRejectedValue(new Error('boom')), + } as any); + + await expect(facade.whoAmI()).resolves.toBeDefined(); + }); + + it('loginWithToken rejects malformed token with InvalidTokenError', async () => { + const { InvalidTokenError } = await import('./errors'); + await expect(facade.loginWithToken({ token: 'not-a-token' })) + .rejects.toBeInstanceOf(InvalidTokenError); + }); + + it('every exported function is async (returns a Promise)', () => { + for (const key of Object.keys(facade)) { + const v = (facade as any)[key]; + if (typeof v === 'function') { + const r = v({}); + expect(r).toBeInstanceOf(Promise); + r.catch(() => {}); // swallow test-only rejection + } + } + }); +}); +``` + +### 10.1.5 Boundaries classification test 
(verify pattern precedence) + +`eslint-plugin-boundaries` pattern resolution is implementation-defined — not all versions guarantee "first match wins" or "most specific glob wins." Before trusting the config, we verify classification empirically: + +```ts +// tests/unit/facade-boundaries-classification.test.ts + +import { describe, it, expect } from 'vitest'; +import { ESLint } from 'eslint'; + +const linter = new ESLint({ overrideConfigFile: '.eslintrc.cjs' }); + +async function classifyFile(filePath: string): Promise<string | null> { + // Use the boundaries plugin's internal classification function. + // If the plugin exposes it via getFileElement(), use that directly. + // Otherwise, run the linter and check which element-types rule fires. + const config = await linter.calculateConfigForFile(filePath); + const boundariesSettings = config.settings?.['boundaries/elements'] ?? []; + // Match each pattern in order; return the first hit + for (const element of boundariesSettings) { + const patterns = Array.isArray(element.pattern) ? 
element.pattern : [element.pattern]; + for (const p of patterns) { + // Use minimatch or picomatch to test the pattern + if (minimatch(filePath, p)) return element.type; + } + } + return null; +} + +describe('boundaries classification', () => { + const cases: Array<[string, string]> = [ + ['src/entrypoints/cli.ts', 'entrypoints'], + ['src/cli/print.ts', 'cli'], + ['src/commands/launch.ts', 'commands'], + ['src/ui/screens/AuthScreen.tsx', 'ui'], + ['src/mcp/tools/memory.ts', 'mcp'], + + // Service classifications — the critical part + ['src/services/auth/facade.ts', 'service-facade'], + ['src/services/auth/facade/helper.ts', 'service-facade'], // facade-as-folder bypass closed + ['src/services/auth/index.ts', 'service-index'], + ['src/services/auth/client.ts', 'service-internal'], + ['src/services/auth/device-code.ts', 'service-internal'], + ['src/services/auth/nested/deep/helper.ts', 'service-internal'], // nested files caught + ['src/services/auth/auth.test.ts', 'service-test'], + ['src/services/auth/nested/deep.test.ts', 'service-test'], // nested tests caught + + // Pure layers + ['src/utils/levenshtein.ts', 'utils'], + ['src/types/api.ts', 'types'], + ['src/constants/paths.ts', 'constants'], + ['src/locales/en.ts', 'locales'], + ['src/templates/solo.ts', 'templates'], + ['src/migrations/0001-v1-config.ts', 'migrations'], + ]; + + for (const [path, expected] of cases) { + it(`${path} classifies as ${expected}`, async () => { + const actual = await classifyFile(path); + expect(actual).toBe(expected); + }); + } +}); +``` + +This test runs in CI and fails if the boundaries plugin misclassifies any file. If a future version of `eslint-plugin-boundaries` changes pattern resolution, this test catches it before the real rules silently break. + +### 10.2 Boundary leak scanner (AST-based) + +Regex scanning has too many false positives (`device_token` as a legitimate field name) and false negatives (schemas not named with `Output` or `Result`). 
The leak scanner uses `ts-morph` to walk each facade's AST and extract actual Zod schema output types: + +```ts +// tests/unit/facade-boundary-scan.test.ts + +import { describe, it, expect } from 'vitest'; +import { Project, Type, VariableDeclaration } from 'ts-morph'; +import { globSync } from 'glob'; + +// Keys we never want to expose through an output schema. Whole-key match, not substring — +// so "device_token" (legitimate device identifier) doesn't collide with "token" (auth secret). +const FORBIDDEN_OUTPUT_KEYS = new Set([ + // Auth + 'token', 'access_token', 'refresh_token', 'api_key', 'apiKey', 'secret', + 'password', 'session_token', 'sessionToken', + // Low-level handles + 'connection', 'db', 'pool', 'client', 'socket', 'stream', + // Internal URLs + 'broker_url', 'api_url', 'internal_url', 'webhook_secret', +]); + +// Patterns that indicate a raw filesystem path or secret-looking value +const FORBIDDEN_VALUE_PATTERNS = [ + /^\/home\//, + /^\/Users\//, + /^\/var\//, + /^\/etc\//, + /^~\//, + /cm_(session|pat)_/, +]; + +describe('facade boundary scan (AST-based)', () => { + const project = new Project({ + tsConfigFilePath: 'tsconfig.json', + }); + + const facadeSourceFiles = project + .getSourceFiles() + .filter(f => f.getFilePath().includes('/services/') && f.getBaseName().startsWith('facade')); + + for (const sourceFile of facadeSourceFiles) { + const filePath = sourceFile.getFilePath(); + + it(`${filePath} — no forbidden keys in exported types`, () => { + // Walk all exported type declarations and check their shape + const exportedTypes = sourceFile.getExportedDeclarations(); + for (const [exportName, declarations] of exportedTypes) { + for (const decl of declarations) { + // Get the TypeScript type for this declaration + const type = decl.getType(); + assertNoForbiddenKeysInType(type, exportName, filePath); + } + } + }); + + it(`${filePath} — no export * statements`, () => { + const hasExportStar = sourceFile + .getExportDeclarations() + .some(d => 
d.isNamespaceExport()); + expect(hasExportStar, `${filePath} uses export * — use named exports`).toBe(false); + }); + + it(`${filePath} — does not import from ui/, commands/, cli/, mcp/, entrypoints/`, () => { + const imports = sourceFile.getImportDeclarations(); + for (const imp of imports) { + const spec = imp.getModuleSpecifierValue(); + expect( + spec, + `${filePath} imports from forbidden layer: ${spec}`, + ).not.toMatch(/^(?:\.\.?\/)*(?:ui|commands|cli|mcp|entrypoints)\//); + } + }); + } +}); + +function assertNoForbiddenKeysInType(type: Type, contextName: string, file: string): void { + // Check object property names + if (type.isObject()) { + for (const prop of type.getProperties()) { + const name = prop.getName(); + // Exact match (not substring) so `device_token` doesn't collide with `token` + if (FORBIDDEN_OUTPUT_KEYS.has(name)) { + throw new Error( + `Forbidden key "${name}" in exported type "${contextName}" at ${file}. ` + + `Output types cannot expose raw tokens, secrets, or low-level handles.`, + ); + } + // Recurse into nested object types + const propType = prop.getTypeAtLocation(prop.getValueDeclarationOrThrow()); + assertNoForbiddenKeysInType(propType, `${contextName}.${name}`, file); + } + } + + // Check union members (e.g. 
`A | B`) + if (type.isUnion()) { + for (const member of type.getUnionTypes()) { + assertNoForbiddenKeysInType(member, contextName, file); + } + } + + // Check array element types + if (type.isArray()) { + const elementType = type.getArrayElementType(); + if (elementType) { + assertNoForbiddenKeysInType(elementType, `${contextName}[]`, file); + } + } +} +``` + +This test: +- **Walks actual TypeScript types** using `ts-morph`, not regex on source strings +- **Exact key matches** on the forbidden list, so `device_token` (legitimate) doesn't trip on `token` (secret) +- **Recursive** — catches nested objects, arrays, and union types +- **Covers every exported type** including ones not named `Output` or `Result` +- **Runs on every CI build** — adding a facade automatically adds test coverage + +The test is slower than regex scanning (parses the TS project), but it runs once per CI build (~5 seconds for the whole service tree) and its false-positive rate is zero. + +### 10.3 UI tests use mock facades + +UI components test against a mock facade, never the real service: + +```ts +// ui/screens/AuthScreen.test.tsx + +import { describe, it, expect, vi } from 'vitest'; +import { render } from '@/tests/helpers/ink-render'; +import { AuthScreen } from './AuthScreen'; + +vi.mock('@/services/auth/facade', () => ({ + loginWithDeviceCode: vi.fn().mockResolvedValue({ + user: { id: 'u1', display_name: 'Test User', email: 't@e.st' }, + }), + whoAmI: vi.fn().mockResolvedValue({ + signed_in: false, user: null, token_source: null, + }), +})); + +it('AuthScreen renders signed-in state after login', async () => { + const { lastFrame, stdin } = render(); + stdin.write('\r'); + await new Promise(r => setTimeout(r, 50)); + expect(lastFrame()).toContain('Signed in as Test User'); +}); +``` + +The UI test never touches SQLite, never makes a network call, never reads an environment variable. It's a pure render test. 
If the UI ever accidentally imports from `services/auth/device-code` directly, ESLint catches it at CI before the test runs. + +## 11. What facades never expose + +Explicit blocklist. Any facade output containing one of these fails the boundary scanner: + +1. **Raw auth tokens** — including session tokens, PATs, API keys, refresh tokens +2. **Full API URLs** — callers learn the endpoint through the service, not as data +3. **Database handles, prepared statements, transaction objects** +4. **Filesystem paths** as strings — if UI needs to show a path to the user, the service returns a `{ user_visible_path: '~/.claudemesh/...' }` where the field name explicitly says "for display" +5. **HTTP response objects** — headers, status codes, raw bodies +6. **Function references to other services** — the facade composes internally; callers get data only +7. **Opaque handles** that require follow-up facade calls to make useful — prefer self-contained returns +8. **Error stack traces** — facades throw domain errors; stack traces go to logs via `runtime/logger.ts` + +## 12. 
Async streams and cancellation + +For operations that stream data (log tails, message streams, sync progress), facades use async iterators with explicit `AbortSignal`: + +```ts +// services/stream/facade.ts + +import { z } from 'zod'; +import { getStreamService } from './index'; + +const StreamEventSchema = z.object({ + type: z.enum(['message', 'peer_update', 'sync_progress']), + timestamp: z.number().int(), + payload: z.record(z.unknown()), +}).strict(); + +export type StreamEvent = z.infer<typeof StreamEventSchema>; + +export async function* subscribeToMesh(input: { + mesh_slug: string; + signal: AbortSignal; +}): AsyncIterable<StreamEvent> { + const service = getStreamService(); + const stream = service.subscribe(input.mesh_slug); + + try { + input.signal.addEventListener('abort', () => stream.close(), { once: true }); + for await (const raw of stream) { + if (input.signal.aborted) return; + yield StreamEventSchema.parse({ + type: raw.type, + timestamp: raw.timestamp, + payload: raw.payload, + }); + } + } finally { + stream.close(); + } +} +``` + +**Cancellation rules**: + +- Every async iterator facade takes an `AbortSignal` as a required input field +- The facade attaches an `abort` listener that closes the underlying stream +- The `finally` block ensures the stream closes on any exit path (early return, throw, iterator break) +- Consumers MUST pass a signal — there's no "listen forever" mode + +Consumers use it like this: + +```ts +// ui/screens/StreamScreen.tsx +const ctrl = new AbortController(); +useEffect(() => { + (async () => { + for await (const event of subscribeToMesh({ mesh_slug, signal: ctrl.signal })) { + setEvents(prev => [...prev, event]); + } + })(); + return () => ctrl.abort(); +}, [mesh_slug]); +``` + +## 13. 
Errors and validation + +### 13.1 Input validation + +Every facade input is `unknown` in the public type signature and parsed with Zod at the boundary: + +```ts +export async function doThing(input: unknown): Promise { + const parsed = InputSchema.safeParse(input); + if (!parsed.success) { + throw new InvalidInputError('specific message', parsed.error); + } + // ... use parsed.data safely +} +``` + +Why `unknown` instead of a typed input? Because facade callers sometimes come from dynamic sources (JSON input, command args, user config). Typing the input as `unknown` forces the facade to validate — a typed input would let a caller bypass validation with a cast. + +### 13.2 Output validation + +Every facade output is built explicitly (no spread, no pass-through) and `.parse()`'d through the output schema with `.strict()`. Strict mode rejects extra fields, eliminating the "class instance with matching fields" bypass. + +### 13.3 Error mapping + +Every facade catches all errors and maps them through `toDomainError(err)`: + +- Domain errors already in the error hierarchy → returned as-is +- `ZodError` → mapped to a domain `InvalidInputError` with a generic message (never the raw Zod error, which might contain internal details) +- Node errors (`ENOTFOUND`, `ECONNREFUSED`, etc.) → mapped to `NetworkError` or similar +- Everything else → mapped to a generic `InternalError` with the raw error stored in `cause` for logging + +The caller can catch specific error classes: + +```ts +try { + await loginWithDeviceCode(); +} catch (err) { + if (err instanceof DeviceCodeTimeoutError) { + // specific handling + } else if (err instanceof AuthNetworkError) { + // different handling + } else { + // unexpected — report to telemetry + } +} +``` + +### 13.4 Logging + +Facades never log. Services log via `runtime/logger.ts`. This keeps the facade output deterministic (same input → same output → same thrown error), which is what makes them testable. + +## 14. 
FAQ + +### Why facades instead of tighter file naming? + +Naming conventions rot. A rule "UI can only import files named `public-*.ts`" works for a week, then someone creates `helper.ts` that's "obviously meant to be public" and imports it. Facades are enforced by ESLint + dependency-cruiser + a boundary scanner test — three layers of tooling, not social pressure. + +### Doesn't this add boilerplate? + +~40-60 lines per service for the facade + schemas + error mapping. In exchange: testable UI, refactorable services, zero accidental leaks, zero circular imports, explicit contract that survives personnel changes. Worth it at scale. + +### What about cross-service composition? + +Services compose through each other's facades, not through `index.ts`. E.g., `services/mesh/publish.ts` imports `whoAmI` from `services/auth/facade.ts`. A service's `index.ts` is a pure DI factory for entrypoints, not a cross-service API. + +### What about the MCP server? + +MCP tool handlers are consumers, like commands. They import service facades only — no `service-index`, no service internals — and compose across services through those facades. This keeps MCP implementations on the same validated contract as every other consumer. + +### What if a facade needs to return a stream? + +Async iterator with required `AbortSignal` (see §12). No callbacks, no EventEmitters, no Node streams exposed. + +### What if two facades need the same internal helper? + +Move the helper to `utils/` (if pure) or to a shared service (if effectful). Facades never share implementation — only types. + +### How do we version facades across CLI releases? + +Facade function signatures are the public contract. Breaking changes require a major version bump. Additive changes (new optional params, new optional return fields) are safe in minor releases. The boundary scanner test doubles as a regression guard — a PR that removes a field from an output schema will cause any test asserting on that field to fail. 
+ +### How do we handle complex flows with progress? + +Either: +- Async iterator yielding progress events (preferred for long-running operations) +- Split into `start*` and `poll*` facade pairs where the UI polls for state +- Callbacks for progress (only progress, not state) — discouraged but allowed with explicit `signal` support + +### What if a service has two types of consumers with different needs? + +One facade per service. If MCP needs more than UI, the facade exposes more — and UI just doesn't call those methods. The facade's surface area is the union of all consumer needs, not the intersection. + +### Can facades import from each other? + +Yes, but only facade-to-facade. A facade belongs to exactly one service and imports only its own internals + other services' `facade.ts` — never another service's internals or `index.ts`. If facade A needs more than a thin call into facade B, that's a sign the logic belongs in a service, not a facade. The facade is an adapter, not an orchestrator. + +### What about tests of nested service folders? + +The boundary config uses `src/services/*/**/*.test.ts` which catches any depth. Tests can import from their own service's internals freely, as specified in the `service-test` rule. + +### What about facades for utilities? + +No. Utilities in `utils/` are pure functions; they need no facade. The facade pattern exists to bound effectful services, not pure code. + +--- + +**End of spec.** diff --git a/.artifacts/specs/2026-04-10-cli-v2-pass2-final-vision.md b/.artifacts/specs/2026-04-10-cli-v2-pass2-final-vision.md new file mode 100644 index 0000000..60bf974 --- /dev/null +++ b/.artifacts/specs/2026-04-10-cli-v2-pass2-final-vision.md @@ -0,0 +1,1610 @@ +# claudemesh-cli v2 Pass 2 — Final Vision + +> ⚠️ **This document describes v2 Pass 2 — the longer-term vision, NOT the immediate Pass 1 scope.** +> +> For the v2 Pass 1 implementation target, see **`2026-04-11-cli-v2-pass1.md`**. +> +> Pass 1 is narrower: refactor folder structure + add CLI user flows + preserve every v1 behavior + keep broker unchanged. 
No local-first storage, no Lamport algorithm, no broker security rewrites, no MCP catalog tiering. +> +> This document is retained as reference for future Pass 2 work. + +**Status:** Pass 2 future reference — NOT the Pass 1 implementation target +**Created:** 2026-04-10 +**Consolidated:** 2026-04-10 (post-reviews, all amendments merged into body, no appendices) +**Target version:** v1.0.0 (promoted from v0.11.x after beta) +**Supersedes / absorbs:** `2026-04-10-cli-auth-device-code-pat.md`, `2026-04-10-cli-wizard-architecture-refactor.md` + +**Companion specs (authoritative on their concerns; this spec defers to them):** +- `2026-04-10-cli-v2-ux-design.md` — voice, tone, microcopy, picker rules, accessibility, delight beats +- `2026-04-10-cli-v2-local-first-storage.md` — SQLite schema, lamport algorithm, sync protocol, single-writer queue +- `2026-04-10-cli-v2-facade-pattern.md` — UI↔services boundary enforcement +- `2026-04-10-cli-v2-shared-infrastructure.md` — broker-backed services: Postgres, Neo4j, Qdrant, MinIO, MCP registry, vault, URL watch + +**Related:** `2026-04-10-anthropic-vision-meshes-invites.md` (product vision) + +## Reading order for new contributors + +The v2 spec surface totals ~8,000 lines across 5 documents. A new developer should read in this order: + +1. **This document (final-vision)** — start with §0 executive summary, §1 governing rule, §2 dream experiences, §3 architectural principles, §4 mesh state model, §6 source tree overview, §11 command surface, §16 implementation phases +2. **`cli-v2-ux-design.md`** — for every design question. Read §1–§6 fully (philosophy, rules, voice, first-run, session kinds, microcopy catalog) +3. **`cli-v2-local-first-storage.md`** — before implementing any tool that touches SQLite. §1–§7 are load-bearing (principles, runtime, file layout, Lamport algorithm, schema, vector model, memory recall) +4. **`cli-v2-facade-pattern.md`** — before writing any service or facade. 
§1–§9 (problem, principle, contract, import policy, examples, directory structure, ESLint config, type imports, dynamic imports, re-exports) +5. **`cli-v2-shared-infrastructure.md`** — before implementing any broker-backed tool. §1 hybrid architecture + §3 RBAC + the specific section for the feature being implemented (§4 Postgres, §5 Neo4j, §6 Qdrant, §7 MinIO, §8–§9 MCP registry, §10 vault, §11 URL watch, §12 default catalog) + +**Conflict resolution** between documents: this final-vision document is authoritative for architectural questions. When two documents disagree, the companion spec wins for its own domain (UX questions → ux-design, storage questions → local-first-storage, boundary questions → facade-pattern, broker questions → shared-infrastructure). + +--- + +## 0. Executive summary + +claudemesh-cli v2 is a ground-up rewrite of `apps/cli/` that delivers a **zero-friction, local-first-for-personal, broker-backed-for-shared, Apple-grade terminal experience** for spawning Claude Code sessions into a peer mesh. It ships as a sibling `apps/cli-v2/` scaffolded against v0.10.5 as reference, atomically swapped in once complete, and published to npm as `claudemesh-cli@1.0.0`. + +The rewrite is justified by four converging needs that cannot be satisfied by incremental refactoring of v1: + +1. **UX debt** — 27 subcommands with imperative branching, overloaded flags, and terminal-state bleed on wizard→claude handoff +2. **Architecture debt** — business logic scattered between commands and runtime, no enforced dependency boundaries, no facade pattern isolating UI from effectful services +3. **Missing capabilities** — no CLI auth (all account actions require the web), no local-first storage (broker is in the critical path for per-peer data), no dependency-injected services layer that would make testing tractable +4. 
**Visual inconsistency** — no central palette, no shared layout primitives, no status row pattern, ad-hoc colors per screen + +The v2 rewrite addresses all four in one coordinated pass. Features that work in v1 are preserved — v2 is a **restructuring**, not a feature cut. Everything the marketing page promises today ships in v1.0.0. + +--- + +## 1. The governing rule + +> **A first-time user runs `claudemesh`, clicks Approve in a browser once, and is inside Claude Code with a working mesh. A returning user runs `claudemesh` and the terminal becomes Claude Code. Everything else in this document is a consequence of that rule.** + +Every feature, every screen, every command, every error message gets held up against this sentence. If it introduces a step that isn't strictly necessary to satisfy the rule, it doesn't ship in v1.0. + +--- + +## 2. The dream experiences (verbatim, tested end to end) + +These scenarios are the acceptance test for the governing rule. Each is locked copy reviewed by design before shipping. + +### 2.1 First run, fresh machine (brand new user) + +``` +$ claudemesh + + claudemesh + Peer mesh for Claude Code sessions. + + Creating your mesh… + + ✔ Signed in as Alejandro + ✔ Your mesh "alejandro-mbp" is ready + + You're in. + + Opening Claude Code… +``` + +**Elapsed wall time target:** < 8 seconds including browser round-trip. +**Questions asked of the user:** 1 (Approve button in browser). +**Keystrokes in terminal:** 0. +**Decisions made silently:** mesh name (hostname), display name (account name), role (member), broker URL (default), claude args (none), template (solo). + +### 2.2 Daily use, returning machine + +``` +$ claudemesh +[terminal becomes Claude Code, instantly] +``` + +**Elapsed wall time target:** < 400ms of CLI overhead before handoff. +**Frames rendered:** 0 (no wizard, no welcome, no banner). +**State consulted:** `~/.claudemesh/state.json` for last-used mesh, name, role. 
+ +### 2.3 Teammate sends an invite + +Terminal 1 (Alice): +``` +$ claudemesh invite bob@example.com + + ✔ Sent to bob@example.com. + ✔ Also copied to clipboard. +``` + +Terminal 2 (Bob, with the link in clipboard): +``` +$ claudemesh + + Detected invite in clipboard. +▸ Join "alice-team" + Continue to "bob-mbp" +``` + +Bob hits Enter. Claude Code launches in `alice-team`. Total keystrokes: one `claudemesh`, one Enter. + +### 2.4 Starting a new mesh for a team + +``` +$ claudemesh new + + Name? Platform team + + ✔ Created "platform-team". + ✔ You're in. + + Invite teammates: claudemesh invite +``` + +One prompt (name). Slug auto-derived. Template = team (if flag given) or solo (default). `claudemesh invite` afterwards requires no arguments — it defaults to the current mesh, 7-day expiry, unlimited uses, clipboard + optional email. + +### 2.5 Broker goes down mid-session + +Claude Code is running in a shared mesh. Broker drops. The status line (Claude Code's bottom bar) transitions from green `◉` to yellow `◉` (reconnecting), then gray `◎` (offline). No modal. No interruption. Messages queue locally. + +When the broker returns: +``` +◉ Reconnected. +``` +One word, one line, auto-dismissed after 2 seconds. Peer count is visible in the persistent status line, not repeated in the notification. + +### 2.6 Token expired + +``` +$ claudemesh peers + + Your sign-in expired. Refreshing in browser… + ⠋ + + alice idle working on auth spec + bob working launching CI builds +``` + +Re-auth is invisible recovery, not a user task. The user typed `claudemesh peers` and got peers — the refresh happened without any action on their part. The "Refreshing in browser…" notice and spinner appear only because the refresh takes longer than 200ms (see rule: no spinners under 200ms). 
+ +### 2.7 Power user, fully scripted + +```bash +#!/usr/bin/env bash +# CI pipeline +export CLAUDEMESH_TOKEN="$CI_PAT" +claudemesh new "ci-run-$GITHUB_RUN_ID" --template ci --json > mesh.json +claudemesh launch --mesh "ci-run-$GITHUB_RUN_ID" -- --print "Analyze this PR" < diff.txt +``` + +Non-interactive. No prompts. Exits with clear status codes. `--json` produces parseable output. PAT resolved from environment variable. Clean fail-fast if required flags are missing. + +### 2.8 First-run failure modes (catalog) + +Every failure mode produces a specific Anthropic-voice error message. Full taxonomy in the UX spec §6. + +| Scenario | Message | +|---|---| +| Browser won't open | "Open this URL to sign in: https://... (we couldn't open it automatically)" | +| Browser opens but user closes it | After 10 min: "Sign-in timed out. Run `claudemesh` to try again." | +| User denies in browser | "Sign-in canceled. Run `claudemesh` to try again." | +| No network | "Can't reach claudemesh.com. Check your connection and try again." | +| Broker is down (dashboard reachable) | "The dashboard is reachable but the mesh broker isn't. Retrying in 10s…" | +| Broker up, mesh creation fails | "Your account is set up, but mesh creation failed. Run `claudemesh new` to retry." | +| Claude binary missing | "Claude Code isn't installed. Install it from https://claude.ai/code and run `claudemesh` again." | + +--- + +## 3. Architectural principles + +These are inviolable. Every PR, every screen, every refactor checks against them. Violation = revision. + +### 3.1 The governing rule (restated as architectural constraint) + +Design every code path to minimize distance from the governing rule. If a new feature adds a screen, a flag, or a confirmation beat on the happy path, it doesn't ship in v1.0. 
+ +### 3.2 Hybrid architecture: local-first for personal data, broker-backed for shared data + +The v2 architecture is **hybrid**, not pure local-first: + +- **Local-first (SQLite)** is source of truth for per-peer data: memory, state, personal files, task claims, profile, display name, last-used cache. These tools work fully offline. See `cli-v2-local-first-storage.md` for the complete schema and sync protocol. +- **Broker-backed** is source of truth for shared-mesh data: SQL tables (Postgres schema-per-mesh), graph (Neo4j database-per-mesh), vector search (Qdrant collection-per-mesh), large files (MinIO bucket-per-mesh), deployed MCP servers (Docker-sandboxed on broker VPS), vault credentials, URL watches. See `cli-v2-shared-infrastructure.md` for isolation models, RBAC, resource limits, and the default MCP catalog. + +The rule for deciding which side owns a feature: + +> **If a feature requires reading another peer's data in real time, it's broker-backed. If it only needs your own data, it's local-first.** + +This is what v1 already does. v2 makes it explicit in the spec. + +### 3.2.1 Aggregate tool consistency model + +Some tools aggregate data from both sides: `mesh_info`, `mesh_stats`, `list_peers`, `peers` command output. 
These are explicitly annotated with their **staleness guarantees** and **consistency mode**: + +| Tool | Local data | Broker data | Consistency model | Staleness signal | +|---|---|---|---|---| +| `mesh_info` | slug, name, kind, peer_count (cached), role | broker_url, schema_version, feature flags | Eventually consistent; local cache refreshed on broker connect | `last_synced_at` timestamp in response | +| `mesh_stats` | local tool call counts, outbox/inbox lag | broker-side peer count, storage sizes, deployed MCP count | Read-through: broker query if online, cached if offline | `fresh: true/false` flag; cache TTL 60s | +| `list_peers` | peer cache from last broker update | (none — always uses cache) | Snapshot consistent; marked stale after 5 min | `stale: true` if age > 5 min, also `last_seen_at` per peer | +| `peers` command | local peer cache | peers service query (live) | Live read: broker query with 5s timeout, fall back to cache on failure | Shows "(cached, N seconds ago)" suffix if stale | +| `mesh_clock` | local lamport counter | (none) | Honestly local; returns `sync_state: offline` if broker unreachable | `sync_state: synced/stale/offline` field | + +**Key principles**: +- Aggregates NEVER silently merge local + broker data. Either the response is fully local (with staleness annotation) or a fresh broker read (with timeout + fallback). +- Every aggregate response includes a staleness signal the caller can check. +- When the broker is unreachable, aggregates degrade gracefully to local data with explicit `stale: true` flagging. +- "Source of truth" for aggregates is the local cache — updated from the broker opportunistically. + +### 3.3 One-way dependency graph + +Enforced by ESLint `boundaries` plugin + `dependency-cruiser` at CI. Full rules in the facade pattern spec. 
+ +``` +entrypoints/ → everything (top of the graph) +commands/ → cli, ui, service-facade, utils, types, constants, locales +mcp/ → service-facade, service-index, templates, utils, types, constants, locales +cli/ → service-facade, utils, types, constants, locales (non-Ink I/O plumbing) +ui/ → service-facade, utils, types, constants, locales (Ink rendering only) +services/* → services/*, templates, locales, utils, types, constants +templates/ → utils, types, constants +locales/ → types +utils/ → types +constants/ → (nothing) +types/ → types only +migrations/ → services/config, services/auth, types, utils +``` + +Two load-bearing constraints: + +1. **`services/*` is the only layer that touches filesystem, network, crypto, or env.** Everything above it composes services. Everything below it is pure. +2. **UI and commands go through `services/<feature>/facade.ts`, never through internal service files.** Facades are narrow Zod-validated interfaces that hide implementation details. See `cli-v2-facade-pattern.md`. + +### 3.4 Service composition via explicit dependency injection + +Services compose at the **facade layer**, not through `index.ts`. A service that needs another service's functionality imports from `services/<feature>/facade.ts`. The `index.ts` file is a thin factory barrel used only by `entrypoints/cli.ts` for DI wiring. 
+ +Service wiring happens in one place — `entrypoints/cli.ts` — and services receive their dependencies explicitly at construction time: + +```ts +// entrypoints/cli.ts +const apiClient = createApiClient({ baseUrl: config.apiUrl }); +const authService = createAuthService({ + tokenStore: createTokenStore({ path: config.authPath }), + apiClient, +}); +const meshService = createMeshService({ + authService, + brokerClient: createBrokerClient({ wsUrl: config.brokerUrl }), + db: sqliteDb, +}); +const inviteService = createInviteService({ meshService, authService, apiClient }); +``` + +**No service holds another as a module-level singleton.** `services/*/index.ts` exposes lazy getters (`getAuthService()`) backed by the injected instances. The top-level wiring in `entrypoints/cli.ts` is a linear script: dependencies are constructed in order, each later service receiving references to earlier ones. + +**What this prevents**: +- **Module-level import cycles**: impossible because the top-level wiring imports from each service's `index.ts` once, and service factories only import types (not implementations) from other services. +- **Accidental singleton drift**: every service is explicitly constructed with its dependencies; no `require()`-style hidden singletons. + +**What this does NOT automatically prevent** (requires discipline + explicit layering): +- **Runtime mutual calls**: service A calling service B's method while B also calls A's method is a design decision, not an import cycle. The DI pattern doesn't block it, but the service-tier list below does constrain which services can depend on which. +- **Hidden runtime coupling**: if a service stores a reference to another service and calls it later, that's a real dependency even if there's no import cycle. Track these explicitly in the service's `README.md`. + +### 3.4.1 Service dependency tiers (enforced via dependency-cruiser) + +To prevent hidden layering cycles between services, `services/*` is organized into explicit tiers. 
A service can only depend on services in lower-numbered tiers (or same-tier for peer services). Dependency-cruiser enforces this at CI. + +| Tier | Services | Rationale | +|---|---|---| +| **1 — foundational** | `crypto`, `config`, `state`, `device`, `clipboard`, `spawn`, `i18n`, `telemetry`, `logger`, `update`, `lifecycle` | Pure services or thin wrappers over OS/filesystem; no business logic | +| **2 — infrastructure** | `api`, `store` | HTTP client and SQLite store; used by higher-tier services | +| **3 — auth** | `auth` | Depends on api (HTTP) and store (token persistence) | +| **4 — broker** | `broker` | Depends on auth (for authenticated WS), api, crypto, store | +| **5 — mesh** | `mesh` | Depends on auth, broker, store, crypto, config, device | +| **6 — mesh features** | `invite`, `health` | Depends on mesh, auth, broker, api | + +**Rules**: +- A service at tier N can import from services at tiers 1..N−1 (facades only) and from same-tier peers (only if explicitly documented as peer services). +- Cross-tier upward imports are forbidden: `auth` cannot import from `mesh`, even through the facade. +- Dependency-cruiser enforces this with tier-aware rules in `dependency-cruiser.config.js`. + +The tier list is documented in `apps/cli-v2/src/services/README.md` and validated at CI by a rule that reads the tier assignments from that file. + +### 3.5 Feature-folder, not layer-folder + +Each feature lives in `services/<feature>/` with everything it needs: client, logic, schemas, types, tests, facade. Claude Code's pattern, validated at ~200k-LOC scale. Rejected alternative: split by layer (`runtime/` + `operations/`) — adds folder hops without adding boundary enforcement that feature-folders + dependency-cruiser don't already provide. + +### 3.6 No silent magic, no silent defaults that matter + +It's OK to auto-pick the mesh name on first run because the user can rename it with one command. It's NOT OK to silently use a default the user can't easily inspect or change. 
Everything the CLI decided for you is visible via `claudemesh whoami --verbose`. + +### 3.7 Visual restraint as a design principle + +Six semantic color roles, ten icons, two-space indent. No boxes. No borders. No ASCII art. No animations. No fake typing effects. Every frame is deliberate. Full design system in the UX spec. + +### 3.8 Zero runtime or code dependencies on v1 + +**v2 is a clean rewrite, not a refactor.** The `apps/cli-v2/` tree has **no imports from `apps/cli/`**, no shared types, no shared tests, no shared fixtures, no helper modules reused from v1. If v2 needs a piece of logic that exists in v1, it is **ported** into v2 (rewritten in the v2 architecture) or **deferred** to v1.1+. + +Consequences: + +1. **No `import` or `require`** pointing at `apps/cli/` from anywhere under `apps/cli-v2/`. CI has a lint rule: `no-v1-imports` fails any PR that tries. +2. **No shared workspace helpers** — v2 has its own `tests/helpers/`, its own `.eslintrc.cjs`, its own build pipeline. Not `@claudemesh/test-utils` or similar. +3. **No shared SQLite schema, config format, or wire protocol assumptions** — v2's `services/store` uses a fresh schema; migration from v1 config is explicit (see §15) and only reads the old file format, it does not call v1 code to do it. +4. **No dependency on `apps/broker/src/telegram-bridge.ts`** — the v1 telegram bridge is broker-side hardcoded code. v2 replaces it with a deployed MCP connector (see shared-infrastructure spec §9 and §12). The v2 CLI never connects to the v1 telegram bridge endpoint. +5. **Broker surface is versioned** — v2 broker ships as a new broker image (`claudemesh/broker:1.0.0`) with a separate WS protocol endpoint. v1 and v2 brokers can run side-by-side during the transition, but v2 does not speak the v1 protocol. +6. **v1 → v2 cutover is user-side** — users migrate by running `claudemesh advanced migrate` on first v2 launch, which reads their v1 `~/.claudemesh/config.json` and translates it to v2 shape. 
v2 never links against v1 code to do this. + +**Why this is non-negotiable**: allowing v2 to import from v1 would couple their release cycles, prevent v1 from being deleted after the coordinated swap (Phase 10), and turn the "atomic swap" into a dependency-untangling exercise. The whole point of v2 is a clean slate. + +### 3.9 Pre-1.0 is for breaking; 1.0 is for keeping + +v0.11.x through v0.19.x are open season for breaking changes. v1.0.0 is the commitment: after that, deprecations need a minor-version cycle and a migration path. The v2 rewrite ships as v0.11.0-alpha.1 → v0.11.0 stable → v1.0.0 once proven. + +### 3.10 Every write is inside a transaction, through the queue + +(Inherited from the storage spec.) No "loose" writes to SQLite. Every state-changing SQL statement runs inside a transaction enqueued on the single-writer queue. Lamport counter updates happen in the same transaction as the domain row write. This is what makes the local-first storage layer correct under concurrency. + +### 3.11 Facades, not raw services + +(Inherited from the facade spec.) UI components and commands never import from `services/<feature>/device-code.ts` or `services/<feature>/client.ts`. They import from `services/<feature>/facade.ts` and get a narrow, Zod-validated, Promise-returning interface. This is enforced by tooling at CI, not by convention. + +--- + +## 4. The mesh state model + +Three states, one mental model. The CLI presents the same tool surface in all three. 
+ +### 4.1 Personal mesh + +- **Identity**: unique per machine, created on first run +- **Storage**: `~/.claudemesh/data.db` (SQLite) +- **Peers**: just you +- **Broker**: not connected (no one to sync to) +- **Auth**: none required +- **Tools**: all local-first MCP tools work against local storage; broker-backed tools return "not available in personal mesh" +- **Invitable**: no (must be published first) + +**Value proposition**: persistent memory, vector search (local sqlite-vec fallback for personal mesh), state, and file staging for Claude across sessions, with no network dependency. + +### 4.2 Shared mesh, owned + +- **Identity**: registered server-side with a slug, you're the owner +- **Storage**: per-peer data in local SQLite; shared data in broker-backed services (Postgres, Neo4j, Qdrant, MinIO) +- **Peers**: you + anyone with an invite +- **Broker**: connected +- **Auth**: yes, to create (not to use afterwards) +- **Tools**: complete tool surface — local-first + broker-backed +- **Invitable**: yes, via `claudemesh invite` + +### 4.3 Shared mesh, guest + +- **Identity**: someone else's mesh, you joined via invite +- **Storage**: per-peer data in local SQLite; shared data accessed via broker +- **Peers**: everyone in the mesh +- **Broker**: connected +- **Auth**: optional — guests use ephemeral keypairs by default, no account required +- **Tools**: same surface as owner, with some operations gated by role (rename/archive/delete are owner-only) +- **Invitable**: depends on mesh policy + +### 4.4 Transitions + +- **Personal → Shared owned** (`claudemesh share` / `publish`): auth triggers if not already, creates server-side mesh record, sync daemon wakes up, generates first invite URL. Per-peer SQLite data stays local; broker-backed services are initialized fresh on the broker side. 
+- **No account → Guest** (`claudemesh <invite-url>`): ephemeral keypair, joins, no auth required +- **Guest → Shared owned**: not applicable; guests use `claudemesh new` to create their own +- **Shared owned → Personal**: not supported (would confuse other members). Leave with `claudemesh leave`, keep local state. + +--- + +## 5. File system layout + +All paths are XDG-compliant. On macOS defaults to `~/.claudemesh/`; on Linux respects `$XDG_DATA_HOME`, `$XDG_CONFIG_HOME`, `$XDG_CACHE_HOME`; on Windows uses `%APPDATA%\claudemesh\`. + +``` +~/.claudemesh/ +├── config.json # user preferences (broker URL, locale, telemetry opt-out) +├── state.json # last-used cache (mesh, name, role, session counters) +├── auth.json # 0600, raw token; file perms are the v1.0.0 security posture +├── data.db # SQLite source of truth for local-first data +├── data.db-wal # write-ahead log +├── data.db-shm # shared memory file +├── keys/ # 0700 dir, per-mesh keypairs, 0600 files +│ ├── personal.key +│ └── <mesh-slug>.key +├── blobs/ # 0700 dir, content-addressed local blobs (< 64 KB files + cache) +│ └── <sha256-prefix>/ +│ └── <sha256> +├── cache/ +│ ├── update-check.json # last npm registry poll (24h TTL) +│ └── mesh-metadata/ # cached mesh metadata +│ └── <mesh-slug>.json +├── logs/ +│ ├── cli.log # rotated +│ ├── mcp.log # MCP server logs +│ └── metrics.jsonl # local telemetry log (never transmitted) +└── tmp/ # scratch space, cleaned on exit +``` + +**Permissions:** `~/.claudemesh/` is `0700`. `auth.json` and `keys/*` are `0600`. Other files are `0644`. On read, the CLI warns if permissions have drifted more permissive than the baseline; on write, it enforces the baseline. + +**Token storage is file-permission based, not encrypted.** Server-side tokens are argon2-hashed by Better Auth's `apiKey` plugin, but the client stores the raw token in `auth.json` protected by `0600` and parent directory `0700`. v1.0.0 does NOT use OS keychain integration (deferred to v1.1+). 
This is a conscious tradeoff — keychain integration adds significant platform-specific code and dependency weight for a modest security improvement on single-user machines. + +--- + +## 6. The target source tree + +``` +apps/cli-v2/ +├── package.json # name: claudemesh-cli +├── tsconfig.json +├── bunfig.toml +├── build.ts # Bun bundler driver +├── dependency-cruiser.config.js # enforces folder-level dep rules +├── .eslintrc.cjs # enforces boundary rules (facade pattern spec §7) +├── biome.json # linter/formatter config +├── .gitignore +├── CHANGELOG.md +├── README.md +├── bin/ +│ └── claudemesh # shebang entry → dist/entrypoints/cli.js +│ +├── src/ +│ ├── entrypoints/ +│ │ ├── cli.ts # interactive CLI entry, wires services, fires early prefetches +│ │ └── mcp.ts # `claudemesh mcp` → stdio MCP server +│ │ +│ ├── cli/ # non-Ink I/O plumbing +│ │ ├── argv.ts # parse process.argv → normalized args +│ │ ├── print.ts # stdout helpers, respect NO_COLOR/FORCE_COLOR +│ │ ├── structured-io.ts # --json, --output-format ndjson +│ │ ├── exit.ts # exit codes + cleanup hooks +│ │ ├── update-notice.ts # "new version available" banner +│ │ ├── handlers/ +│ │ │ ├── signal.ts # SIGINT/SIGTERM graceful shutdown +│ │ │ └── error.ts # top-level error → user message +│ │ └── output/ # non-interactive renderers +│ │ ├── list.ts +│ │ ├── peers.ts +│ │ ├── whoami.ts +│ │ └── version.ts +│ │ +│ ├── commands/ +│ │ ├── launch.ts # default: bare `claudemesh` +│ │ ├── join.ts # `claudemesh ` positional also routes here +│ │ ├── new/ # multi-step wizard +│ │ │ ├── index.ts +│ │ │ ├── NameStep.tsx +│ │ │ ├── TemplateStep.tsx +│ │ │ └── ConfirmStep.tsx +│ │ ├── invite.ts +│ │ ├── list.ts +│ │ ├── rename.ts +│ │ ├── leave.ts +│ │ ├── peers.ts +│ │ ├── login.ts # rarely needed; auth is lazy +│ │ ├── logout.ts +│ │ ├── whoami.ts +│ │ ├── share.ts # publish personal mesh as shared +│ │ ├── publish.ts # alias for share +│ │ ├── advanced/ +│ │ │ ├── doctor/ +│ │ │ │ ├── index.ts +│ │ │ │ └── 
DoctorScreen.tsx +│ │ │ ├── mcp/ # advanced MCP commands +│ │ │ │ ├── catalog.ts # list default MCP catalog +│ │ │ │ ├── deploy.ts # deploy an MCP from catalog or source +│ │ │ │ └── index.ts +│ │ │ ├── hook.ts # internal: Claude Code hook handler +│ │ │ ├── seed-test-mesh.ts +│ │ │ ├── install.ts # register MCP server with Claude Code +│ │ │ ├── uninstall.ts +│ │ │ ├── connect.ts # external bridges (telegram, etc.) +│ │ │ ├── disconnect.ts +│ │ │ ├── migrate.ts # explicit migration runner +│ │ │ ├── telemetry.ts # telemetry on/off +│ │ │ └── index.ts +│ │ └── index.ts # command registry + help grouping +│ │ +│ ├── services/ +│ │ ├── auth/ # device-code + PAT authentication +│ │ │ ├── client.ts +│ │ │ ├── device-code.ts +│ │ │ ├── pat.ts +│ │ │ ├── token-store.ts +│ │ │ ├── refresh.ts +│ │ │ ├── implementation.ts +│ │ │ ├── schemas.ts +│ │ │ ├── errors.ts +│ │ │ ├── types.ts +│ │ │ ├── index.ts +│ │ │ ├── facade.ts +│ │ │ └── auth.test.ts +│ │ │ +│ │ ├── mesh/ # mesh lifecycle (create, list, join, publish, etc.) 
+│ │ │ ├── client.ts +│ │ │ ├── bootstrap.ts # first-run personal mesh +│ │ │ ├── create.ts +│ │ │ ├── publish.ts +│ │ │ ├── join.ts +│ │ │ ├── list.ts +│ │ │ ├── rename.ts +│ │ │ ├── leave.ts +│ │ │ ├── resolve-target.ts +│ │ │ ├── implementation.ts +│ │ │ ├── schemas.ts +│ │ │ ├── errors.ts +│ │ │ ├── types.ts +│ │ │ ├── index.ts +│ │ │ ├── facade.ts +│ │ │ └── mesh.test.ts +│ │ │ +│ │ ├── invite/ # invite generation, parsing, claiming +│ │ │ ├── generate.ts +│ │ │ ├── parse-url.ts +│ │ │ ├── claim.ts +│ │ │ ├── send-email.ts +│ │ │ ├── schemas.ts +│ │ │ ├── errors.ts +│ │ │ ├── implementation.ts +│ │ │ ├── index.ts +│ │ │ ├── facade.ts +│ │ │ └── invite.test.ts +│ │ │ +│ │ ├── broker/ # WebSocket client + shared-service gateway +│ │ │ ├── ws-client.ts # raw WS with reconnect/backoff +│ │ │ ├── peer-crypto.ts # crypto_box envelope wrapping +│ │ │ ├── sync-daemon.ts # reads outbox, applies inbox +│ │ │ ├── shared-sql.ts # broker WS wrapper for mesh_query/mesh_execute/mesh_schema +│ │ │ ├── shared-graph.ts # wrapper for graph_query/graph_execute +│ │ │ ├── shared-vectors.ts # wrapper for vector_store/vector_search (Qdrant via broker) +│ │ │ ├── shared-files.ts # wrapper for large file ops (MinIO via broker) +│ │ │ ├── mcp-registry.ts # wrapper for mesh_mcp_* tools +│ │ │ ├── url-watch.ts # wrapper for mesh_watch tools +│ │ │ ├── vault.ts # wrapper for vault_* tools +│ │ │ ├── implementation.ts +│ │ │ ├── schemas.ts +│ │ │ ├── errors.ts +│ │ │ ├── index.ts +│ │ │ ├── facade.ts +│ │ │ └── broker.test.ts +│ │ │ +│ │ ├── api/ # base HTTP client for /api/my/* +│ │ │ ├── client.ts +│ │ │ ├── my.ts +│ │ │ ├── public.ts +│ │ │ ├── errors.ts +│ │ │ ├── with-retry.ts +│ │ │ ├── index.ts +│ │ │ └── facade.ts +│ │ │ +│ │ ├── crypto/ +│ │ │ ├── keypair.ts +│ │ │ ├── box.ts +│ │ │ ├── random.ts +│ │ │ ├── index.ts +│ │ │ └── crypto.test.ts +│ │ │ +│ │ ├── store/ # local SQLite source of truth (local-first data) +│ │ │ ├── db.ts # connection + PRAGMA + migration runner +│ │ │ ├── 
write-queue.ts # single-writer queue +│ │ │ ├── lamport.ts # atomic lamport tick +│ │ │ ├── conflict.ts # bytewise tuple comparison +│ │ │ ├── memory.ts # memory table CRUD +│ │ │ ├── vectors.ts # local sqlite-vec (personal mesh only) +│ │ │ ├── state.ts # local state_kv cache +│ │ │ ├── files.ts # local blob store + sha256 addressing +│ │ │ ├── tasks.ts +│ │ │ ├── peers.ts # peer cache +│ │ │ ├── outbox.ts # pending sync operations +│ │ │ ├── inbox.ts # incoming sync operations +│ │ │ ├── migrations/ +│ │ │ │ ├── 001-initial.sql +│ │ │ │ └── 002-add-broker-epoch.sql +│ │ │ ├── implementation.ts +│ │ │ ├── schemas.ts +│ │ │ ├── index.ts +│ │ │ ├── facade.ts +│ │ │ └── store.test.ts +│ │ │ +│ │ ├── config/ +│ │ │ ├── read.ts +│ │ │ ├── write.ts +│ │ │ ├── schemas.ts +│ │ │ ├── index.ts +│ │ │ └── facade.ts +│ │ │ +│ │ ├── state/ # last-used cache (NOT the mesh state_kv — that's store/state.ts) +│ │ │ ├── last-used.ts +│ │ │ ├── session-counter.ts # for 100th-use milestone +│ │ │ ├── schemas.ts +│ │ │ ├── index.ts +│ │ │ └── facade.ts +│ │ │ +│ │ ├── device/ +│ │ │ ├── info.ts +│ │ │ ├── index.ts +│ │ │ └── facade.ts +│ │ │ +│ │ ├── clipboard/ +│ │ │ ├── read.ts +│ │ │ ├── detect-invite.ts +│ │ │ ├── index.ts +│ │ │ └── facade.ts +│ │ │ +│ │ ├── spawn/ +│ │ │ ├── claude.ts # single choke point for exec'ing claude +│ │ │ ├── browser.ts # single choke point for opening URLs +│ │ │ ├── index.ts +│ │ │ └── facade.ts +│ │ │ +│ │ ├── telemetry/ +│ │ │ ├── emit.ts +│ │ │ ├── opt-out.ts +│ │ │ ├── events.ts +│ │ │ ├── index.ts +│ │ │ └── facade.ts +│ │ │ +│ │ ├── health/ # doctor checks +│ │ │ ├── check-auth.ts +│ │ │ ├── check-broker.ts +│ │ │ ├── check-crypto.ts +│ │ │ ├── check-paths.ts +│ │ │ ├── check-install.ts +│ │ │ ├── check-version.ts +│ │ │ ├── check-store.ts +│ │ │ ├── index.ts +│ │ │ └── facade.ts +│ │ │ +│ │ ├── update/ +│ │ │ ├── check.ts # npm registry poll, 24h cache +│ │ │ ├── index.ts +│ │ │ └── facade.ts +│ │ │ +│ │ ├── i18n/ +│ │ │ ├── resolve.ts # 
locale detection +│ │ │ ├── format.ts # ICU MessageFormat wrapper +│ │ │ ├── index.ts +│ │ │ └── facade.ts +│ │ │ +│ │ └── lifecycle/ +│ │ ├── service-manager.ts # start/stop long-running services +│ │ ├── index.ts +│ │ └── facade.ts +│ │ +│ ├── ui/ # Ink-only rendering layer (design spec) +│ │ ├── styles.ts # six semantic color roles + ten icons +│ │ ├── store.ts # LaunchStore +│ │ ├── router.ts # flow cursor + overlay stack +│ │ ├── flows.ts # FLOWS = { Launch, Join, New, Invite, Auth } +│ │ ├── screen-registry.ts +│ │ ├── start.ts # Ink bootstrap +│ │ ├── terminal.ts # resetTerminal() — single UI→CLI handoff point +│ │ ├── keybindings.ts # global keymap (Tab is no-op per UX spec) +│ │ ├── session-kind.ts # first_run | recovery | daily_launch | interactive | non_interactive | rescue +│ │ ├── hooks/ +│ │ │ ├── useKeybindings.ts +│ │ │ ├── useInterval.ts +│ │ │ ├── useAsync.ts +│ │ │ ├── useTerminalSize.ts +│ │ │ ├── useService.ts +│ │ │ └── index.ts +│ │ ├── primitives/ +│ │ │ ├── CardLayout.tsx +│ │ │ ├── PickerMenu.tsx # bold + ▸ + position cues (a11y matrix) +│ │ │ ├── StatusRows.tsx +│ │ │ ├── LoadingLine.tsx +│ │ │ ├── TextBlock.tsx +│ │ │ ├── Divider.tsx +│ │ │ ├── ErrorBlock.tsx +│ │ │ └── index.ts +│ │ ├── screens/ +│ │ │ ├── WelcomeScreen.tsx # typography-only, no brand mark +│ │ │ ├── AuthScreen.tsx +│ │ │ ├── MeshPickerScreen.tsx +│ │ │ ├── ConfirmScreen.tsx +│ │ │ ├── HandoffScreen.tsx # unmount → resetTerminal → spawn(claude) +│ │ │ └── index.ts +│ │ └── overlays/ +│ │ ├── BrokerDisconnected.tsx +│ │ ├── InviteInvalid.tsx +│ │ ├── AuthExpired.tsx +│ │ ├── UpdateAvailable.tsx +│ │ └── index.ts +│ │ +│ ├── mcp/ # MCP stdio server (exposes tools to Claude Code) +│ │ ├── server.ts +│ │ ├── router.ts # tool dispatch + middleware +│ │ ├── tools/ # one file per tool family +│ │ │ ├── memory.ts # local SQLite +│ │ │ ├── state.ts # local SQLite +│ │ │ ├── tasks.ts # local SQLite with tentative claim semantics +│ │ │ ├── peers.ts # list_peers, send_message, 
check_messages (sync via outbox) +│ │ │ ├── profile.ts # set_profile, set_status, set_summary, set_visible +│ │ │ ├── groups.ts # join_group, leave_group +│ │ │ ├── scheduling.ts # schedule_reminder, list_scheduled, cancel_scheduled +│ │ │ ├── mesh-meta.ts # mesh_info, mesh_stats, mesh_clock (read), ping_mesh +│ │ │ ├── contexts.ts # share_context, get_context, list_contexts (via broker Postgres) +│ │ │ ├── skills.ts # share_skill, get_skill, list_skills, remove_skill, mesh_skill_deploy +│ │ │ ├── files.ts # share_file, get_file, grant_file_access, read_peer_file, etc (via broker MinIO) +│ │ │ ├── vectors.ts # vector_store, vector_search, vector_delete (via broker Qdrant) +│ │ │ ├── sql.ts # mesh_query, mesh_execute, mesh_schema (via broker Postgres) +│ │ │ ├── graph.ts # graph_query, graph_execute (via broker Neo4j) +│ │ │ ├── streams.ts # create_stream, publish, subscribe, list_streams +│ │ │ ├── mcp-registry.ts # mesh_mcp_register, mesh_mcp_list, mesh_tool_call, mesh_mcp_remove, mesh_mcp_deploy, undeploy, update, logs, scope, schema, catalog +│ │ │ ├── vault.ts # vault_set, vault_list, vault_delete +│ │ │ ├── url-watch.ts # mesh_watch, mesh_unwatch, mesh_watches +│ │ │ ├── clock-write.ts # mesh_set_clock, mesh_pause_clock, mesh_resume_clock +│ │ │ ├── webhooks.ts # create_webhook, list_webhooks, delete_webhook +│ │ │ ├── tools.test.ts +│ │ │ └── index.ts # tool registry +│ │ ├── middleware/ +│ │ │ ├── auth.ts +│ │ │ ├── rate-limit.ts +│ │ │ ├── logging.ts +│ │ │ └── error-handler.ts +│ │ └── handlers/ +│ │ ├── stdio.ts +│ │ └── jsonrpc.ts +│ │ +│ ├── constants/ +│ │ ├── paths.ts +│ │ ├── urls.ts +│ │ ├── timings.ts +│ │ ├── tokens.ts # cm_ prefix, regex +│ │ ├── exit-codes.ts +│ │ ├── limits.ts # per-mesh resource limits +│ │ └── index.ts +│ │ +│ ├── types/ +│ │ ├── api.ts +│ │ ├── mesh.ts +│ │ ├── peer.ts +│ │ ├── invite.ts +│ │ ├── store.ts +│ │ └── index.ts +│ │ +│ ├── utils/ +│ │ ├── levenshtein.ts +│ │ ├── slug.ts +│ │ ├── url.ts +│ │ ├── format.ts +│ │ ├── 
semver.ts +│ │ ├── assert.ts +│ │ ├── retry.ts +│ │ └── index.ts +│ │ +│ ├── locales/ # ICU MessageFormat strings +│ │ ├── en.ts +│ │ ├── es.ts +│ │ └── index.ts +│ │ +│ ├── templates/ # pluggable mesh templates +│ │ ├── solo.ts +│ │ ├── team.ts +│ │ ├── ci.ts +│ │ ├── research.ts +│ │ └── index.ts +│ │ +│ └── migrations/ # on-disk config migrations (v1 → v2) +│ ├── 0001-v1-config.ts +│ ├── 0002-v1-auth.ts +│ └── index.ts +│ +└── tests/ + ├── integration/ + │ ├── auth.test.ts + │ ├── mesh.test.ts + │ ├── invite.test.ts + │ ├── sync-daemon.test.ts + │ ├── shared-infra.test.ts # against staging broker with Postgres/Neo4j/Qdrant/MinIO + │ └── full-flow.test.ts + ├── e2e/ + │ ├── device-code-flow.test.ts + │ └── mcp-deploy-catalog.test.ts + ├── fuzz/ + │ └── store.test.ts # 100k random ops per CI run + ├── bench/ + │ ├── store.bench.ts + │ └── cold-start.bench.ts + ├── fixtures/ + │ ├── auth/ + │ ├── meshes/ + │ ├── invites/ + │ └── tokens/ + └── helpers/ + ├── mock-broker.ts + ├── mock-api.ts + ├── temp-home.ts + ├── ink-render.ts + └── sqlite-fixture.ts +``` + +**Total: ~200 files at scaffold time.** Every file has a single responsibility and a module-header comment pointing to the spec section it implements. + +--- + +## 7. Local-first storage + +Source of truth for per-peer data: memory, state (local cache), personal files, tasks, peer cache, outbox, inbox, lamport clocks, profile. 
+ +**This spec defers to `cli-v2-local-first-storage.md` for all storage details.** That spec includes: +- Complete SQLite schema with all constraints and indexes +- Atomic Lamport clock algorithm (race-free, with tests) +- Conflict resolution rules per tool family +- Single-writer queue with async op handling +- Sync protocol (outbox drain, inbox apply, broker epoch handling) +- Personal → shared publish upgrade protocol (6 phases, all resumable) +- Task claim semantics (all 4 branches: open, claimed, completed/cancelled, same-peer reclaim) +- File blob storage with refcount GC +- Migration runner and shutdown protocol + +Key guarantees: +- Every local-first tool operation succeeds offline +- Broker outages are invisible to Claude Code's tool surface for local-first tools +- Exactly-once delivery via `client_op_id` on outbox ops + `UNIQUE(mesh_slug, broker_epoch, broker_seq)` on inbox +- Deterministic cross-peer conflict resolution via bytewise `(lamport, peer_id)` tuple comparison + +--- + +## 8. Shared infrastructure + +Broker-backed services for data that requires cross-peer queries: shared SQL (Postgres), graph (Neo4j), vector search (Qdrant), large files (MinIO), MCP registry (peer-hosted and broker-deployed), vault, URL watch. 
+ +**This spec defers to `cli-v2-shared-infrastructure.md` for all broker-backed details.** That spec includes: +- Hybrid architecture diagram and owner-per-feature map +- Per-mesh isolation models for each backend +- RBAC matrix (guest / member / admin / owner) +- Complete tool surface for the ~30 broker-backed tools +- MCP registry tier 1 (peer-hosted) and tier 2 (broker-deployed) with Docker sandbox config +- Vault encryption (AES-GCM, per-mesh KMS wrapping) +- URL watch polling (hash/json/status modes) +- Default bundled MCP catalog (19 curated official servers) +- Broker deployment requirements (Docker Compose reference) +- Security model (threat table, audit logging, rate limits) + +Key guarantees: +- Cross-mesh data isolation enforced at multiple layers (broker auth + backend-native isolation) +- Deployed MCPs run in hardened Docker sandboxes (read-only root, dropped caps, seccomp, network allowlist) +- Vault credentials never appear in logs or stdout +- Every operation audit-logged with 90-day retention + +--- + +## 9. Authentication + +### 9.1 Lazy, never eager + +**First run does NOT prompt for auth.** Personal mesh works fully offline with no account. Auth is triggered only by: + +1. `claudemesh share` / `publish` — to create a server-side mesh record +2. `claudemesh new --shared` — if the user wants a shared mesh from the start +3. `claudemesh invite` on a personal mesh — triggers publish first +4. Any `/api/my/*` call that returns 401 — silent refresh + +### 9.2 Device code flow (interactive) + +1. CLI requests device code: `POST /api/auth/cli/device-code` with device info +2. CLI opens browser to `claudemesh.com/cli-auth?code=ABCD-EFGH` +3. User approves in browser (after signing in via Better Auth if needed) +4. CLI polls `GET /api/auth/cli/device-code/:device_code` every 1.5s (rate-limited to 1/sec per IP per device_code) +5. On approve, CLI receives a long-lived `cm_session_*` token +6. CLI writes `~/.claudemesh/auth.json` with `0600` perms +7. 
CLI syncs meshes from `/api/my/meshes` + +### 9.3 Personal access tokens (scripts/CI) + +`cm_pat_<32 base32>` format. Created in dashboard at `/dashboard/settings/cli-tokens` or via `claudemesh login --token <token>`. Resolution order: + +1. `--token` CLI flag +2. `CLAUDEMESH_TOKEN` env var +3. `~/.claudemesh/auth.json` + +### 9.4 Refresh + +Tokens have a 90-day default lifetime, auto-extended on use. When a token expires or is revoked, the next API call returns 401. The CLI silently triggers a device-code re-auth in the background (for interactive contexts) or fails fast with a clear error (for PAT contexts). + +### 9.5 Security + +- **Server-side**: Tokens hashed at rest via Better Auth `apiKey` plugin (argon2) +- **Client-side**: Raw token in `~/.claudemesh/auth.json` protected by file permissions `0600` and parent dir `0700`. No OS keychain in v1.0.0 +- `cm_` prefix enables GitHub/GitGuardian secret scanning +- Rate-limited polling on device-code endpoints +- Audit events for every auth action (`auth.cli.*` namespace) +- No in-memory token cache — every request validates against the DB + +--- + +## 10. 
Wizard / flow pipeline + +### 10.1 Declarative flow definition + +```ts +// ui/flows.ts +export enum Screen { + Welcome = 'welcome', + Auth = 'auth', + MeshPicker = 'mesh-picker', + NewMeshName = 'new-mesh-name', + NewMeshTemplate = 'new-mesh-template', + Confirm = 'confirm', + Handoff = 'handoff', +} + +export enum Flow { + Launch = 'launch', + Join = 'join', + New = 'new', + Invite = 'invite', + Auth = 'auth', +} + +export const FLOWS: Record<Flow, FlowEntry[]> = { + [Flow.Launch]: [ + { screen: Screen.Welcome, show: s => s.isFirstRun, isComplete: s => s.welcomed }, + { screen: Screen.MeshPicker, show: s => s.ambiguousMesh, isComplete: s => s.meshSlug !== null }, + { screen: Screen.Confirm, show: s => s.requiresConfirmation, isComplete: s => s.confirmed }, + { screen: Screen.Handoff, isComplete: () => false }, // terminal + ], + [Flow.New]: [ + { screen: Screen.NewMeshName, isComplete: s => s.newMeshName !== null }, + { screen: Screen.NewMeshTemplate, show: s => s.templateMatters, isComplete: s => s.template !== null }, + { screen: Screen.Confirm, isComplete: s => s.confirmed }, + { screen: Screen.Handoff, isComplete: () => false }, + ], + // ... +}; +``` + +### 10.2 Router with overlay stack + +```ts +// ui/router.ts +export class Router { + private overlays: Overlay[] = []; + constructor(private flow: FlowEntry[]) {} + + resolve(session: Session): Screen | Overlay { + if (this.overlays.length > 0) return this.overlays.at(-1)!; + for (const entry of this.flow) { + if (entry.show && !entry.show(session)) continue; + if (entry.isComplete && entry.isComplete(session)) continue; + return entry.screen; + } + return this.flow.at(-1)!.screen; + } + + pushOverlay(o: Overlay) { this.overlays.push(o); } + popOverlay() { this.overlays.pop(); } +} +``` + +Overlays are interrupts: `BrokerDisconnected`, `InviteInvalid`, `AuthExpired`, `UpdateAvailable`. Pushed from anywhere (broker service, auth middleware, version check), popped when dismissed. The flow underneath resumes cleanly. 
+ +### 10.3 `session_kind` determines output budget + +Per UX spec. Six modes drive visibility decisions: + +| Mode | Pre-handoff output | Frames rendered | +|---|---|---| +| `first_run` | Up to 8 lines (welcome + status rows + closing sentence) | 1 Ink frame | +| `recovery` | 1 status line | 0 frames | +| `daily_launch` | 0 lines | 0 frames | +| `interactive` | Flow pipeline, no budget | N frames | +| `non_interactive` | Structured output only | 0 frames | +| `rescue` | Full diagnostic output | 0 frames | + +Detection in `entrypoints/cli.ts`: +- `first_run` → no `~/.claudemesh/state.json` +- `recovery` → previous session ended with non-zero exit + cache exists +- `daily_launch` → cache exists, no flags specifying new behavior, TTY, not `-y` with missing args +- `non_interactive` → `!process.stdout.isTTY` OR `--json` OR `CI` env +- `interactive` → explicit subcommand +- `rescue` → explicit `doctor`/`--help`/`whoami`/`--version` + +### 10.4 `-y` semantics + +`-y` / `--yes` means: walk the flow, for each visible-and-incomplete entry, check if required fields can be filled from flags. If yes, mark complete. If no, fail fast with a clear error naming the missing flag. + +No implicit defaults. No env-var magic. One flag, one meaning. + +### 10.5 Terminal teardown choke point + +Exactly one place handles the wizard → claude handoff: + +```ts +// ui/screens/HandoffScreen.tsx +useEffect(() => { + (async () => { + await inkApp.unmount(); + await inkApp.waitUntilExit(); + resetTerminal(); // ui/terminal.ts + await flushStdout(); + await spawnClaude(claudeArgs); // services/spawn/claude.ts + })(); +}, []); +``` + +`resetTerminal()` emits the full ANSI reset sequence (SGR, cursor, alt-screen, mouse tracking, bracketed paste, raw mode). No other code in the CLI emits ANSI reset — this is the one place. See the storage spec's §18 for shutdown coordination. + +--- + +## 11. Command surface + +Main help shows 8 primary commands plus a "When something's wrong" section. 
Advanced commands are hidden behind `claudemesh help advanced`. + +``` +$ claudemesh --help + +claudemesh — peer mesh for Claude Code sessions +v1.0.0 + +USAGE + claudemesh start a session in your mesh (creates one if needed) + claudemesh <invite-url> join a mesh from an invite link + claudemesh new create a new mesh + claudemesh invite [email] generate an invite (copies to clipboard) + claudemesh list see your meshes + claudemesh rename <name> rename the current mesh + claudemesh leave [mesh] leave a mesh + claudemesh peers see who's in the current mesh + +When something's wrong + claudemesh doctor diagnose install/config/connection issues + claudemesh whoami show current identity + +More: claudemesh help advanced +``` + +Advanced help exposes: `login`, `logout`, `share`/`publish`, `install`, `uninstall`, `migrate`, `telemetry`, `mcp catalog`, `mcp deploy`, plus the internal `mcp`, `hook`, `seed-test-mesh` commands. + +**Connectors (Telegram, Slack, Discord, GitHub, etc.) are deployed MCPs, not dedicated commands.** A user who wants a Telegram bridge runs: + +``` +claudemesh advanced mcp deploy telegram --env TELEGRAM_BOT_TOKEN=$vault:tg_token --scope mesh +``` + +v2 does NOT ship a dedicated `connect`/`disconnect` command for bridges because that creates two ways to do the same thing (deployed MCP or dedicated bridge). The v1 `apps/broker/src/telegram-bridge.ts` hardcoded bridge is **not ported** to v2 — users who need Telegram deploy the Telegram connector MCP from the default catalog instead. See the connector story in §15.2 and shared-infrastructure §9 + §12.2. 
+ +### 11.1 Flag conventions + +- `-y` / `--yes` — skip all wizard prompts, fail fast on missing required input +- `-q` / `--quiet` — suppress non-essential output +- `-v` / `--verbose` — increase log detail +- `--json` / `--output-format json` — machine-readable output with top-level `schema_version` field +- `--mesh <slug>` — override mesh selection +- `--token <token>` — override auth token +- `--help` / `-h` — per-command help + +### 11.2 Exit codes + +| Code | Meaning | +|---|---| +| 0 | Success | +| 1 | User cancelled (Ctrl-C, declined) | +| 2 | Authentication failed | +| 3 | Invalid arguments | +| 4 | Network error — **only when the user explicitly required network** (share, login, invite --email, or a broker-backed tool call). Local-first operations never exit 4. | +| 5 | Not found (mesh, invite, peer) | +| 6 | Already exists (slug collision) | +| 7 | Permission denied (role, token scope) | +| 8 | Internal error (bug) | +| 9 | Claude Code binary missing (with stderr hint to install from claude.ai/code) | + +### 11.3 Risk tiers for advanced commands + +Not all advanced commands are equally dangerous. v2 assigns risk tiers: + +| Tier | Commands | Behavior | +|---|---|---| +| **Safe** | `whoami`, `doctor`, `login`, `logout`, `mcp catalog` | No confirmation needed | +| **Reversible** | `telemetry`, `install`, `migrate` | No confirmation needed | +| **Destructive** | `uninstall`, `leave`, `mcp deploy` with non-`peer` scope | Typed confirmation: `claudemesh uninstall` prompts `Type "uninstall" to confirm:` | +| **Developer** | `seed-test-mesh`, `hook`, internal `mcp` | Only runs if `CLAUDEMESH_DEV=1` or called by another process | + +--- + +## 12. MCP server tool surface + +The CLI's MCP server (`claudemesh mcp` stdio entry) exposes ~80 tools organized into 21 families. Local-first tools operate on SQLite; broker-backed tools route through the broker facade. 
+ +### 12.1 Tool families + +| Family | Tools | Backend | Count | +|---|---|---|---| +| **Messaging** | send_message, list_peers, check_messages, message_status | local outbox + broker | 4 | +| **Profile** | set_profile, set_status, set_summary, set_visible | local + broker | 4 | +| **Groups** | join_group, leave_group | local + broker | 2 | +| **State (local)** | set_state, get_state, list_state | local SQLite | 3 | +| **Memory** | remember, recall, forget | local SQLite | 3 | +| **Files (local + MinIO)** | share_file, get_file, list_files, file_status, delete_file, grant_file_access, read_peer_file, list_peer_files | local blobs + broker MinIO | 8 | +| **Vectors (Qdrant)** | vector_store, vector_search, vector_delete, list_collections | broker Qdrant | 4 | +| **Shared SQL (Postgres)** | mesh_query, mesh_execute, mesh_schema | broker Postgres | 3 | +| **Graph (Neo4j)** | graph_query, graph_execute | broker Neo4j | 2 | +| **Streams** | create_stream, publish, subscribe, list_streams | broker pub-sub | 4 | +| **Contexts** | share_context, get_context, list_contexts | broker Postgres | 3 | +| **Tasks** | create_task, claim_task, complete_task, list_tasks | local SQLite + sync | 4 | +| **Scheduling** | schedule_reminder, list_scheduled, cancel_scheduled | broker scheduler | 3 | +| **Mesh meta (read)** | mesh_info, mesh_stats, mesh_clock, ping_mesh | local + broker | 4 | +| **Mesh clock write** | mesh_set_clock, mesh_pause_clock, mesh_resume_clock | broker | 3 | +| **MCP registry (peer-hosted)** | mesh_mcp_register, mesh_mcp_list, mesh_mcp_remove, mesh_tool_call | broker relay | 4 | +| **MCP registry (broker-deployed)** | mesh_mcp_deploy, mesh_mcp_undeploy, mesh_mcp_update, mesh_mcp_logs, mesh_mcp_scope, mesh_mcp_schema, mesh_mcp_catalog | broker Docker sandboxes | 7 | +| **Skills** | share_skill, get_skill, list_skills, remove_skill, mesh_skill_deploy | broker Postgres + MinIO | 5 | +| **Webhooks** | create_webhook, list_webhooks, delete_webhook | broker HTTP server | 
3 | +| **Vault** | vault_set, vault_list, vault_delete | broker encrypted store | 3 | +| **URL watch** | mesh_watch, mesh_unwatch, mesh_watches | broker scheduler | 3 | + +**Total: ~80 tools across 21 families.** Full details for local-first tools in the storage spec §12; full details for broker-backed tools in the shared infrastructure spec §15. + +### 12.2 Middleware + +Every tool call goes through a middleware chain: + +1. **Auth** — validates the caller's token (for broker-backed tools) +2. **Rate limit** — per-tool per-second caps (full table in shared-infra spec §14.3) +3. **Logging** — structured logs to `~/.claudemesh/logs/mcp.log` +4. **Error handler** — catches exceptions, maps to MCP error responses with domain error codes + +--- + +## 13. Visual design system + +**This spec defers to `cli-v2-ux-design.md` for all design details.** The key locked values: + +- **Six semantic color roles**: `primary`, `success`, `error`, `warning`, `muted`, `dim`. No custom hex colors. Works in any terminal theme including light/dark/monochrome. +- **Ten icons**: `✔ ✘ ⚠ ▶ ▸ • ◆ █ ◉ ◎`. All BMP Unicode, ASCII fallback for old terminals. +- **Typography-only branding**: no brand mark, no ASCII art. First-run welcome uses the product name in `primary` color, tagline in `muted`. That's it. +- **Four delight beats** per major version: `"You're in."`, `"Your mesh is live. Anyone with the invite can join."`, `"Sent."`, `"Nice to see you again."` (the 100th-session easter egg). +- **Trust surfaces** (distinct category from delight): telemetry disclosure, audit access, data deletion — neutral voice, leading `~` marker. +- **Main help line**: `claudemesh start a session in your mesh (creates one if needed)` — works for first-run and daily-use states. +- **Error structure**: 1–3 sentences, what/why/action. Exactly one primary recovery action per error. +- **Accessibility matrix**: every state has 3 cues (icon + text + position). At least 2 legible in any a11y config. 
WCAG contrast targets per role. +- **ICU MessageFormat** for all pluralization and locale-sensitive strings. + +--- + +## 14. Build & ship + +### 14.1 Bundler + +Bun's built-in bundler, target Node (for compatibility with users on non-Bun systems). Output per-entrypoint bundles in `dist/entrypoints/`. + +```ts +// build.ts +import { build } from 'bun'; + +await build({ + entrypoints: ['src/entrypoints/cli.ts', 'src/entrypoints/mcp.ts'], + outdir: 'dist/entrypoints', + target: 'node', + minify: true, + sourcemap: 'external', + format: 'esm', +}); +``` + +### 14.2 Binary + +`bin/claudemesh` is a shell shim that execs Node on `dist/entrypoints/cli.js`. `claudemesh mcp` re-execs into `dist/entrypoints/mcp.js`. + +### 14.3 Honest bundle size targets + +Per the storage spec's §17, the 800 KB JS target was optimistic. Realistic: + +| Metric | Target | +|---|---| +| JS bundle gzipped | ~1.0 MB | +| Native addon per platform (better-sqlite3 + sqlite-vec) | ~2.8–3.5 MB | +| Total npm install (macOS arm64) | 8–10 MB | +| Total npm install (Linux x64) | 9–11 MB | +| Total npm install (Windows x64) | 10–12 MB | +| Cold start to first output | **200–400 ms** on Apple M2 Pro | + +100 ms cold start was fantasy with a native SQLite addon. 200–400 ms is realistic and competitive. + +### 14.4 Tests + +- **Unit**: colocated `*.test.ts`, run via `bun test` +- **Fuzz**: `tests/fuzz/store.test.ts`, 100k random ops per CI run +- **Integration**: `tests/integration/*.test.ts`, against staging broker + ephemeral SQLite, `INTEGRATION=1 bun test` +- **E2E**: `tests/e2e/*.test.ts`, Playwright drives browser device-code flow, `E2E=1 bun test` +- **Benchmarks**: `tests/bench/*.bench.ts`, tracked over time, regression >20% fails CI + +### 14.5 Publish + +```bash +# after atomic swap (post phase 10) +cd apps/cli +bun test && bun build.ts +pnpm publish --access public --no-git-checks +``` + +--- + +## 15. 
Migration from v1 + +### 15.1 On-disk migration runner + +`migrations/index.ts` exports an ordered list of migrations. On CLI start, `services/config/read.ts` detects the config version, runs pending migrations, and writes back. Failures halt startup with a clear error and preserve the old file as `config.json.backup`. + +Specific migrations: + +1. **`0001-v1-config.ts`** — transform v1 `config.json` shape (flat keys) to v2 shape (namespaced under `mesh`, `auth`, `ui`) +2. **`0002-v1-auth.ts`** — migrate any existing tokens from v1 locations (unlikely — v1 has no CLI auth) + +### 15.2 The v1 Telegram bridge (`apps/broker/src/telegram-bridge.ts`) + +**Not ported.** v2 does not include the v1 hardcoded Telegram bridge. The v2 connector story is: + +- All connectors (Telegram, Slack, Discord, GitHub webhooks, Linear, Notion, etc.) ship as **deployed MCP servers** via the tier-2 shared infrastructure MCP registry (see `cli-v2-shared-infrastructure.md` §9 and §12.2) +- OAuth / token credentials live in the per-peer vault (`vault_set`) and are injected into the connector container at startup via `$vault:<key>` env var substitution +- Connector MCPs run in hardened Docker sandboxes with egress-controlled networks (see shared-infrastructure §9.4.1) +- The default MCP catalog already includes tier-2 entries for `github`, `gitlab`, `slack`, `linear`, `notion`, `stripe`, `google-drive`, `google-maps` — these are claudemesh-audited connectors ready for one-command deployment + +**User migration path for Telegram users**: +1. On v2 launch, the migration runner detects an active v1 telegram bridge in the user's mesh config +2. Prints a one-time notice: `"The v1 Telegram bridge is no longer built-in. Deploy the Telegram connector MCP with:\n claudemesh advanced mcp deploy telegram --env TELEGRAM_BOT_TOKEN=$vault:tg_token --scope mesh\nYour existing Telegram Bot token can be stored via claudemesh advanced vault set tg_token <token>"` +3. 
The user runs the one-liner, and Telegram resumes working with the same bot token, same chat routing, but now sandboxed + egress-controlled + +**Why this is a breaking change for Telegram users**: they must re-deploy the connector manually. Acceptable because (a) the new deployment is more secure, (b) it unifies connector handling, and (c) v1.0.0 is allowed to break pre-1.0 patterns (see §3.9). + +**Shipping order**: the v1.0.0 default MCP catalog ships WITHOUT a `telegram` entry initially (because there's no well-known upstream Anthropic MCP for Telegram). A claudemesh-maintained `claudemesh-mcp-telegram` package ships as a separate npm package in parallel with v1.0.0, and the catalog adds it in v1.0.1. + +### 15.3 v1 → v2 cutover plan + +1. v2 scaffolded as `apps/cli-v2/` (Phase 0) +2. v2 fleshed out by Opus 4.6 1M against v1 as reference (Phases 1–9) +3. v0.11.0-alpha.1 published from `apps/cli-v2/` at the end of Phase 3, followed by further alpha/beta/rc pre-releases (Phases 4–8) +4. v2 reaches feature parity; feedback loop → v0.11.0 stable (Phase 9) +5. After v0.11.0 stable has been monitored in the wild (at least 1 week, per Phase 10), coordinated rename cutover: `git mv apps/cli apps/cli-legacy-v1 && git mv apps/cli-v2 apps/cli` — a rename, not a destructive `rm -rf` swap (Phase 10) +6. v1.0.0 published from the renamed `apps/cli/` (Phase 10) +7. `apps/cli-legacy-v1/` deleted in a follow-up commit 30 days after the cutover + +--- + +## 16. Implementation phases + +Each phase ends with a shippable release. No "PR of doom" — every phase is a thing users can install and try. 
+ +### Phase 0 — Scaffolding (1–2 days) + +- Create `apps/cli-v2/` with the full file tree +- Empty files with module-header comments pointing to relevant spec sections +- Type stubs that throw `NotImplementedError` +- `package.json`, `tsconfig.json`, `bunfig.toml`, `dependency-cruiser.config.js`, `.eslintrc.cjs`, `biome.json` +- `CHANGELOG.md` stub for v0.11.0-alpha.1 +- README pointers to all 5 specs +- CI passes (type-check green) + +### Phase 1 — Foundation layers (2–3 days) + +- `types/`, `constants/`, `utils/`, `locales/` fully filled in +- `services/crypto/`, `services/device/`, `services/clipboard/`, `services/config/`, `services/state/`, `services/api/client.ts`, `services/update/`, `services/i18n/`, `services/lifecycle/` +- Facade files for each service +- Unit tests for each +- No user-visible change yet + +### Phase 2 — Local store (4–5 days) + +- `services/store/` with SQLite connection, all tables, migrations +- `services/store/write-queue.ts` with async op handling +- `services/store/lamport.ts` with atomic tick +- `services/store/memory.ts`, `state.ts`, `vectors.ts`, `files.ts`, `tasks.ts`, `peers.ts`, `outbox.ts`, `inbox.ts` +- Full unit tests with 100% coverage per storage spec §19 +- Fuzz test harness + +### Phase 3 — Auth (3–4 days) + +- `services/auth/` full device-code + PAT implementation +- `services/api/my.ts`, `public.ts` +- Backend work (web app): Better Auth apiKey plugin, device-code endpoints, dashboard PAT UI +- CLI commands: `login`, `logout`, `whoami` +- Integration tests against staging +- **v0.11.0-alpha.1 published** — auth works, personal mesh works offline + +### Phase 4 — Mesh core + broker client (4–5 days) + +- `services/mesh/` with bootstrap, create, publish, join, list, rename, leave +- `services/invite/` with generate, parse-url, claim +- `services/broker/ws-client.ts`, `peer-crypto.ts`, reconnect logic +- `services/broker/shared-sql.ts`, `shared-graph.ts`, `shared-vectors.ts`, `shared-files.ts`, `mcp-registry.ts`, 
`url-watch.ts`, `vault.ts` — WS wrappers for broker-backed tools +- CLI commands: `new`, `invite`, `list`, `rename`, `leave`, `peers`, `share`, `publish` +- Integration tests +- **v0.11.0-alpha.2 published** — all mesh operations work + +### Phase 5 — Sync daemon (3–4 days) + +- `services/broker/sync-daemon.ts` with outbox drain + inbox apply +- Conflict resolution rules per storage spec §13 +- Offline tests: disconnect broker mid-session, verify all local-first ops work, reconnect, verify convergence +- Broker epoch change handling +- **v0.11.0-alpha.3 published** — local-first is real + +### Phase 6 — Wizard + UI (4–5 days) + +- `ui/` full flow pipeline: store, router, flows, screen-registry, primitives, screens, overlays +- `ui/terminal.ts` resetTerminal() choke point +- `ui/keybindings.ts` with Tab as no-op +- `ui/session-kind.ts` with all 6 modes +- All screens typography-only, no brand mark +- HandoffScreen as the single teardown point +- Accessibility matrix implementation (token-signal + VoiceOver patterns) +- **v0.11.0-beta.1 published** — wizard UX matches the design spec + +### Phase 7 — MCP server (5–6 days) + +- `mcp/` full stdio server +- All 21 tool families under `mcp/tools/`: + - Local-first tool handlers call `services/store/facade.ts` + - Broker-backed tool handlers call `services/broker/facade.ts` +- Middleware layer (auth, rate-limit, logging, error handler) +- Handlers for stdio and JSON-RPC +- Per-tool integration tests +- **v0.11.0-beta.2 published** — Claude Code gets the full ~80-tool surface + +### Phase 8 — Commands + CLI polish (3–4 days) + +- `commands/` all verbs implemented as thin adapters +- `cli/` I/O plumbing (print, structured-io, exit, update-notice, handlers, output) +- `commands/advanced/mcp/catalog.ts` and `deploy.ts` for default MCP catalog +- Help text in en + es with ICU +- Typo recovery (levenshtein-based) +- Clipboard-aware launch +- Risk tiers for advanced commands +- **v0.11.0-rc.1 published** — feature complete + 
+### Phase 9 — Migration + docs (2–3 days) + +- `migrations/` runner + v1→v2 migrations +- README rewrite for `apps/cli-v2/` and the root +- CHANGELOG with full v0.11.0 entry +- `docs/quickstart.md`, `docs/architecture.md`, `docs/security.md` updated +- Broker deployment docs updated (references shared-infra spec §13) +- Migration guide for v1 users upgrading +- **v0.11.0 stable published** + +### Phase 10 — Coordinated swap + v1.0.0 (1–2 days of hands-on work, plus a monitoring window of at least 1 week) + +Rather than a destructive `rm -rf` atomic swap, use a two-step coordinated cutover that preserves git history and doesn't break open PRs: + +**Step 1 — announce freeze + sibling-mode verification**: +1. Announce a merge freeze on `apps/cli/` (legacy) — close all open PRs against it or rebase them onto `apps/cli-v2/` first. +2. Run the v0.11.0 stable build from `apps/cli-v2/` in parallel with v0.10.x from `apps/cli/`. Both packages coexist during this phase — v2 ships as `claudemesh-cli@0.11.0` while v1 continues as `claudemesh-cli@0.10.x` for legacy users. +3. Monitor v0.11.0 stable in the wild for at least 1 week. Revert if major issues surface. + +**Step 2 — rename cutover** (after the monitoring window): +1. `git mv apps/cli apps/cli-legacy-v1` (preserves history, marks the old tree explicitly) +2. `git mv apps/cli-v2 apps/cli` (v2 becomes the canonical name) +3. Update CI workflows, `pnpm-workspace.yaml`, `turbo.json`, `CLAUDE.md`, root `README.md`, `.github/CODEOWNERS`, and any hardcoded paths in a single atomic commit. +4. Bump `apps/cli/package.json` to `1.0.0` +5. Publish to npm: `pnpm publish --access public` +6. Tag `v1.0.0` on the commit +7. Delete `apps/cli-legacy-v1/` in a follow-up commit after 30 days (by which point any outstanding PRs would have been updated or abandoned). 
+ +This approach: +- **Preserves git history**: `git log --follow` continues to work across the rename +- **Doesn't break open PRs**: they surface as rename conflicts, not delete conflicts, which git handles gracefully +- **Allows rollback**: if v1.0.0 has a catastrophic bug in the first 30 days, `apps/cli-legacy-v1/` is still in the tree and can be restored by reversing the two `git mv` renames +- **No "destructive delete" moment**: the atomic commit is a rename, not a `rm -rf` + +### Total timeline + +**Realistic: 32–43 days** of focused work for one developer (the sum of the per-phase estimates above). That's roughly 6–9 weeks at a steady pace with review cycles and feedback loops. + +**Compressed with Opus 4.6 1M and aggressive parallelism**: +- Phases 0–5 (architectural skeleton + core services + auth + mesh + sync) in **5–7 days** by leveraging the 1M context window for holistic passes +- Phases 6–8 (wizard + MCP server + commands) in **4–5 days** +- Phases 9–10 (polish + ship) in **1–2 days** + +**Compressed total: ~10–14 days** minimum with careful spec adherence. The earlier "8–10 days" estimate was optimistic; **12 days is a more honest floor** given the ~200 file scaffold and ~15k LOC of implementation + tests. + +--- + +## 17. Testable acceptance criteria for v1.0.0 + +Every criterion has a threshold, a test environment, and can be validated by running a specific command. 
+ +### First-run + +- [ ] **`claudemesh` on a fresh machine with no config, no auth, and no network** bootstraps a personal mesh offline in **under 1 second** (measured on Apple M2 Pro with fresh `~/.claudemesh/` deletion) +- [ ] **`claudemesh` on a fresh machine with network** opens the browser, completes device-code flow, creates a personal mesh, and launches Claude Code in **under 8 seconds** end to end (measured from `claudemesh` Enter to Claude Code's first prompt) +- [ ] User is never asked to type a mesh name, display name, or role on first run (grep the wizard screens for `TextInput` usage) +- [ ] User is never shown more than one wizard screen on first run (trace the flow pipeline for `Flow.Launch` with `session_kind=first_run`) + +### Daily use + +- [ ] **`claudemesh` on a machine with a last-used mesh** adds **less than 400ms of CLI overhead** before Claude Code takes over (measured: `time claudemesh` minus the `claude` binary's own startup time) +- [ ] Zero frames rendered for `session_kind=daily_launch` (verify by spying on Ink's `render` calls) +- [ ] Last-used mesh, name, and role are applied silently (no announcement strings) + +### Sharing + +- [ ] `claudemesh invite` on a shared mesh copies a working URL to the system clipboard (verify with `pbpaste` / `xclip`) +- [ ] `claudemesh invite alice@example.com` sends an email with the same URL (requires email provider wired up — verified via mock in CI, real in staging) +- [ ] `claudemesh share` converts a personal mesh to shared, triggers device-code auth if needed, and prints the first invite URL +- [ ] Invites expire after 7 days by default, overridable with `--expires` + +### Joining + +- [ ] `claudemesh <invite-url>` joins as a guest with no auth required +- [ ] `claudemesh` with an invite URL in the clipboard offers to join +- [ ] Guest meshes appear in `claudemesh list` +- [ ] `claudemesh leave` removes a joined mesh from local state + +### Auth + +- [ ] `claudemesh login` on a fresh machine completes end-to-end 
in **under 30 seconds** +- [ ] `claudemesh login --token <token>` works non-interactively +- [ ] `CLAUDEMESH_TOKEN=<token>` works for all commands +- [ ] `claudemesh logout` revokes server-side and deletes local credentials +- [ ] `claudemesh whoami` shows identity, mesh count, and token source +- [ ] Expired token triggers silent re-auth on next command (test: force-expire the token, run any command, assert no user prompt) +- [ ] Revoked token produces a clear error and prompts re-login + +### Local-first + +- [ ] Every local-first tool works with broker disconnected (verified via fuzz test that toggles network mid-session) +- [ ] Memory, vectors (personal), state, files, tasks persist across CLI restarts +- [ ] Offline changes sync automatically when broker returns (verify via integration test) +- [ ] No tool operation loses data on broker outage (fuzz test assertion) +- [ ] `claudemesh doctor` reports local store integrity + +### Shared infrastructure + +- [ ] `mesh_execute("CREATE TABLE test (id int)")` creates a table in the per-mesh Postgres schema (integration test against staging broker) +- [ ] `mesh_query("SELECT * FROM test")` returns rows +- [ ] Cross-mesh query attempt (e.g. 
trying to `SELECT FROM mesh_other.test`) fails with permission denied +- [ ] `graph_execute("CREATE (n:Bug {id: 1})")` works in the per-mesh Neo4j database +- [ ] `vector_store` + `vector_search` in the same collection returns semantically similar results +- [ ] `share_file` with a >64 KB file uploads to MinIO and returns a file ID +- [ ] `mesh_mcp_deploy({ catalog: "github", env: { GITHUB_PERSONAL_ACCESS_TOKEN: "$vault:test" }, scope: "mesh" })` deploys a sandboxed GitHub MCP server +- [ ] The deployed GitHub MCP responds to `mesh_tool_call("github", "get_issue", { repo, number })` +- [ ] `vault_set("test", "secret")` stores an encrypted credential; `vault_list()` returns metadata but not the value +- [ ] `mesh_watch("https://example.com", { interval: 5 })` creates a watch; simulated content change triggers a notification + +### MCP server + +- [ ] Claude Code discovers all ~80 tools via stdio (verify by counting `tools/list` response entries) +- [ ] Tools respect RBAC (guest can't run `mesh_execute`, etc.) 
+- [ ] Rate limits enforced (101st `mesh_execute` in a minute returns rate-limit error) +- [ ] Claude Code status line shows mesh name and peer count when in a shared mesh + +### Visual / UX + +- [ ] All colors come from `ui/styles.ts` — CI lint rule `no-inline-colors` passes (zero violations) +- [ ] All icons come from `Icons` — CI lint rule `no-raw-glyphs` passes +- [ ] Main `--help` shows exactly 8 commands plus the "When something's wrong" section +- [ ] `help advanced` shows the rest +- [ ] Errors are 1–3 sentences, user-actionable, no stack traces (per-error assertion in test suite) +- [ ] Typo recovery suggests correct mesh slugs for levenshtein distance ≤ 2 +- [ ] First-run welcome is typography-only, no brand mark, no boxes + +### Build / ship + +- [ ] Bundle size (gzipped JS) **under 1.2 MB** on the CI runner (CI fails on regression >20%) +- [ ] Cold start **under 400 ms** on Apple M2 Pro, **under 600 ms** on Linux x64 (GitHub Actions `ubuntu-latest`), **under 800 ms** on Windows x64 (GitHub Actions `windows-latest`) — measured by `tests/bench/cold-start.bench.ts` +- [ ] `bun test` passes with **80%+ branch coverage** on `services/*` excluding `services/broker/*` (broker is integration-tested only) +- [ ] `services/broker/*` has **70%+ branch coverage** via integration tests against staging backends +- [ ] Integration tests pass against staging broker with all four backends (Postgres, Neo4j, Qdrant, MinIO) on Linux x64 +- [ ] E2E tests pass for browser device-code flow on macOS arm64, Linux x64, Windows x64 +- [ ] Published to npm as `claudemesh-cli@1.0.0` with platform-specific native addons for `darwin-arm64`, `darwin-x64`, `linux-x64`, `linux-arm64`, `win32-x64` +- [ ] Dependency-cruiser + ESLint boundaries + 3 custom rules (`no-index-reexport-internal`, `type-imports-count-as-edges`, `no-dynamic-service-imports`) enforce the dep graph in CI with zero violations +- [ ] `tests/unit/facade-boundaries-classification.test.ts` passes — verifies pattern 
precedence +- [ ] `tests/unit/facade-boundary-scan.test.ts` passes — AST-based scan of all facade output types for forbidden keys + +### Security (new category) + +- [ ] Token storage: `~/.claudemesh/auth.json` is `0600` on write, warns on drift, never logged +- [ ] TLS: all HTTPS connections use `checkServerIdentity` with full certificate validation; no `rejectUnauthorized: false` +- [ ] Vault access: deployed MCPs receive secrets only via env injection, scrubbed from logs, never appear in `mcp_logs` output +- [ ] Cross-mesh isolation tests pass for SQL (verified: `SELECT * FROM "mesh_other".table` is rejected), graph (verified: `MATCH (n) WHERE n.mesh_id = "other"` returns empty on Enterprise + refused on Community), vectors (verified: `scope: self` doesn't leak other peers' vectors), files (verified: MinIO bucket-per-mesh presigned URL cannot access other buckets) +- [ ] Deployed MCP sandbox tests pass: container cannot escape (read-only root confirmed), cannot reach private IPs (`169.254.169.254` metadata endpoint blocked by egress proxy), cannot access host Docker socket (confirmed via attempted mount) +- [ ] Path traversal tests pass: `share_file` with `path: "../../etc/passwd"` rejected, `vault_set` with `mount_path: "../../etc/passwd"` rejected, `files.blob_path` validated on read +- [ ] Rate limiting verified: 101st `mesh_execute` in a minute returns `rate_limited` error; limits apply per peer, not per mesh +- [ ] URL watch SSRF: `mesh_watch` against `169.254.169.254`, `10.0.0.1`, `127.0.0.1` rejected at creation; DNS rebinding attempts disable the watch + +### Migration + +- [ ] v0.10.5 users get auto-migrated on first v2 run +- [ ] Old config file preserved as `config.json.backup` +- [ ] `claudemesh advanced migrate` available for manual re-run +- [ ] Migration never loses joined meshes or local state +- [ ] Schema migration from v2 → v2.1 (hypothetical) preserves backward compat: v2 reading a v2.1 database works for unchanged tables, v2.1 reading a v2 
database runs the migration runner on first launch + +### HA / outage behavior (new category) + +- [ ] Broker outage during active session: Claude Code session continues, local-first tools work, broker-backed tools return clear error `"Can't reach the mesh broker right now."` +- [ ] Broker reconnect: sync daemon resumes automatically with exponential backoff, outbox drains on reconnect +- [ ] Broker restart + epoch change: inbox dedupe works via `(mesh_slug, broker_epoch, broker_seq)`, no duplicate apply, no gap +- [ ] Postgres outage: broker returns a clear error, CLI retries with backoff, no data corruption +- [ ] Neo4j outage (shared mesh only): `graph_*` tools fail with clear message, other tools unaffected +- [ ] Qdrant outage: `vector_*` tools fail, local SQLite vectors (personal mesh) still work +- [ ] MinIO outage: file upload/download fails with clear message, local blob store unaffected + +### Migration + +- [ ] v0.10.5 users get auto-migrated on first v2 run +- [ ] Old config file preserved as `config.json.backup` +- [ ] `claudemesh advanced migrate` available for manual re-run +- [ ] Migration never loses joined meshes or local state + +### i18n / a11y + +- [ ] All user-visible strings in `locales/en.ts` and `locales/es.ts` +- [ ] `CLAUDEMESH_LOCALE=es` switches the CLI to Spanish +- [ ] `NO_COLOR=1` disables colors; all states remain legible via icon + bold +- [ ] `FORCE_COLOR=1` enables colors in non-TTY contexts +- [ ] Token-signal matrix verified for every screen (CI test) + +### Security + +- [ ] `~/.claudemesh/` is `0700` +- [ ] `auth.json` and `keys/*` are `0600` +- [ ] Permission drift produces a warning on read and is fixed on write +- [ ] Tokens are never logged, never printed except at creation (grep test on logs) +- [ ] `cm_` prefix enables secret scanning +- [ ] Every broker-backed tool call is audit-logged +- [ ] Rate limits enforced per tool per peer + +### Telemetry + +- [ ] Opt-out notice shown once on first run (Trust surface, not 
delight) +- [ ] `claudemesh advanced telemetry off` disables immediately +- [ ] Zero PII in telemetry events (schema validation) + +### Ownership + +Each criterion above has a designated owner (CLI-Dev, Web-Dev, Backend-Dev, or Orchestrator) tracked in `.artifacts/backlog/2026-04-10-v1.0.0-acceptance.md`. + +--- + +## 18. Open questions + +1. **Better Auth `apiKey` plugin version**: confirm the monorepo's Better Auth version supports `enableMetadata: true`. Verify in Phase 0. If not, upgrade or fork. +2. **Atomic swap timing**: tag v0.11.0 on the final pre-swap alpha, tag v1.0.0 on the swap commit. +3. **Email sending for `claudemesh invite <email>`**: does the web app already have a transactional email path (Resend/Postmark)? If yes, reuse. If not, Phase 4 includes wiring it. +4. **Self-hosted broker support**: first-class in v1.0.0 or defer to v1.1+? Recommendation: document the config field for v1.0.0 (`broker_url` in `config.json`), full self-hosting guide in v1.1. +5. **MCP tool surface parity with v1**: confirmed ~80 tools, all covered by the tool families in §12. +6. **Windows clipboard detection**: use `clipboardy` (small dep) or native PowerShell? Recommendation: `clipboardy`. +7. **Neo4j edition**: Enterprise (multi-database) or Community (single DB + label filtering)? Recommendation: document both, warn community users of the security implications. + +--- + +## 19. Explicitly out of scope for v1.0.0 + +These are valuable features deferred to v1.1+. Listed here to prevent scope creep. 
+ +- Plugin system — users can't extend the CLI with custom commands +- Remote session resume — can't pick up a session on a different machine +- Multi-account switching — one identity per machine +- Native keychain integration — tokens stay in 0600 files +- Terminal multiplexer awareness — no special tmux/screen integration +- Voice or vim modes in the CLI +- Custom prompt templates +- Scheduled / cron-style automations outside `claudemesh advanced schedule` +- Full dashboard embedded in terminal +- Mobile companion +- Self-update mechanism — `npm i -g claudemesh-cli@latest` is the update path +- Mesh archival / soft delete +- Fine-grained token scopes +- OAuth providers other than Better Auth's built-ins +- Hybrid logical clocks (plain Lamport is sufficient) +- SQLite encryption at rest +- Time-series memory queries +- Vector re-embedding incremental mode +- Import support for arbitrary lamport-stamped data + +--- + +## 20. Future roadmap (v1.1+) + +Rough order of expected value: + +1. **v1.1**: Native keychain integration (macOS Keychain, Windows Credential Manager, GNOME Keyring) +2. **v1.2**: Plugin system with manifest format and install command +3. **v1.3**: Mesh archival + soft delete +4. **v1.4**: Multi-machine personal mesh sync (opt-in, account-level encryption) +5. **v1.5**: Token scopes (`mesh:read`, `mesh:write`, `invite:create`, etc.) +6. **v1.6**: Self-hosted broker first-class support +7. **v1.7**: Peer discovery over LAN (mDNS/Bonjour) for air-gapped meshes +8. **v1.8**: Fleet management dashboard (multiple machines per user) +9. **v2.0**: Plugin marketplace, web extension points, SDK for third-party tools + +Each is a separate spec written closer to implementation time. + +--- + +## 21. The one-paragraph summary + +**claudemesh-cli v2 is a complete rewrite that ships a zero-friction, hybrid local-first + broker-backed, Apple-grade terminal UX on top of a feature-folder architecture enforced by dependency rules and facade boundaries. 
A new user runs `claudemesh` once, clicks a browser button, and is in Claude Code with a working mesh in under 8 seconds. A returning user runs `claudemesh` and the terminal becomes Claude Code with under 400ms overhead. Per-peer data (memory, state, tasks, personal files) lives in local SQLite with exactly-once sync via lamport-stamped outbox/inbox. Shared-mesh data (SQL tables, graph, vector search, large files, deployed MCP servers) lives on broker-backed services (Postgres, Neo4j, Qdrant, MinIO, Docker sandboxes) with schema-per-mesh isolation and RBAC. Auth is lazy — triggered only by publish, invite, or explicit API calls. The wizard is a declarative flow pipeline with overlay-stack interrupts and a single teardown choke point. The visual system is six semantic color roles, ten icons, typography-only branding. The command surface is eight primary verbs plus an advanced namespace. The default MCP catalog bundles 19 curated official servers for one-command deployment. The codebase is ~200 files organized by feature with strict layer boundaries. It ships as `apps/cli-v2/` scaffolded against v0.10.5 as reference, atomically swapped in once complete, and published as `claudemesh-cli@1.0.0` after 32–42 days of realistic work (or 10–14 days aggressive with Opus 4.6 1M).** + +--- + +**End of spec.** diff --git a/.artifacts/specs/2026-04-10-cli-v2-pass2-local-first-storage.md b/.artifacts/specs/2026-04-10-cli-v2-pass2-local-first-storage.md new file mode 100644 index 0000000..227d128 --- /dev/null +++ b/.artifacts/specs/2026-04-10-cli-v2-pass2-local-first-storage.md @@ -0,0 +1,2060 @@ +# claudemesh-cli v2 Pass 2 — Local-first storage + +> ⚠️ **This document describes v2 Pass 2 work entirely — NOT the Pass 1 scope.** +> +> For the v2 Pass 1 implementation target, see **`2026-04-11-cli-v2-pass1.md`**. +> +> Pass 1 has NO local SQLite source of truth, NO Lamport clock, NO sync daemon, NO write queue, NO conflict resolution, NO publish transaction. 
v2 Pass 1 uses the broker as the authority for all mesh data (same as v1). Local caching, if any, is ephemeral and read-only. +> +> This entire document describes Pass 2 work that ships later — when the local-first architectural improvement is prioritized over other backlog items. Until then, do not reference this spec for Pass 1 implementation decisions. + +**Status:** Pass 2 future reference — NOT the Pass 1 implementation target +**Created:** 2026-04-10 +**Consolidated:** 2026-04-10 (post-reviews, critical bugs fixed inline) +**Companion to:** `2026-04-10-cli-v2-final-vision.md` (§7 defers to this document for all storage details) +**Purpose:** Complete specification of the local SQLite store, sync protocol, conflict resolution, and broker integration. Every distributed-systems correctness concern lives here. + +This document has been reviewed twice (generic architecture review + GPT-5.3-Codex distributed systems review) and all critical bugs are fixed inline below. When the architecture spec body conflicts with this document, this document wins for storage concerns. + +--- + +## Table of contents + +1. Design principles +2. Runtime and dependencies +3. File layout and permissions +4. Lamport clock algorithm (atomic, race-free) +5. Schema (complete, with all constraints) +6. Vector storage with model fingerprinting +7. Memory recall semantics +8. File blob storage and garbage collection +9. Personal → shared publish upgrade protocol +10. Task claim semantics and audit events +11. Single-writer concurrency model +12. Sync protocol (outbox, inbox, broker epoch, ordering) +13. Conflict resolution per tool family +14. Offline behavior +15. Error recovery +16. Migration between schema versions +17. Bundle size accounting (honest) +18. Shutdown and drain protocol +19. Testing strategy +20. Operational concerns +21. Open questions deferred to v1.1+ + +--- + +## 1. 
Design principles + +### P1 — SQLite is the source of truth for mesh data + +Every stateful operation writes locally first. The broker is a sync channel. When the broker is unreachable, the CLI is fully functional for data the user already has. + +### P2 — Single writer, many readers + +SQLite WAL mode + a single-writer queue. No "database is locked" errors. No nested transactions across daemon and tool handlers. + +### P3 — Last-writer-wins with total order via (lamport, peer_id_bytes) + +Cross-peer conflicts are resolved by comparing `(lamport, peer_id_bytes)` tuples. `peer_id` is compared **byte-wise** on NFC-normalized UTF-8 (not `localeCompare`) to guarantee deterministic ordering across hosts with different locales or ICU versions. + +### P4 — Idempotency at every boundary + +Inbox operations are deduplicated by `(mesh_slug, broker_epoch, broker_seq)`. Outbox operations carry a stable `client_op_id` (UUIDv7) that the broker honors for dedupe. Retry is always safe. + +### P5 — Append-only where possible + +Vectors, audit events, and message history are append-only. Deletes are tombstones, not row removal. + +### P6 — Content-addressed blobs + +Files over 64 KB live outside SQLite, addressed by SHA256. Refcounted for GC. + +### P7 — Explicit over implicit + +Every query that could cross peers has an explicit scope (`self`, `peer:<id>`, `all`). No magic global queries. + +### P8 — Fail-safe to offline + +Tool handlers always succeed locally. If the sync daemon dies, the tool surface still works. If SQLite dies, the CLI surfaces a clear error and refuses to proceed (no corrupted-state operations). + +### P9 — Every write is inside a transaction, through the queue + +No "loose" writes. Every state-changing SQL statement runs inside a transaction enqueued on the single-writer queue. The lamport tick is part of the same transaction. + +### P10 — Sync durability via outbox, not "fire and forget" + +An operation is not "done" until its outbox row is `synced_at != null`. 
Broker acks include a stable server identifier that the outbox records. Crash-after-send-before-ack replays are idempotent on the broker side via `client_op_id`. + +--- + +## 2. Runtime and dependencies + +### 2.1 SQLite engine + +**`better-sqlite3`** for the tool handler path. Synchronous API, WAL-friendly, no native async overhead per call. + +Rejected alternatives: +- `node:sqlite` — experimental in Node 22, release cadence unclear +- `bun:sqlite` — Bun-only, whereas the distribution target is Node +- `libsql` — larger, more deps + +### 2.2 Vector extension + +**`sqlite-vec`** (not `sqlite-vss`): +- Actively maintained (vss is stale) +- Smaller binary (~200KB vs ~2MB) +- Pure C, no FAISS dependency +- Simpler `vec0` virtual table API + +Loaded at runtime via `db.loadExtension('sqlite-vec')`. The extension binary is bundled per-platform in the npm package under `node_modules/claudemesh-cli/vendor/sqlite-vec-<platform>.<ext>`. + +### 2.3 Schema migration runner + +Custom, not `drizzle-kit`. The migration surface is tiny (~5 migrations for v1.0.0), deterministic at startup, and we already write types by hand. + +### 2.4 No ORM + +Hand-written SQL with parameterized placeholders. Typed query wrappers live in `services/store/query.ts`. + +### 2.5 UUID generation + +`uuidv7` from a small pure-JS lib (≈ 2KB). UUIDv7 gives temporal ordering in IDs, which helps index locality and debugging. + +--- + +## 3. File layout and permissions + +``` +~/.claudemesh/ +├── data.db # 0600 — main SQLite database +├── data.db-wal # 0600 — write-ahead log (created by SQLite) +├── data.db-shm # 0600 — shared memory file (created by SQLite) +├── blobs/ # 0700 — content-addressed blob store +│ ├── a1/ +│ │ └── a1b2c3...sha256 # 0600 +│ └── f5/ +│ └── f5e4d3...sha256 # 0600 +└── ... 
+``` + +**Permission enforcement**: +- At startup, `services/store/db.ts` verifies file modes match baseline; fixes drift with a logged warning +- New files created with umask `077` (0600 files, 0700 dirs) +- `blobs/` subdirectory naming uses first two hex chars of SHA256 to keep per-directory file counts manageable + +--- + +## 4. Lamport clock algorithm + +This is the part the original spec had wrong. The canonical rules here are **load-bearing for correctness**. Every storage write MUST follow them. + +### 4.1 The invariant + +Every peer maintains a per-mesh Lamport counter in `lamport_clocks(mesh_slug, value)`. The counter MUST satisfy: + +``` +∀ local_event: counter_after = counter_before + 1 +∀ merged_event (from remote peer with lamport L): + counter_after = max(counter_before, L) + 1 +``` + +### 4.2 Atomic tick implementation + +The original `SELECT` then `INSERT OR REPLACE` pattern races between concurrent writers. The correct implementation uses a single atomic `UPDATE ... RETURNING`: + +```ts +// services/store/lamport.ts + +export class LamportRaceError extends Error { + readonly code = 'LAMPORT_RACE'; + constructor(meshSlug: string) { + super(`tickLamport: mesh ${meshSlug} row disappeared between INSERT and UPDATE`); + } +} + +export class LamportUnknownMeshError extends Error { + readonly code = 'LAMPORT_UNKNOWN_MESH'; + constructor(meshSlug: string) { + super(`tickLamport: mesh ${meshSlug} does not exist in mesh table`); + } +} + +/** + * Atomically tick the lamport clock for a mesh. MUST be called inside the + * transaction that writes the domain row it's stamping, AND that transaction + * MUST be enqueued on the single-writer queue. + * + * Defensive: validates the mesh exists, validates the UPDATE affected exactly + * one row, and throws clearly-typed errors on any anomaly. 
+ * + * @param db The writer connection (use write queue) + * @param meshSlug The mesh whose clock to tick + * @param incomingLamport The remote event's lamport (for merge) or undefined for local + * @returns The new lamport value to stamp on the row + * @throws LamportUnknownMeshError if the mesh slug does not exist + * @throws LamportRaceError if the UPDATE matched zero rows (should never happen) + */ +export function tickLamport( + db: Database, + meshSlug: string, + incomingLamport?: number, +): number { + // Validate the mesh exists before touching the clock + const meshExists = db.prepare('SELECT 1 FROM mesh WHERE slug = ?').get(meshSlug); + if (!meshExists) { + throw new LamportUnknownMeshError(meshSlug); + } + + // Ensure the lamport_clocks row exists for this mesh + db.prepare(` + INSERT INTO lamport_clocks (mesh_slug, value) + VALUES (?, 0) + ON CONFLICT(mesh_slug) DO NOTHING + `).run(meshSlug); + + // Atomic UPDATE ... RETURNING: compute max(current, incoming) + 1 in SQL + const base = incomingLamport ?? 0; + const result = db.prepare(` + UPDATE lamport_clocks + SET value = MAX(value, ?) + 1 + WHERE mesh_slug = ? + RETURNING value + `).get(base, meshSlug) as { value: number } | undefined; + + // Defensive: RETURNING may yield nothing if the row was deleted between + // the INSERT and UPDATE (e.g. concurrent mesh deletion outside the queue). + // This should be impossible under the single-writer contract, but we check + // anyway and throw a clear error rather than crashing on .value of undefined. + if (!result) { + throw new LamportRaceError(meshSlug); + } + return result.value; +} +``` + +### 4.3 Caller contract + +**Every caller of `tickLamport` MUST**: + +1. Be inside a `db.transaction(() => { ... })` block +2. Enqueue the transaction through the single-writer queue +3. Stamp the returned value on the domain row in the same transaction +4. 
Never call `tickLamport` twice in the same transaction (one tick per logical event) + +**Failure mode if rule 1 is violated**: the counter updates but the domain row write races separately, breaking the invariant. CI tests enforce this by mocking the write queue and asserting `tickLamport` is always called inside `queue.enqueue(...)`. + +### 4.4 Rollback semantics + +If the enclosing transaction rolls back, the lamport update rolls back with it. The counter goes back to its previous value, and the logical event is treated as if it never happened. This is correct **only if no external effect escaped the transaction** — e.g. no network call was made, no file was written outside the DB. The sync daemon guarantees this by enqueueing outbox rows inside the same transaction as the domain write. + +### 4.5 Tiebreaker: bytewise peer_id comparison on NFC-normalized UTF-8 + +When two operations have the same lamport value, the tiebreaker is byte-wise comparison of the **NFC-normalized** UTF-8 representation of `peer_id`. + +**Normalization is mandatory.** Without NFC normalization, two peers with visually-identical display names encoded differently (NFC vs NFD — e.g. "café" as `café` vs `cafe\u0301`) produce different byte sequences and thus different conflict winners. NFC is enforced at peer registration and before every comparison. + +```ts +// services/store/conflict.ts + +/** Normalize a peer_id to NFC before any comparison or storage. */ +export function normalizePeerId(peerId: string): string { + return peerId.normalize('NFC'); +} + +export function compareOps( + a: { lamport: number; peer_id: string }, + b: { lamport: number; peer_id: string }, +): number { + if (a.lamport !== b.lamport) return a.lamport - b.lamport; + // Both peer_ids MUST be NFC-normalized; this is enforced at write time. + // Buffer.compare is stable across Node/Bun, little-endian/big-endian, and + // platform-independent because UTF-8 byte sequence is canonical. 
+ return Buffer.compare( + Buffer.from(a.peer_id, 'utf8'), + Buffer.from(b.peer_id, 'utf8'), + ); +} + +/** Returns true if A wins over B (A is "more recent"). */ +export function aWins( + a: { lamport: number; peer_id: string }, + b: { lamport: number; peer_id: string }, +): boolean { + return compareOps(a, b) > 0; +} +``` + +**Enforcement at write time**: every code path that inserts a `peer_id` into the database calls `normalizePeerId()` first. This includes: +- Mesh join (new peer registration) +- Outbox ops being enqueued with `peer_id` +- Inbox ops being applied +- Profile updates +- Any schema that has a `peer_id` column (memory, state_kv, vectors, files, tasks, peers) + +A database trigger cannot enforce this at the SQL layer as a backup, because SQLite has no built-in NFC function; the note below records what such a trigger would have to check and where the check lives instead: + +```sql +-- On every INSERT/UPDATE of peer_id columns, reject if not NFC-normalized +-- (actual NFC check must be done in application code; SQLite has no NFC function) +-- Instead, we validate at the single-writer queue's entry point via a helper. +``` + +Since SQLite doesn't have a native NFC function, the check is in `services/store/normalize.ts` which wraps every writer with an NFC assertion. The application-level enforcement is the primary defense. + +**Never** use `localeCompare` for conflict resolution — it depends on the host's ICU version and locale, which differ across peers and cause divergent winners for the same conflict. + +### 4.6 Hybrid logical clocks (NOT in v1.0.0) + +HLC combines physical time with a logical counter for better causality approximation. Rejected for v1.0.0: +- Physical clock skew introduces new failure modes +- Debugging HLC behavior requires deep familiarity +- Plain Lamport + bytewise tiebreaker is sufficient for LWW +- HLC can be added later as an additive migration (new column, not a replacement) + +### 4.7 Vector clocks (NOT shipped) + +Permanently rejected because of the storage cost (one int per peer per row) and the complexity cost. 
If causal consistency becomes a hard requirement for some feature, that feature uses server-side ordering via the broker. + +--- + +## 5. Schema + +### 5.1 Meshes + +```sql +CREATE TABLE IF NOT EXISTS mesh ( + slug TEXT PRIMARY KEY, + name TEXT NOT NULL, + kind TEXT NOT NULL CHECK (kind IN ('personal', 'shared_owner', 'shared_guest')), + broker_url TEXT, + server_id TEXT, + broker_epoch INTEGER NOT NULL DEFAULT 0, + created_at INTEGER NOT NULL, + updated_at INTEGER NOT NULL, + last_sync_at INTEGER, + schema_version INTEGER NOT NULL DEFAULT 1, + sync_paused INTEGER NOT NULL DEFAULT 0, + CHECK ( + (kind = 'personal' AND broker_url IS NULL AND server_id IS NULL) OR + (kind IN ('shared_owner', 'shared_guest') AND broker_url IS NOT NULL AND server_id IS NOT NULL) + ) +); + +CREATE TABLE IF NOT EXISTS lamport_clocks ( + mesh_slug TEXT PRIMARY KEY REFERENCES mesh(slug) ON DELETE CASCADE, + value INTEGER NOT NULL DEFAULT 0 +); +``` + +**`broker_epoch`**: monotonically increasing, managed by the broker. When the broker restarts and reassigns sequence numbers, it increments its epoch. The inbox unique constraint uses `(mesh_slug, broker_epoch, broker_seq)` so a new epoch cannot collide with prior deliveries. + +**Broker epoch ack protocol**: every broker ack message includes the broker's **current** epoch (not the epoch the op was processed under). The CLI updates `mesh.broker_epoch` from the current epoch on every ack. This handles the restart race: + +- CLI sends op under epoch N +- Broker restarts mid-op, becomes epoch N+1 +- Broker replays the op (or the CLI retries) under epoch N+1 +- Ack comes back with `current_epoch: N+1` +- CLI updates `mesh.broker_epoch = N+1` +- Next send uses epoch N+1 + +If an ack arrives with an epoch LOWER than the CLI's current recorded epoch (shouldn't happen, but defensive), the CLI logs a warning and ignores the epoch update but still accepts the ack (the server-seq is valid). 
+ +If the CLI tries to send an op tagged with an old epoch and the broker has moved on, the broker responds with `epoch_mismatch` + current epoch, and the CLI re-tags the outbox op with the new epoch before retrying (no data loss, just a retry delay). + +**`sync_paused`**: set to 1 when the outbox has accumulated too many failed ops for a mesh. Cleared by `claudemesh doctor --resume-sync`. + +### 5.2 Memory + +```sql +CREATE TABLE IF NOT EXISTS memory ( + id TEXT PRIMARY KEY, + mesh_slug TEXT NOT NULL REFERENCES mesh(slug) ON DELETE CASCADE, + peer_id TEXT NOT NULL, + key TEXT NOT NULL, + value TEXT NOT NULL, + tags TEXT, + created_at INTEGER NOT NULL, + updated_at INTEGER NOT NULL, + lamport INTEGER NOT NULL, + tombstone INTEGER NOT NULL DEFAULT 0, + UNIQUE(mesh_slug, peer_id, key) +); + +CREATE INDEX memory_mesh_key_live ON memory(mesh_slug, key) WHERE tombstone = 0; +CREATE INDEX memory_mesh_peer_live ON memory(mesh_slug, peer_id) WHERE tombstone = 0; +``` + +**Upsert logic (NOT `INSERT OR REPLACE`)**: `INSERT ... ON CONFLICT(mesh_slug, peer_id, key) DO UPDATE SET` with an explicit `WHERE` clause comparing `(lamport, peer_id)` tuples. This preserves LWW semantics and avoids losing concurrent writes. + +```ts +export function upsertMemory(db: Database, row: MemoryRow): void { + db.prepare(` + INSERT INTO memory (id, mesh_slug, peer_id, key, value, tags, created_at, updated_at, lamport, tombstone) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + ON CONFLICT(mesh_slug, peer_id, key) DO UPDATE SET + value = excluded.value, + tags = excluded.tags, + updated_at = excluded.updated_at, + lamport = excluded.lamport, + tombstone = excluded.tombstone, + id = excluded.id + WHERE excluded.lamport > memory.lamport + OR (excluded.lamport = memory.lamport AND excluded.peer_id > memory.peer_id) + `).run(row.id, row.mesh_slug, row.peer_id, row.key, row.value, + row.tags ?? 
null, row.created_at, row.updated_at, row.lamport, row.tombstone); +} +``` + +Note: `excluded.peer_id > memory.peer_id` uses SQLite's default binary comparison, which is byte-wise for BLOB and TEXT. That matches the application-level bytewise rule. + +### 5.3 State KV + +```sql +CREATE TABLE IF NOT EXISTS state_kv ( + mesh_slug TEXT NOT NULL REFERENCES mesh(slug) ON DELETE CASCADE, + key TEXT NOT NULL, + value TEXT NOT NULL, + updated_by TEXT NOT NULL, + updated_at INTEGER NOT NULL, + lamport INTEGER NOT NULL, + tombstone INTEGER NOT NULL DEFAULT 0, + PRIMARY KEY (mesh_slug, key) +); + +CREATE INDEX state_kv_lamport ON state_kv(mesh_slug, lamport); +``` + +**Upsert with LWW predicate**: + +```ts +export function upsertStateKv(db: Database, row: StateKvRow): void { + db.prepare(` + INSERT INTO state_kv (mesh_slug, key, value, updated_by, updated_at, lamport, tombstone) + VALUES (?, ?, ?, ?, ?, ?, ?) + ON CONFLICT(mesh_slug, key) DO UPDATE SET + value = excluded.value, + updated_by = excluded.updated_by, + updated_at = excluded.updated_at, + lamport = excluded.lamport, + tombstone = excluded.tombstone + WHERE excluded.lamport > state_kv.lamport + OR (excluded.lamport = state_kv.lamport AND excluded.updated_by > state_kv.updated_by) + `).run(row.mesh_slug, row.key, row.value, row.updated_by, row.updated_at, row.lamport, row.tombstone); +} +``` + +### 5.4 Vectors + +```sql +CREATE TABLE IF NOT EXISTS vector_models ( + id TEXT PRIMARY KEY, -- fingerprint: sha256(provider:model:version:dim:quant) + provider TEXT NOT NULL, -- e.g. 'voyage-ai', 'openai', 'sentence-transformers' + model TEXT NOT NULL, -- e.g. 'voyage-3-large' + model_version TEXT NOT NULL, -- e.g. '1.0' or 'unknown' + dim INTEGER NOT NULL, + quantization TEXT NOT NULL DEFAULT 'float32', + vec_table TEXT NOT NULL, -- e.g. 
'vectors_a1b2c3' + created_at INTEGER NOT NULL, + UNIQUE(provider, model, model_version, dim, quantization) +); + +CREATE TABLE IF NOT EXISTS vector_metadata ( + rowid INTEGER PRIMARY KEY AUTOINCREMENT, + mesh_slug TEXT NOT NULL REFERENCES mesh(slug) ON DELETE CASCADE, + peer_id TEXT NOT NULL, + key TEXT NOT NULL, + content TEXT NOT NULL, + metadata TEXT, + model_id TEXT NOT NULL REFERENCES vector_models(id), + vec_rowid INTEGER NOT NULL, + lamport INTEGER NOT NULL, + created_at INTEGER NOT NULL, + tombstone INTEGER NOT NULL DEFAULT 0 +); + +CREATE INDEX vector_metadata_mesh_model ON vector_metadata(mesh_slug, model_id) WHERE tombstone = 0; +CREATE INDEX vector_metadata_peer ON vector_metadata(mesh_slug, peer_id) WHERE tombstone = 0; + +-- vec tables are created dynamically, one per model fingerprint: +-- CREATE VIRTUAL TABLE vectors_<fingerprint_prefix> USING vec0(embedding FLOAT[<dim>]); +``` + +**Model fingerprint**: `sha256(provider + ':' + model + ':' + model_version + ':' + dim + ':' + quantization)`. This catches provider-specific model revisions, tokenizer changes, and quantization differences that would silently corrupt cross-machine semantic compatibility. 
+ +### 5.5 Files + +```sql +CREATE TABLE IF NOT EXISTS files ( + id TEXT PRIMARY KEY, + mesh_slug TEXT NOT NULL REFERENCES mesh(slug) ON DELETE CASCADE, + peer_id TEXT NOT NULL, + path TEXT NOT NULL, + sha256 TEXT NOT NULL, + size INTEGER NOT NULL, + storage_kind TEXT NOT NULL CHECK (storage_kind IN ('inline', 'blob')), + inline_content BLOB, + blob_path TEXT, + shared_with TEXT NOT NULL DEFAULT '[]', + created_at INTEGER NOT NULL, + updated_at INTEGER NOT NULL, + lamport INTEGER NOT NULL, + tombstone INTEGER NOT NULL DEFAULT 0, + CHECK ( + (storage_kind = 'inline' AND inline_content IS NOT NULL AND blob_path IS NULL) OR + (storage_kind = 'blob' AND inline_content IS NULL AND blob_path IS NOT NULL) + ) +); + +CREATE INDEX files_mesh_peer_live ON files(mesh_slug, peer_id) WHERE tombstone = 0; +CREATE INDEX files_sha256 ON files(sha256); + +CREATE TABLE IF NOT EXISTS blob_refs ( + sha256 TEXT PRIMARY KEY, + ref_count INTEGER NOT NULL DEFAULT 0, + bytes INTEGER NOT NULL, + created_at INTEGER NOT NULL, + last_accessed INTEGER NOT NULL, + pending_unlink INTEGER NOT NULL DEFAULT 0 +); +``` + +**`storage_kind`** is explicit instead of inferring from nullable fields. Eliminates the earlier `(inline != null) XOR (blob != null)` check condition. + +**`pending_unlink`** marks blobs whose refcount has dropped to 0 but whose filesystem unlink has not yet completed. A GC sweep retries any rows still `pending_unlink = 1`. 
+ +### 5.6 Tasks + +```sql +CREATE TABLE IF NOT EXISTS tasks ( + id TEXT PRIMARY KEY, + mesh_slug TEXT NOT NULL REFERENCES mesh(slug) ON DELETE CASCADE, + title TEXT NOT NULL, + description TEXT, + status TEXT NOT NULL CHECK (status IN ('open', 'claimed', 'completed', 'cancelled')), + claimed_by TEXT, + claimed_at INTEGER, + completed_at INTEGER, + created_by TEXT NOT NULL, + created_at INTEGER NOT NULL, + updated_at INTEGER NOT NULL, + lamport INTEGER NOT NULL, + tombstone INTEGER NOT NULL DEFAULT 0 +); + +CREATE INDEX tasks_mesh_status ON tasks(mesh_slug, status) WHERE tombstone = 0; + +CREATE TABLE IF NOT EXISTS task_claim_events ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + mesh_slug TEXT NOT NULL REFERENCES mesh(slug) ON DELETE CASCADE, + task_id TEXT NOT NULL REFERENCES tasks(id) ON DELETE CASCADE, + peer_id TEXT NOT NULL, + event_type TEXT NOT NULL CHECK (event_type IN ( + 'claimed', -- peer successfully claimed an open task + 'superseded', -- peer's claim lost to another peer's concurrent claim + 'rejected_terminal', -- late claim for a task already completed/cancelled + 'released', -- peer voluntarily released their claim + 'completed', -- peer marked task complete + 'cancelled' -- task cancelled + )), + lamport INTEGER NOT NULL, + event_time INTEGER NOT NULL, -- sender-provided, not receiver wall time + applied_at INTEGER NOT NULL, -- receiver wall time for debug only + conflict_peer_id TEXT, + conflict_lamport INTEGER +); + +CREATE INDEX task_claim_events_task ON task_claim_events(task_id, lamport); +``` + +**`event_time` vs `applied_at`**: `event_time` is the sender-provided timestamp, used for replication equality. `applied_at` is the receiver wall time, used only for logs and debugging, never for conflict resolution. 
+ +### 5.7 Peers (cache) + +```sql +CREATE TABLE IF NOT EXISTS peers ( + mesh_slug TEXT NOT NULL REFERENCES mesh(slug) ON DELETE CASCADE, + peer_id TEXT NOT NULL, + display_name TEXT, + status TEXT, + summary TEXT, + last_seen_at INTEGER, + PRIMARY KEY (mesh_slug, peer_id) +); +``` + +### 5.8 Outbox (local → broker) + +```sql +CREATE TABLE IF NOT EXISTS outbox ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + mesh_slug TEXT NOT NULL REFERENCES mesh(slug) ON DELETE CASCADE, + op_type TEXT NOT NULL, + payload TEXT NOT NULL, + client_op_id TEXT NOT NULL UNIQUE, -- UUIDv7, broker dedupes on this + server_ack_id TEXT, + broker_epoch INTEGER, -- recorded from the ack + broker_seq INTEGER, -- recorded from the ack + created_at INTEGER NOT NULL, + attempts INTEGER NOT NULL DEFAULT 0, + last_error TEXT, + last_attempt_at INTEGER, + synced_at INTEGER +); + +CREATE INDEX outbox_pending ON outbox(mesh_slug, id) WHERE synced_at IS NULL; +``` + +**The broker MUST honor `client_op_id` for dedupe**. If the CLI sends the same `client_op_id` twice (crash-between-send-and-ack), the broker returns the original `server_ack_id`, epoch, and seq without applying the op a second time. This is the exactly-once delivery contract. + +### 5.9 Inbox (broker → local) + +```sql +CREATE TABLE IF NOT EXISTS inbox ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + mesh_slug TEXT NOT NULL REFERENCES mesh(slug) ON DELETE CASCADE, + broker_epoch INTEGER NOT NULL, + broker_seq INTEGER NOT NULL, + op_type TEXT NOT NULL, + payload TEXT NOT NULL, + received_at INTEGER NOT NULL, + applied_at INTEGER +); + +CREATE UNIQUE INDEX inbox_epoch_seq ON inbox(mesh_slug, broker_epoch, broker_seq); +CREATE INDEX inbox_pending ON inbox(mesh_slug, id) WHERE applied_at IS NULL; +``` + +**Composite uniqueness `(mesh_slug, broker_epoch, broker_seq)`** guards against broker restarts that reset sequence numbers. When a new epoch begins, seq starts at 1 again but collides with nothing because the epoch differs. 
+ +### 5.10 Migrations tracking + +```sql +CREATE TABLE IF NOT EXISTS _migrations ( + version INTEGER PRIMARY KEY, + applied_at INTEGER NOT NULL +); +``` + +--- + +## 6. Vector storage with model fingerprinting + +### 6.1 Model fingerprint + +```ts +// services/store/vector-fingerprint.ts + +export interface ModelIdentity { + provider: string; // 'voyage-ai' | 'openai' | 'sentence-transformers' | 'custom' + model: string; // 'voyage-3-large' + modelVersion: string; // '1.0' or 'unknown' if unversioned + dim: number; // 1024 + quantization: string; // 'float32' | 'int8' | 'binary' +} + +export function modelFingerprint(m: ModelIdentity): string { + const canonical = `${m.provider}:${m.model}:${m.modelVersion}:${m.dim}:${m.quantization}`; + return sha256Hex(canonical).slice(0, 16); +} +``` + +Each unique `ModelIdentity` gets its own vec table. Mismatched dimensions are impossible because the fingerprint diverges before the caller can insert into the wrong table. + +### 6.2 Table creation with race-safe registration + +The TOCTOU race in the original spec (`SELECT` then `CREATE VIRTUAL TABLE` then `INSERT`) is fixed by using `INSERT ... ON CONFLICT DO NOTHING` and re-reading: + +```ts +export function ensureVecTable(db: Database, model: ModelIdentity): string { + const id = modelFingerprint(model); + const tableName = `vectors_${id}`; + + // Try to register the model. If it already exists, this is a no-op. + db.prepare(` + INSERT INTO vector_models (id, provider, model, model_version, dim, quantization, vec_table, created_at) + VALUES (?, ?, ?, ?, ?, ?, ?, ?) + ON CONFLICT(id) DO NOTHING + `).run(id, model.provider, model.model, model.modelVersion, model.dim, model.quantization, tableName, Date.now()); + + // Ensure the virtual table exists. CREATE VIRTUAL TABLE IF NOT EXISTS is safe. + // Validate the table name is pure alphanumeric/underscore before interpolating. 
+ if (!/^vectors_[a-f0-9]{16}$/.test(tableName)) { + throw new Error(`invalid vec table name: ${tableName}`); + } + db.prepare(`CREATE VIRTUAL TABLE IF NOT EXISTS ${tableName} USING vec0(embedding FLOAT[${model.dim}])`).run(); + + return tableName; +} +``` + +The table name is validated against a strict regex before interpolation to prevent any SQL injection from a corrupted fingerprint. + +### 6.3 Insert + +```ts +export function vectorStore( + db: Database, + queue: WriteQueue, + input: { + mesh: string; + peer: string; + key: string; + content: string; + embedding: number[]; + model: ModelIdentity; + metadata?: unknown; + }, +): Promise<void> { + return queue.enqueue(() => { + db.transaction(() => { + if (input.embedding.length !== input.model.dim) { + throw new Error(`embedding length ${input.embedding.length} does not match model dim ${input.model.dim}`); + } + const vecTable = ensureVecTable(db, input.model); + const modelId = modelFingerprint(input.model); + + const buf = Buffer.from(new Float32Array(input.embedding).buffer); + const vecResult = db.prepare(`INSERT INTO ${vecTable}(embedding) VALUES (?)`).run(buf); + const vecRowid = Number(vecResult.lastInsertRowid); + + const lamport = tickLamport(db, input.mesh); + + db.prepare(` + INSERT INTO vector_metadata + (mesh_slug, peer_id, key, content, metadata, model_id, vec_rowid, lamport, created_at) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) + `).run( + input.mesh, + input.peer, + input.key, + input.content, + JSON.stringify(input.metadata ?? null), + modelId, + vecRowid, + lamport, + Date.now(), + ); + })(); + }); +} +``` + +### 6.4 Search + +Cross-model queries are forbidden. The caller specifies the model; if that model doesn't exist in the store, the result is empty (not an error). + +**Read-time integrity validation**: the `vec_table` column in `vector_models` must be treated as untrusted input. 
If the database is corrupted or manually edited, a malicious `vec_table` value could inject arbitrary SQL into the `CREATE VIRTUAL TABLE` / `SELECT FROM` statements. The query path re-derives the expected table name from the stored identity columns and verifies it matches BEFORE using it. + +```ts +export function vectorSearch( + db: Database, + input: { mesh: string; query: number[]; model: ModelIdentity; limit?: number }, +): VectorSearchResult[] { + const id = modelFingerprint(input.model); + const row = db.prepare(` + SELECT vec_table, dim, provider, model, model_version, quantization + FROM vector_models WHERE id = ? + `).get(id) as { + vec_table: string; + dim: number; + provider: string; + model: string; + model_version: string; + quantization: string; + } | undefined; + + if (!row) return []; + + // Integrity check: re-derive the fingerprint from stored identity columns + // and verify vec_table matches. Prevents trusting a corrupted registry row. + const derivedFingerprint = modelFingerprint({ + provider: row.provider, + model: row.model, + modelVersion: row.model_version, + dim: row.dim, + quantization: row.quantization, + }); + const expectedTableName = `vectors_${derivedFingerprint}`; + if (row.vec_table !== expectedTableName) { + throw new Error( + `vector_models integrity failure: id ${id} has vec_table="${row.vec_table}" ` + + `but derived ${expectedTableName}. 
Database may be corrupted — run claudemesh doctor.` + ); + } + + // Defense in depth: regex-validate the format + if (!/^vectors_[a-f0-9]{16}$/.test(row.vec_table)) { + throw new Error(`invalid vec table name from registry: ${row.vec_table}`); + } + + if (row.dim !== input.query.length) { + throw new Error(`dimension mismatch: expected ${row.dim}, got ${input.query.length}`); + } + + const buf = Buffer.from(new Float32Array(input.query).buffer); + return db.prepare(` + SELECT vm.key, vm.content, vm.peer_id, vm.metadata, t.distance + FROM ${row.vec_table} t + JOIN vector_metadata vm ON vm.vec_rowid = t.rowid + WHERE t.embedding MATCH ? + AND vm.mesh_slug = ? + AND vm.tombstone = 0 + AND vm.model_id = ? + ORDER BY t.distance + LIMIT ? + `).all(buf, input.mesh, id, input.limit ?? 10) as VectorSearchResult[]; +} +``` + +`ensureVecTable` must run the same integrity check before `CREATE VIRTUAL TABLE IF NOT EXISTS` — if the stored `vec_table` doesn't match the derived name, the function throws instead of creating a table with the wrong name. (The §6.2 listing shows only the regex validation of the derived name; it does not yet read back the stored `vec_table`.) + +### 6.5 Model migration protocol + +Changing embedding models is an explicit, expensive operation via `claudemesh advanced re-embed`: + +1. Begin: mark old model as `deprecated` in `vector_models` (this needs a status column, which the §5.4 schema does not yet define) +2. For each row in `vector_metadata` under old model (with progress output): + - Re-embed `content` with new model (requires network to the embedding provider or a local model) + - Insert into new vec table under new model fingerprint + - Tombstone the old row +3. After completion with zero reads of old model for 30 days, GC the old vec table via `DROP TABLE vectors_<old_fingerprint>` + +During the migration, reads against the old model still work (the vec table is not dropped until the grace period ends). New inserts go to the new model. + +--- + +## 7. 
Memory recall semantics + +### 7.1 API + +```ts +type RecallInput = { + mesh: string; + key: string; + scope?: + | { kind: 'self' } // default + | { kind: 'peer'; peer_id: string } + | { kind: 'all' }; +}; + +type RecallResult = + | { kind: 'single'; peer_id: string; value: string; lamport: number; updated_at: number } + | { kind: 'multi'; results: Array<{ peer_id: string; value: string; lamport: number; updated_at: number }> } + | { kind: 'not_found' }; +``` + +### 7.2 Resolution + +| `scope` | Behavior | +|---|---| +| `{ kind: 'self' }` (default) | Returns the current peer's value for `key`. `not_found` if absent. | +| `{ kind: 'peer', peer_id }` | Returns that peer's value. `not_found` if absent. | +| `{ kind: 'all' }` | Returns array sorted by `(lamport DESC, peer_id bytewise ASC)`. Empty array if none. | + +### 7.3 Tool surface + +```ts +// mcp/tools/memory.ts +{ + name: 'recall', + description: 'Retrieve a remembered value by key.', + inputSchema: { + key: z.string(), + peer: z.enum(['self', 'all']).or(z.string()).default('self'), + }, + handler: async ({ key, peer }) => memoryService.recall({ + mesh: currentMesh, + key, + scope: peer === 'self' + ? { kind: 'self' } + : peer === 'all' + ? { kind: 'all' } + : { kind: 'peer', peer_id: peer }, + }), +} +``` + +### 7.4 Namespaced keys (convention) + +For shared team memories, the convention is to namespace the key: + +``` +remember('team.api_key', '...') +recall('team.api_key') +``` + +This avoids per-peer collision entirely. The tool documentation recommends this pattern. + +--- + +## 8. 
File blob storage and garbage collection + +### 8.1 Path validation + +```ts +export function validatePath(p: string): void { + if (p.length === 0) throw new Error('empty path'); + if (p.length > 1024) throw new Error('path too long'); + if (p.includes('\0')) throw new Error('null byte in path'); + if (p.startsWith('/')) throw new Error('absolute path forbidden'); + if (p.includes('\\')) throw new Error('backslash forbidden'); + if (/(^|\/)\.\.($|\/)/.test(p)) throw new Error('parent reference forbidden'); + if (/(^|\/)\.($|\/)/.test(p)) throw new Error('self reference forbidden'); + if (!/^[\w. \-/+()]+$/.test(p)) throw new Error('invalid characters'); +} +``` + +### 8.2 Insert + +```ts +export function fileShare( + db: Database, + queue: WriteQueue, + blobsDir: string, + input: { mesh: string; peer: string; path: string; content: Buffer }, +): Promise<{ id: string }> { + validatePath(input.path); + const sha = sha256Hex(input.content); + const size = input.content.length; + + return queue.enqueue(() => { + // Write filesystem blob BEFORE the transaction so a rolled-back transaction + // doesn't leave a blob without a reference. If the transaction fails, + // we rely on GC sweep to clean up orphan files with pending_unlink = 1. 
+ let blobPath: string | null = null; + let inlineContent: Buffer | null = null; + + if (size < 64 * 1024) { + inlineContent = input.content; + } else { + blobPath = `blobs/${sha.slice(0, 2)}/${sha}`; + const fullPath = path.join(blobsDir, '..', blobPath); + fs.mkdirSync(path.dirname(fullPath), { recursive: true, mode: 0o700 }); + // Use O_EXCL to avoid overwriting concurrent write + try { + fs.writeFileSync(fullPath, input.content, { mode: 0o600, flag: 'wx' }); + } catch (err: any) { + if (err.code !== 'EEXIST') throw err; + // Already exists (deduped) — verify content matches + const existing = fs.readFileSync(fullPath); + if (!existing.equals(input.content)) { + throw new Error(`sha256 collision or corrupted blob: ${sha}`); + } + } + } + + const id = uuidv7(); + db.transaction(() => { + if (blobPath !== null) { + db.prepare(` + INSERT INTO blob_refs (sha256, ref_count, bytes, created_at, last_accessed) + VALUES (?, 1, ?, ?, ?) + ON CONFLICT(sha256) DO UPDATE SET + ref_count = ref_count + 1, + last_accessed = excluded.last_accessed, + pending_unlink = 0 + `).run(sha, size, Date.now(), Date.now()); + } + + const lamport = tickLamport(db, input.mesh); + + db.prepare(` + INSERT INTO files + (id, mesh_slug, peer_id, path, sha256, size, storage_kind, inline_content, blob_path, shared_with, created_at, updated_at, lamport) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + `).run( + id, input.mesh, input.peer, input.path, sha, size, + blobPath !== null ? 'blob' : 'inline', + inlineContent, + blobPath, + '[]', + Date.now(), Date.now(), lamport, + ); + })(); + return { id }; + }); +} +``` + +**Why write blob before transaction**: if the filesystem write succeeds and the transaction fails, the blob is orphaned on disk. Such an orphan has no `blob_refs` row (the insert rolls back with the rest of the transaction), so the `pending_unlink = 1` scan in §8.4 cannot see it; reclaiming it requires the GC sweep to also compare the files under the blobs directory against `blob_refs`. If we write after the transaction commits, a crash between commit and write would leave the DB referencing a missing blob. The first failure only wastes disk space until it is collected; the second is data loss. + +The upsert also clears `pending_unlink`, so content that is re-shared after its previous reference was deleted cannot be unlinked by a later GC sweep. 
+ +### 8.3 Delete with refcount + filesystem unlink + +```ts +export function fileDelete( + db: Database, + queue: WriteQueue, + blobsDir: string, + input: { mesh: string; file_id: string }, +): Promise { + return queue.enqueue(() => { + let blobToUnlink: string | null = null; + + db.transaction(() => { + const file = db.prepare('SELECT sha256, blob_path, storage_kind FROM files WHERE id = ? AND mesh_slug = ?').get(input.file_id, input.mesh) as any; + if (!file) return; + + const lamport = tickLamport(db, input.mesh); + db.prepare('UPDATE files SET tombstone = 1, updated_at = ?, lamport = ? WHERE id = ?').run(Date.now(), lamport, input.file_id); + + if (file.storage_kind === 'blob') { + db.prepare('UPDATE blob_refs SET ref_count = ref_count - 1 WHERE sha256 = ?').run(file.sha256); + const ref = db.prepare('SELECT ref_count FROM blob_refs WHERE sha256 = ?').get(file.sha256) as { ref_count: number }; + if (ref.ref_count <= 0) { + db.prepare('UPDATE blob_refs SET pending_unlink = 1 WHERE sha256 = ?').run(file.sha256); + blobToUnlink = file.blob_path; + } + } + })(); + + // Unlink happens AFTER the transaction commits. If it fails, GC sweep + // retries via pending_unlink = 1. + if (blobToUnlink !== null) { + const fullPath = path.join(blobsDir, '..', blobToUnlink); + try { + fs.unlinkSync(fullPath); + db.prepare('DELETE FROM blob_refs WHERE sha256 = ? 
AND pending_unlink = 1').run(path.basename(blobToUnlink)); // blob_refs is keyed by sha256 (the last segment of blob_path), not by the path + } catch (err: any) { + if (err.code !== 'ENOENT') { + // leave pending_unlink = 1 for GC sweep to retry + } + } + } + }); +} +``` + +### 8.4 GC sweep + +Runs every 24 hours and on shutdown: + +```ts +export function gcBlobs(db: Database, queue: WriteQueue, blobsDir: string): Promise<void> { + return queue.enqueue(() => { + // Pending unlinks from earlier failures (skip blobs that have been re-referenced since) + const pending = db.prepare('SELECT sha256 FROM blob_refs WHERE pending_unlink = 1 AND ref_count <= 0').all() as { sha256: string }[]; + for (const { sha256 } of pending) { + const blobPath = path.join(blobsDir, sha256.slice(0, 2), sha256); + try { + fs.unlinkSync(blobPath); + db.prepare('DELETE FROM blob_refs WHERE sha256 = ?').run(sha256); + } catch (err: any) { + if (err.code === 'ENOENT') { + db.prepare('DELETE FROM blob_refs WHERE sha256 = ?').run(sha256); + } + } + } + // Old tombstones + const cutoff = Date.now() - 90 * 24 * 60 * 60 * 1000; + db.prepare('DELETE FROM files WHERE tombstone = 1 AND updated_at < ?').run(cutoff); + }); +} +``` + +--- + +## 9. Personal → shared publish upgrade protocol + +### 9.1 Phases + +The protocol is split into phases, each of which is individually committable so crashes between phases are recoverable. + +```ts +export async function meshPublish( + services: { auth: AuthService; api: ApiClient; mesh: MeshService; broker: BrokerClient; queue: WriteQueue; db: Database }, + input: { mesh_slug: string; display_name?: string }, +): Promise<{ invite_url: string }> { + + // --- Phase 1: authentication --- + const token = await services.auth.ensureAuthenticated(); + + // --- Phase 2: server registration --- + // API is idempotent on (user, slug); calling twice returns the same server_id. + const response = await services.api.post('/api/my/meshes', { + name: input.display_name ?? 
(await services.mesh.getLocal(input.mesh_slug)).name, + slug: input.mesh_slug, + kind: 'shared_owner', + }); + // response: { server_id, broker_url, broker_epoch, slug } + + // --- Phase 3: local transition --- + await services.queue.enqueue(() => { + services.db.transaction(() => { + services.db.prepare(` + UPDATE mesh + SET kind = 'shared_owner', + broker_url = ?, + server_id = ?, + broker_epoch = ?, + updated_at = ? + WHERE slug = ? AND kind = 'personal' + `).run(response.broker_url, response.server_id, response.broker_epoch, Date.now(), input.mesh_slug); + + // Enqueue a mesh.publish marker op (first sync op). + services.db.prepare(` + INSERT INTO outbox (mesh_slug, op_type, payload, client_op_id, created_at) + VALUES (?, 'mesh.publish', ?, ?, ?) + `).run( + input.mesh_slug, + JSON.stringify({ snapshot_version: 1, schema_version: 1 }), + uuidv7(), + Date.now(), + ); + })(); + }); + + // --- Phase 4: backfill --- + // For small meshes (< 10k rows), enqueue all rows as backfill ops in chunks. + // For large meshes, use snapshot + cursor protocol (§9.3). + await backfillOutbox(services, input.mesh_slug); + + // --- Phase 5: sync wait --- + // Wait for the outbox to drain (with timeout). Publish is considered "done" + // when the sync daemon has acknowledged the mesh.publish marker. 
+ await waitForPublishAck(services, input.mesh_slug, { timeoutMs: 30_000 }); + + // --- Phase 6: first invite --- + const invite = await services.api.post(`/api/my/meshes/${input.mesh_slug}/invites`, { + expires_in: '7d', + }); + + return { invite_url: invite.url }; +} +``` + +### 9.2 Backfill with chunking + +To satisfy the `< 100ms per transaction` rule, backfill happens in small chunks: + +```ts +async function backfillOutbox(services: Services, meshSlug: string): Promise<void> { + const CHUNK_SIZE = 200; + + for (const table of ['memory', 'state_kv', 'vector_metadata', 'files', 'tasks']) { + let cursor = 0; + while (true) { + const done = await services.queue.enqueue(() => { + const rows = services.db.prepare(` + SELECT rowid, * FROM ${table} + WHERE mesh_slug = ? AND tombstone = 0 AND rowid > ? + ORDER BY rowid LIMIT ? + `).all(meshSlug, cursor, CHUNK_SIZE) as any[]; + + if (rows.length === 0) return true; + + services.db.transaction(() => { + for (const row of rows) { + services.db.prepare(` + INSERT INTO outbox (mesh_slug, op_type, payload, client_op_id, created_at) + VALUES (?, ?, ?, ?, ?) + `).run( + meshSlug, + `${table}.backfill`, + JSON.stringify(row), + uuidv7(), + Date.now(), + ); + } + })(); + + cursor = rows[rows.length - 1].rowid; + return false; + }); + + if (done) break; + } + } +} +``` + +Each chunk is a separate transaction (at most 200 rows with one outbox insert per row, i.e. up to 200 statements, well under 100ms). Between chunks, other writes can interleave via the queue. + +### 9.3 Large mesh snapshot protocol + +For meshes with >10k rows, use a server-side snapshot: + +``` +POST /api/my/meshes/:slug/snapshot/begin → { snapshot_id } +POST /api/my/meshes/:slug/snapshot/:id/chunk → { next_cursor } +POST /api/my/meshes/:slug/snapshot/:id/commit → { broker_epoch, broker_seq_start } +``` + +The CLI uploads rows in chunks keyed by `snapshot_id`. If the upload is interrupted, the next attempt reads the last cursor and resumes. 
The server commits atomically; partial uploads never become visible. + +### 9.4 Failure modes + +| Phase | Failure | Recovery | +|---|---|---| +| 1 (auth) | User denies in browser | Abort publish, local mesh unchanged | +| 2 (register) | API 409 (slug collision) | CLI suggests a suffix, retries with new slug | +| 3 (local transition) | Crash | Restart detects `kind = shared_owner` with empty outbox → resumes phase 4 | +| 4 (backfill) | Crash mid-chunk | Chunk transactions are atomic; resume from last committed rowid | +| 5 (wait) | Timeout | Publish is logically complete; user sees "Published, sync catching up" | +| 6 (invite) | API error | Mesh is published; user runs `claudemesh invite` explicitly | + +All phases are resumable because each phase's state is durable before the next phase begins. + +--- + +## 10. Task claim semantics and audit events + +### 10.1 Local claim + +```ts +export function taskClaim( + db: Database, + queue: WriteQueue, + input: { mesh: string; task_id: string; peer: string }, +): Promise { + return queue.enqueue(() => { + db.transaction(() => { + const task = db.prepare('SELECT * FROM tasks WHERE id = ? AND mesh_slug = ?').get(input.task_id, input.mesh) as any; + if (!task || task.tombstone) throw new Error('task not found'); + if (task.status === 'completed') throw new Error('task already completed'); + if (task.status === 'cancelled') throw new Error('task cancelled'); + if (task.status === 'claimed' && task.claimed_by !== input.peer) { + throw new Error(`task already claimed by ${task.claimed_by}`); + } + + const lamport = tickLamport(db, input.mesh); + const now = Date.now(); + + db.prepare(` + UPDATE tasks + SET status = 'claimed', claimed_by = ?, claimed_at = ?, updated_at = ?, lamport = ? + WHERE id = ? + `).run(input.peer, now, now, lamport, input.task_id); + + db.prepare(` + INSERT INTO task_claim_events + (mesh_slug, task_id, peer_id, event_type, lamport, event_time, applied_at) + VALUES (?, ?, ?, 'claimed', ?, ?, ?) 
+ `).run(input.mesh, input.task_id, input.peer, lamport, now, now); + + db.prepare(` + INSERT INTO outbox (mesh_slug, op_type, payload, client_op_id, created_at) + VALUES (?, 'task.claim', ?, ?, ?) + `).run( + input.mesh, + JSON.stringify({ task_id: input.task_id, peer_id: input.peer, lamport, event_time: now }), + uuidv7(), + now, + ); + })(); + }); +} +``` + +### 10.2 Inbound claim reconciliation — all branches covered + +```ts +export function applyInboxClaim( + db: Database, + op: { + mesh_slug: string; + task_id: string; + peer_id: string; + lamport: number; + event_time: number; + }, +): void { + db.transaction(() => { + const local = db.prepare('SELECT * FROM tasks WHERE id = ?').get(op.task_id) as any; + if (!local || local.tombstone) return; + + // Advance the lamport clock per the invariant + const newLamport = tickLamport(db, op.mesh_slug, op.lamport); + + // Branch on local status + if (local.status === 'completed' || local.status === 'cancelled') { + // Terminal states — audit the late claim as rejected, do not mutate the task. + // Event type is 'rejected_terminal' (not 'superseded') because the incoming + // claim wasn't beaten by another concurrent claim — it arrived after the + // task was already done. + db.prepare(` + INSERT INTO task_claim_events + (mesh_slug, task_id, peer_id, event_type, lamport, event_time, applied_at) + VALUES (?, ?, ?, 'rejected_terminal', ?, ?, ?) + `).run(op.mesh_slug, op.task_id, op.peer_id, newLamport, op.event_time, Date.now()); + return; + } + + if (local.status === 'open') { + // No conflict, apply the claim + db.prepare(` + UPDATE tasks + SET status = 'claimed', claimed_by = ?, claimed_at = ?, updated_at = ?, lamport = ? + WHERE id = ? + `).run(op.peer_id, op.event_time, Date.now(), newLamport, op.task_id); + + db.prepare(` + INSERT INTO task_claim_events + (mesh_slug, task_id, peer_id, event_type, lamport, event_time, applied_at) + VALUES (?, ?, ?, 'claimed', ?, ?, ?) 
+ `).run(op.mesh_slug, op.task_id, op.peer_id, newLamport, op.event_time, Date.now()); + return; + } + + // Claimed locally by someone — possibly a conflict + if (local.claimed_by === op.peer_id) { + // Same peer re-claiming (idempotent) — bump lamport only if higher + if (op.lamport > local.lamport) { + db.prepare('UPDATE tasks SET lamport = ? WHERE id = ?').run(newLamport, op.task_id); + } + return; + } + + // Different peer trying to claim + const localWinsTuple = aWins( + { lamport: local.lamport, peer_id: local.claimed_by }, + { lamport: op.lamport, peer_id: op.peer_id }, + ); + + if (localWinsTuple) { + // Our claim wins — log the incoming as superseded + db.prepare(` + INSERT INTO task_claim_events + (mesh_slug, task_id, peer_id, event_type, lamport, event_time, applied_at, conflict_peer_id, conflict_lamport) + VALUES (?, ?, ?, 'superseded', ?, ?, ?, ?, ?) + `).run(op.mesh_slug, op.task_id, op.peer_id, newLamport, op.event_time, Date.now(), local.claimed_by, local.lamport); + } else { + // Incoming wins — supersede our claim + db.prepare(` + UPDATE tasks + SET claimed_by = ?, claimed_at = ?, updated_at = ?, lamport = ? + WHERE id = ? + `).run(op.peer_id, op.event_time, Date.now(), newLamport, op.task_id); + + db.prepare(` + INSERT INTO task_claim_events + (mesh_slug, task_id, peer_id, event_type, lamport, event_time, applied_at, conflict_peer_id, conflict_lamport) + VALUES (?, ?, ?, 'superseded', ?, ?, ?, ?, ?) + `).run(op.mesh_slug, op.task_id, local.claimed_by, newLamport, op.event_time, Date.now(), op.peer_id, op.lamport); + + // Push notification to the local peer whose claim was superseded + if (local.claimed_by === currentPeerId(op.mesh_slug)) { + pushNotification({ + type: 'task_claim_superseded', + task_id: op.task_id, + by_peer: op.peer_id, + }); + } + } + })(); +} +``` + +Note the four branches: completed/cancelled (terminal, log only), open (apply), same-peer reclaim (idempotent), different-peer conflict (resolve via tuple comparison). 
The original spec missed the terminal-state branch. + +### 10.3 MCP notification + +When a local claim is superseded, subsequent tool calls by the affected agent include a `warnings` field: + +```json +{ + "ok": true, + "data": { /* tool result */ }, + "warnings": [ + { + "type": "task_claim_superseded", + "task_id": "abc123", + "by_peer": "bob", + "at_lamport": 42 + } + ] +} +``` + +Claude Code renders the warning in the TUI so agents don't silently redo work. + +--- + +## 11. Single-writer concurrency model + +### 11.1 The rule + +All writes go through one queue. Reads can use separate connections. No "database is locked" errors because only one writer holds the write lock at any time. + +### 11.2 Queue implementation with async awareness + +The original implementation didn't `await` the op result, meaning a Promise could slip through and the queue would mark "done" before the operation completed. Fixed version: + +```ts +// services/store/write-queue.ts + +type WriteOp<T> = () => T | Promise<T>; + +interface QueueItem<T> { + op: WriteOp<T>; + resolve: (v: T) => void; + reject: (e: Error) => void; + signal?: AbortSignal; +} + +export class WriteQueue { + private queue: QueueItem<any>[] = []; + private running = false; + // State machine: 'open' → 'stopping' → 'stopped' + // All transitions are guarded by the single JS event loop (no actual mutex + // needed because Node/Bun are single-threaded for user code), but we use + // this state field as the source of truth and check it atomically in each + // method relative to when control returns to user code. + private state: 'open' | 'stopping' | 'stopped' = 'open'; + + constructor(private db: Database) {} + + async enqueue<T>(op: WriteOp<T>, signal?: AbortSignal): Promise<T> { + // Read-and-act must happen in a single synchronous block — no awaits + // between the check and the push. JS single-threading guarantees this: + // no other code can run between these two statements. 
+ if (this.state !== 'open') { + throw new Error(`write queue is ${this.state}`); + } + if (signal?.aborted) throw new Error('aborted'); + return new Promise((resolve, reject) => { + this.queue.push({ op, resolve, reject, signal }); + void this.drain(); + }); + } + + private async drain(): Promise { + if (this.running) return; + this.running = true; + try { + while (this.queue.length > 0) { + const item = this.queue.shift()!; + if (item.signal?.aborted) { + item.reject(new Error('aborted')); + continue; + } + try { + // await handles both sync and async ops correctly: sync returns + // resolve immediately through the microtask queue, async returns + // wait for the Promise to settle before proceeding. + const result = await item.op(); + item.resolve(result); + } catch (err) { + item.reject(err as Error); + } + } + } finally { + this.running = false; + } + } + + /** + * Begin shutdown. New enqueues are rejected immediately. Existing items + * drain to completion. Returns when all queued items have been processed. + * + * Race-free: setting state='stopping' is atomic relative to enqueue()'s + * state check because JS is single-threaded. No enqueue can sneak an item + * past the check after stop() sets state='stopping'. + */ + async stop(): Promise { + if (this.state !== 'open') return; + this.state = 'stopping'; + // Wait for the drain to finish processing all queued items + while (this.running || this.queue.length > 0) { + await new Promise(r => setTimeout(r, 10)); + } + this.state = 'stopped'; + } + + /** + * Cancel all pending items immediately. Used on SIGKILL-style shutdown. 
+ */ + abort(): void { + if (this.state === 'stopped') return; + this.state = 'stopped'; + const pending = this.queue.splice(0); + for (const item of pending) { + item.reject(new Error('aborted')); + } + } +} +``` + +**Race-freedom rationale**: The JS event loop guarantees that `enqueue()`'s state check (`if (this.state !== 'open')`) and the subsequent `this.queue.push()` execute atomically — no other code can run between them. When `stop()` sets `this.state = 'stopping'`, any subsequent `enqueue()` call sees the updated state synchronously and rejects. There is no TOCTOU window because JS does not preempt synchronous code. + +The one subtlety: if `enqueue()` is called from an `async` function and has already passed its state check before `stop()` is called, the item is in the queue and will be drained. That's correct behavior — the caller's `await enqueue(...)` will resolve normally. A caller that wants to drop still-queued items calls `abort()` instead of `stop()`; `abort()` rejects everything still waiting in the queue, while the op that is already executing runs to completion. + +**Critical fix**: `await item.op()` instead of `const result = item.op()`. If `op` returns a Promise, the queue now waits for it to settle before starting the next item. Ops that return synchronous values (via `better-sqlite3`) resolve immediately through the Promise machinery. + +**Event loop impact**: `await` on a synchronous result resumes on the microtask queue, so the `while` loop returns to the event loop (timers, I/O) between items only when an op returns a pending Promise. Each synchronous op blocks the event loop briefly (typically <5ms per op), and a run of queued synchronous ops drains back-to-back without yielding. For large batches this is acceptable because backfill is split into chunks (§9.2). + +### 11.3 PRAGMA settings + +```ts +db.pragma('journal_mode = WAL'); +db.pragma('synchronous = NORMAL'); +db.pragma('busy_timeout = 5000'); +db.pragma('foreign_keys = ON'); +db.pragma('temp_store = MEMORY'); +db.pragma('mmap_size = 30000000'); +db.pragma('cache_size = -8000'); // 8MB page cache +``` + +**Durability tradeoff**: `synchronous = NORMAL` means the most recently committed transactions can be lost on power failure (not process crash — SQLite WAL protects against process crash), although the database itself stays consistent.
This is acceptable for claudemesh because the broker replay can recover any lost ops from the server side. For users who want higher durability, `synchronous = FULL` is available via `CLAUDEMESH_STORE_SYNC=full` env var at the cost of ~2x write latency. + +### 11.4 Transaction length + +Every write transaction MUST complete in < 100ms. Long operations (backfill, GC sweep, re-embedding) are split into many small transactions (§9.2). + +--- + +## 12. Sync protocol + +### 12.1 Overview + +``` +┌──────────┐ ┌──────────┐ ┌────────────┐ +│ Tool │ write │ SQLite │ read │ Sync │ +│ Handler ├──────►│ (source ├──────►│ Daemon │ +└──────────┘ │ of truth)│ └──────┬─────┘ + └────▲─────┘ │ + │ │ outbox → + │ apply ▼ broker ws + ┌────┴─────┐ ┌────────────┐ + │ inbox │◄──────┤ Broker │ + └──────────┘ ← ws └────────────┘ +``` + +### 12.2 Outbox drain with abort semantics and head-of-line protection + +The original drain blocked the whole batch on a single flaky op. Fixed version aborts the batch on network-level errors and retries only op-specific errors: + +```ts +// services/broker/sync-daemon.ts + +async function drainOutbox(services: Services, meshSlug: string): Promise { + const MAX_BATCH = 10; + const MAX_ATTEMPTS_PER_OP = 10; + + // Read pending ops outside the write queue (read-only) + const pending = services.db.prepare(` + SELECT id, op_type, payload, client_op_id, attempts + FROM outbox + WHERE mesh_slug = ? AND synced_at IS NULL + ORDER BY id + LIMIT ? 
+ `).all(meshSlug, MAX_BATCH) as OutboxRow[]; + + if (pending.length === 0) return { sent: 0, exhausted: false }; + + let sent = 0; + for (const op of pending) { + // Re-read attempts to avoid stale in-memory value + const current = services.db.prepare('SELECT attempts FROM outbox WHERE id = ?').get(op.id) as { attempts: number }; + if (current.attempts >= MAX_ATTEMPTS_PER_OP) { + // Mark mesh as sync_paused and surface to user + await services.queue.enqueue(() => { + services.db.prepare('UPDATE mesh SET sync_paused = 1 WHERE slug = ?').run(meshSlug); + }); + return { sent, exhausted: true }; + } + + try { + const ack = await services.broker.send({ + mesh_slug: meshSlug, + client_op_id: op.client_op_id, + op_type: op.op_type, + payload: JSON.parse(op.payload), + }); + + await services.queue.enqueue(() => { + services.db.prepare(` + UPDATE outbox + SET synced_at = ?, server_ack_id = ?, broker_epoch = ?, broker_seq = ? + WHERE id = ? + `).run(Date.now(), ack.server_ack_id, ack.broker_epoch, ack.broker_seq, op.id); + }); + sent++; + } catch (err: any) { + // Increment attempts in DB + await services.queue.enqueue(() => { + services.db.prepare(` + UPDATE outbox + SET attempts = attempts + 1, last_error = ?, last_attempt_at = ? + WHERE id = ? + `).run(String(err?.message ?? err), Date.now(), op.id); + }); + + // Classify the error + if (isNetworkError(err)) { + // Network error: abort the batch, let the daemon loop retry after backoff + return { sent, exhausted: false, networkError: true }; + } + // Op-specific error: continue with next op in batch + continue; + } + } + + return { sent, exhausted: false }; +} + +function isNetworkError(err: any): boolean { + if (!err) return false; + const code = err.code ?? 
err.cause?.code; + return code === 'ENOTFOUND' || code === 'ECONNREFUSED' || code === 'ECONNRESET' || + code === 'ETIMEDOUT' || code === 'EAI_AGAIN' || code === 'WS_CLOSED'; +} +``` + +### 12.3 Inbox apply + +```ts +async function applyInbox(services: Services, meshSlug: string): Promise { + const pending = services.db.prepare(` + SELECT id, broker_epoch, broker_seq, op_type, payload + FROM inbox + WHERE mesh_slug = ? AND applied_at IS NULL + ORDER BY broker_epoch, broker_seq + LIMIT 10 + `).all(meshSlug) as InboxRow[]; + + if (pending.length === 0) return { applied: 0 }; + + let applied = 0; + for (const inc of pending) { + try { + await services.queue.enqueue(() => { + services.db.transaction(() => { + applyOp(services.db, meshSlug, inc); + services.db.prepare('UPDATE inbox SET applied_at = ? WHERE id = ?').run(Date.now(), inc.id); + })(); + }); + applied++; + } catch (err) { + services.logger.error('inbox apply failed', { id: inc.id, err }); + // Stop on first apply failure; retry on next daemon tick + break; + } + } + return { applied }; +} + +function applyOp(db: Database, meshSlug: string, inc: InboxRow): void { + const payload = JSON.parse(inc.payload); + switch (inc.op_type) { + case 'memory.set': return upsertMemory(db, { ...payload, mesh_slug: meshSlug }); + case 'memory.tombstone': return tombstoneMemory(db, { ...payload, mesh_slug: meshSlug }); + case 'state.set': return upsertStateKv(db, { ...payload, mesh_slug: meshSlug }); + case 'task.claim': return applyInboxClaim(db, { ...payload, mesh_slug: meshSlug }); + case 'vector.store': return applyInboxVectorStore(db, { ...payload, mesh_slug: meshSlug }); + case 'file.share': return applyInboxFileShare(db, { ...payload, mesh_slug: meshSlug }); + // ... etc + default: + throw new Error(`unknown op_type: ${inc.op_type}`); + } +} +``` + +### 12.4 Daemon loop — idle path applies inbox + +**Critical fix**: the idle path now applies inbox, not just drains outbox. Remote messages no longer starve. 
+ +```ts +export class SyncDaemon { + private state: 'active' | 'idle' | 'reconnecting' | 'stopped' = 'idle'; + private idleSleepMs = 5_000; + private activeSleepMs = 500; + private reconnectBackoff = 1_000; + private stopPromise: Promise | null = null; + private stopResolve: (() => void) | null = null; + + constructor(private services: Services) {} + + async start(): Promise { + this.stopPromise = new Promise(resolve => { this.stopResolve = resolve; }); + + while ((this.state as string) !== 'stopped') { + try { + if (this.state === 'reconnecting') { + try { + await this.services.broker.connect(); + this.state = 'active'; + this.reconnectBackoff = 1_000; + } catch { + await sleep(this.reconnectBackoff); + this.reconnectBackoff = Math.min(this.reconnectBackoff * 2, 30_000); + continue; + } + } + + // Apply inbound ops FIRST, regardless of state (prevents starvation) + for (const meshSlug of await this.getActiveMeshes()) { + await applyInbox(this.services, meshSlug); + } + + // Then drain outbound + let anyNetworkError = false; + for (const meshSlug of await this.getActiveMeshes()) { + const result = await drainOutbox(this.services, meshSlug); + if (result.networkError) { + anyNetworkError = true; + break; + } + } + + if (anyNetworkError) { + this.state = 'reconnecting'; + continue; + } + + // State transition: active → idle if nothing happened for 30s + const now = Date.now(); + const lastActivity = this.services.broker.lastActivityAt ?? 0; + if (this.state === 'active' && now - lastActivity > 30_000) { + this.state = 'idle'; + } + + await sleep(this.state === 'active' ? this.activeSleepMs : this.idleSleepMs); + } catch (err) { + this.services.logger.error('sync daemon loop error', { err }); + await sleep(1_000); + } + } + + this.stopResolve!(); + } + + /** Trigger immediate drain on local change. */ + onLocalChange(): void { + this.state = 'active'; + } + + /** Trigger immediate apply on incoming broker message. 
*/ + onBrokerMessage(): void { + this.state = 'active'; + } + + /** Graceful shutdown. */ + async stop(): Promise { + this.state = 'stopped'; + if (this.stopPromise) await this.stopPromise; + await this.services.queue.stop(); + } +} +``` + +**State transitions**: +- `reconnecting` → `active` on successful connect +- `active` → `idle` after 30s without broker activity (the loop checks only `broker.lastActivityAt`; it does not inspect the outbox) +- `idle` → `active` on local change or broker message +- `active` or `idle` → `reconnecting` when an outbox drain reports a network error +- Any → `stopped` on `stop()` call + +**Critical properties**: +- Inbox is applied on every tick, regardless of state +- Outbox is drained on every tick (idle has a longer sleep) +- Network errors transition to `reconnecting` with backoff +- Stop is awaitable and drains in-flight ops + +--- + +## 13. Conflict resolution per tool family + +| Tool | Strategy | Tiebreaker | +|---|---|---| +| `memory` | LWW per `(mesh, peer, key)` | `(lamport, peer_id)` bytewise | +| `state_kv` | LWW per `(mesh, key)` | `(lamport, updated_by)` bytewise | +| `vectors` | Append-only per `(mesh, peer, key, model)` | Tombstone on delete, no conflict | +| `files` | LWW per `(mesh, peer, path)` | `(lamport, peer_id)` bytewise; content dedup by sha256 | +| `tasks` | First claim wins | `(lamport, peer_id)` bytewise; supersession events logged | +| `peers` | Last broker update wins | Cache only, no local writes | + +--- + +## 14. 
Offline behavior + +| Operation | Offline result | +|---|---| +| `remember` | Succeeds, enqueues outbox op | +| `recall` | Succeeds from local | +| `vector_store` | Succeeds, enqueues outbox op | +| `vector_search` | Succeeds from local vectors | +| `set_state` | Succeeds, enqueues outbox op | +| `get_state` | Succeeds from local | +| `share_file` | Succeeds, content to local blob store, metadata enqueues | +| `read_peer_file` | Returns `{ status: 'stale', content: last_known }` or `{ status: 'offline' }` if never synced | +| `list_peers` | Returns cached list with `stale: true` flag after 5 min | +| `send_message` | Returns `{ status: 'queued' }`, goes to outbox | +| `claim_task` | Tentative claim, reverts on reconnect if another peer won | +| `mesh_clock` | Returns `{ lamport, sync_state: 'offline', last_sync_at }` | +| `mesh_info` | Returns local metadata | + +--- + +## 15. Error recovery + +### 15.1 Corrupt database + +On startup: +```ts +const result = db.pragma('integrity_check', { simple: true }); +if (result !== 'ok') { + // Surface to user with `claudemesh doctor --repair` offer + // Repair: .dump → new db → re-import + // If repair fails: backup to data.db.corrupt- + init fresh +} +``` + +### 15.2 Stuck outbox + +Per-op retry limit is 10 (checked against the current DB value, not stale in-memory). When exhausted: +1. Set `mesh.sync_paused = 1` +2. Surface warning overlay in UI +3. `claudemesh doctor` shows the failing ops and offers `--retry` or `--drop` + +### 15.3 Diverged inbox + +If inbox has a gap in `(broker_epoch, broker_seq)`: +1. Request re-sync from the last known seq +2. If broker returns a new epoch, accept it (broker restarted) +3. If gap persists, mark mesh as "needs full resync" and re-download from snapshot + +### 15.4 Broker epoch change + +Detected when the broker ack includes a new `broker_epoch`. The CLI: +1. Updates `mesh.broker_epoch` in the DB +2. Continues with new epoch for all subsequent ops +3. 
Inbox dedupe still works because the unique constraint is `(mesh, epoch, seq)` + +### 15.5 Migration failure + +Each migration runs in its own transaction, so it either applies completely or not at all. If a migration fails mid-run: +- The failing migration is rolled back; its `_migrations` row is written in the same transaction, so a version is recorded only when that migration commits +- Restart skips the versions already recorded and retries from the first unapplied migration +- If a migration keeps failing, `claudemesh doctor --rollback-migration <version>` offers an escape + +--- + +## 16. Migration runner + +```ts +// services/store/migrations.ts + +interface Migration { + version: number; + name: string; + up: (db: Database) => void; +} + +const MIGRATIONS: Migration[] = [ + { + version: 1, + name: '001-initial', + up: (db) => { db.exec(readSqlFile('001-initial.sql')); }, + }, + { + version: 2, + name: '002-add-broker-epoch', + up: (db) => { db.exec(readSqlFile('002-add-broker-epoch.sql')); }, + }, + // ... +]; + +export function runMigrations(db: Database, queue: WriteQueue): Promise { + return queue.enqueue(() => { + db.exec('CREATE TABLE IF NOT EXISTS _migrations (version INTEGER PRIMARY KEY, applied_at INTEGER NOT NULL)'); + const applied = db.prepare('SELECT version FROM _migrations').all() as { version: number }[]; + const appliedVersions = new Set(applied.map(r => r.version)); + + for (const m of MIGRATIONS) { + if (appliedVersions.has(m.version)) continue; + db.transaction(() => { + m.up(db); + db.prepare('INSERT INTO _migrations (version, applied_at) VALUES (?, ?)').run(m.version, Date.now()); + })(); + } + }); +} +``` + +--- + +## 17. Bundle size accounting (honest) + +Per review: the 800 KB JS bundle target was optimistic. 
Honest targets: + +### 17.1 Per-platform distribution + +| Platform | Native addon size | JS bundle (gz) | Total install (decompressed) | +|---|---|---|---| +| macOS arm64 | ~2.8 MB | ~1.0 MB | ~8-10 MB | +| macOS x64 | ~2.9 MB | ~1.0 MB | ~8-10 MB | +| Linux x64 | ~3.2 MB | ~1.0 MB | ~9-11 MB | +| Linux arm64 | ~3.1 MB | ~1.0 MB | ~9-11 MB | +| Windows x64 | ~3.5 MB | ~1.0 MB | ~10-12 MB | + +**JS bundle target: ~1 MB gzipped** (not 800 KB). Realistic given Ink + React + Zod + citty + MCP SDK + all UI code. + +**Cold start target: 200-400 ms** (not 100 ms). `better-sqlite3` native addon load + SQLite init + connection pragmas takes 150-250 ms on modern hardware. Script evaluation adds another 50-150 ms. + +### 17.2 Cold start phases + +| Phase | Target | Notes | +|---|---|---| +| Node startup + script load | <50 ms | Bun or Node + ESM loader | +| better-sqlite3 native load | ~100-150 ms | One-time per process | +| sqlite-vec extension load | ~20-50 ms | One-time per connection | +| SQLite connection + PRAGMA | ~30-80 ms | Includes WAL checkpoint check | +| Migration check (cached) | <10 ms | Only runs if version mismatch | +| First meaningful output | **200-400 ms total** | Measured on Apple M2 Pro, 2026 | + +### 17.3 Optimization path + +If cold start exceeds 400 ms in practice: +- Defer non-critical service initialization (telemetry, update check) +- Use `bun` runtime as alternate distribution (Bun's native SQLite skips addon load) +- Lazy-load MCP tool registrations + +None of these are required for v1.0.0. + +--- + +## 18. Shutdown and drain protocol + +### 18.1 Signal handling + +```ts +// services/lifecycle/service-manager.ts + +export class ServiceManager { + private services: { + queue: WriteQueue; + daemon: SyncDaemon; + broker: BrokerClient; + }; + + async shutdown(): Promise { + // Order matters: + // 1. Stop accepting new work + await this.services.daemon.stop(); + // 2. Drain any queued writes + await this.services.queue.stop(); + // 3. 
Close broker connection + await this.services.broker.disconnect(); + // 4. GC sweep if time permits (best effort) + try { + await Promise.race([ + gcBlobs(this.services.db, this.services.queue, BLOBS_DIR), + sleep(2_000), + ]); + } catch {} + // 5. Checkpoint WAL + this.services.db.pragma('wal_checkpoint(TRUNCATE)'); + // 6. Close DB + this.services.db.close(); + } +} + +// Entrypoint wiring: +process.on('SIGINT', async () => { + await serviceManager.shutdown(); + process.exit(0); +}); +process.on('SIGTERM', async () => { + await serviceManager.shutdown(); + process.exit(0); +}); +``` + +### 18.2 Timeout + +Shutdown has a 10-second hard timeout. If services don't stop cleanly within that window, `process.exit(1)` is called and the user sees a warning on next launch: + +``` +~ Previous session didn't shut down cleanly. Running integrity check… +``` + +The integrity check verifies the database is uncorrupted. If WAL replay succeeds, the warning is cleared. + +--- + +## 19. Testing strategy + +### 19.1 Unit tests + +Every module in `services/store/` has a colocated `*.test.ts` with 100% coverage. Tests use `better-sqlite3` with an in-memory (`:memory:`) database. + +Required unit tests: +- `tickLamport` concurrency: 100 simultaneous calls, assert monotonic output with no gaps or duplicates +- `upsertMemory` conflict resolution: interleave writes with different lamports, assert winner is correct per tuple comparison +- `applyInboxClaim` all branches, covering five cases: completed, cancelled, open, same-peer reclaim, different-peer conflict +- `WriteQueue` async op handling: enqueue async function, assert `await enqueue(...)` resolves after the Promise settles +- `ensureVecTable` race: two simulated processes race to create the same fingerprint, assert only one vec table is created +- Path validation: 50+ positive and negative cases + +### 19.2 Integration tests + +`tests/integration/store/` runs against a real staging broker + real file system. 
Covers: +- Full sync protocol end-to-end +- Conflict resolution between two simulated peers with clock skew +- Publish upgrade transaction with backfill +- Offline → reconnect → converge with 1000 pending ops +- Task claim race with explicit reconciliation +- Broker epoch change mid-session + +### 19.3 Fuzz tests + +`tests/fuzz/store/` generates random op sequences and verifies invariants: +- Lamport is monotonic within a peer +- Conflict resolution is deterministic (same input → same output across runs) +- Outbox + inbox round-trip produces identical state on both peers +- Blob refcount never goes negative +- No orphaned blobs after GC sweep + +Fuzz budget: 100,000 random operations per CI run. + +### 19.4 Benchmarks + +`tests/bench/store/` tracks regression: +- Memory insert latency p50/p99 +- Vector search latency +- Transaction throughput under single-writer contention +- Cold start +- Bundle size (fail if >20% regression) + +--- + +## 20. Operational concerns + +### 20.1 Backups + +`claudemesh doctor --backup` produces a clean snapshot via SQLite's `BACKUP` API. Users can also manually copy `data.db` + `data.db-wal` + `data.db-shm` if the process is stopped. + +### 20.2 Export + +`claudemesh advanced export --format jsonl` dumps all mesh data to JSONL for debugging or manual migration. + +### 20.3 Import + +`claudemesh advanced import ` is NOT implemented in v1.0.0. Importing rows with arbitrary lamports would break invariant §4.1. Deferred to v1.1 with a proper re-stamping pass. + +### 20.4 Metrics + +Local metrics log to `~/.claudemesh/logs/metrics.jsonl`: +- Operation counts and latencies per tool +- Sync lag (local lamport vs last applied inbox lamport) +- Error rates by category +- Cold start time per launch + +Read by `claudemesh doctor` for diagnosis. Never transmitted externally (even if telemetry is opted in). + +--- + +## 21. Open questions deferred to v1.1+ + +1. 
**Hybrid logical clocks** — if field experience shows Lamport is insufficient for certain workloads +2. **Selective sync** — allow users to exclude certain meshes or tables from sync +3. **Row-level encryption** — even the broker can't read content +4. **CRDT structures** — if append-only patterns dominate, move memory/state to Automerge-style +5. **Multi-machine personal mesh sync** — server-side encrypted storage of personal meshes +6. **SQLite encryption at rest** — SQLCipher adds ~4 MB; consider as `claudemesh-cli-sqlcipher` alternate distribution +7. **Time-series queries on memory** — "what did I remember 3 days ago" requires additional indexing +8. **Incremental vector re-embedding** — current flow is one big expensive operation +9. **Import support** — with safe re-stamping + +--- + +**End of spec.** diff --git a/.artifacts/specs/2026-04-10-cli-v2-pass2-shared-infrastructure.md b/.artifacts/specs/2026-04-10-cli-v2-pass2-shared-infrastructure.md new file mode 100644 index 0000000..b73edd9 --- /dev/null +++ b/.artifacts/specs/2026-04-10-cli-v2-pass2-shared-infrastructure.md @@ -0,0 +1,1481 @@ +# claudemesh-cli v2 Pass 2 — Shared infrastructure (broker-backed services) + +> ⚠️ **This document describes v2 Pass 2 broker hardening — NOT the Pass 1 scope.** +> +> For the v2 Pass 1 implementation target, see **`2026-04-11-cli-v2-pass1.md`**. +> +> Pass 1 keeps the broker **exactly as it is today in v1**. No role-per-mesh Postgres isolation, no MCP catalog tiering, no egress-controlled Docker networks, no SSRF policy for URL watch, no RBAC matrix rewrite, no catalog audit process, no vault mount_path validation. The broker's existing behavior is preserved; v2 Pass 1 only changes the CLI side. +> +> The existing v1 broker features (Postgres schemas, Neo4j databases, Qdrant collections, MinIO buckets, Docker MCP sandboxes, vault, URL watch, Telegram bridge) keep working unchanged. 
The security hardenings described in this document are desirable improvements for future broker releases, not v2 Pass 1 gates. +> +> This document is retained as reference for future Pass 2 broker hardening work. + +**Status:** Pass 2 future reference — NOT the Pass 1 implementation target +**Created:** 2026-04-10 +**Companion to:** `2026-04-10-cli-v2-final-vision.md` and `2026-04-10-cli-v2-local-first-storage.md` +**Purpose:** Specifies the broker-backed shared services that the CLI surfaces as mesh tools: shared SQL (Postgres), graph database (Neo4j), vector search (Qdrant), object storage (MinIO), MCP registry (two tiers), URL watch, vault, and the default bundled MCP catalog. Establishes the hybrid architecture where per-peer data is local-first SQLite while shared-mesh data lives on broker-hosted backends. + +All of this is **already implemented in v1** (`apps/cli/src/mcp/tools.ts`, `apps/broker/src/*`). This spec documents the v1 behavior, locks it into the v2 architecture, and defines the isolation and multi-tenancy model. + +--- + +## Table of contents + +1. The hybrid architecture +2. Shared-infrastructure inventory +3. Per-mesh isolation models +4. Shared SQL (Postgres) +5. Graph database (Neo4j) +6. Vector search (Qdrant) +7. Object storage (MinIO) +8. MCP registry — tier 1: peer-hosted +9. MCP registry — tier 2: broker-deployed +10. Vault (encrypted credentials) +11. URL watch +12. Default bundled MCP catalog +13. Broker deployment requirements +14. Security model +15. Tool surface summary +16. Migration from v1 + +--- + +## 1. The hybrid architecture + +v2 is **local-first for per-peer data** and **broker-backed for shared-mesh data**. 
This is what v1 already does, now explicit: + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ Claude Code │ +└───────────────────────────┬─────────────────────────────────────┘ + │ stdio MCP protocol +┌───────────────────────────▼─────────────────────────────────────┐ +│ claudemesh-cli (per-peer process) │ +│ │ +│ ┌─────────────────────────┐ ┌─────────────────────────┐ │ +│ │ LOCAL (SQLite) │ │ REMOTE (broker WS) │ │ +│ │ source of truth for │ │ gateway to shared │ │ +│ │ per-peer data: │ │ services: │ │ +│ │ │ │ │ │ +│ │ • memory │ │ • mesh_query (SQL) │ │ +│ │ • state_kv (local ptr) │ │ • graph_query (Cypher) │ │ +│ │ • personal files │ │ • vector_search │ │ +│ │ • task claims │ │ • mesh_tool_call │ │ +│ │ • outbox / inbox │ │ • mesh_watch │ │ +│ │ • peer cache │ │ • vault_set │ │ +│ └─────────────────────────┘ └──────────┬──────────────┘ │ +└──────────────────────────────────────────────┼──────────────────┘ + │ WebSocket +┌──────────────────────────────────────────────▼──────────────────┐ +│ Broker (per-mesh gateway) │ +│ │ +│ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────────┐ │ +│ │ Postgres │ │ Neo4j │ │ Qdrant │ │ MinIO │ │ +│ │ per-mesh │ │ per-mesh │ │ per-mesh │ │ per-mesh │ │ +│ │ schema │ │ DB │ │collection│ │ bucket │ │ +│ └──────────┘ └──────────┘ └──────────┘ └──────────────┘ │ +│ │ +│ ┌───────────────────────────────────────────────────────┐ │ +│ │ MCP runtime sandbox (Docker per deployed server) │ │ +│ └───────────────────────────────────────────────────────┘ │ +└─────────────────────────────────────────────────────────────────┘ +``` + +### Which side owns what + +| Concern | Owner | Rationale | +|---|---|---| +| `memory` | Local SQLite | Per-peer knowledge; local-first for offline | +| Local state keys | Local SQLite | Fast reads, sync via outbox | +| Shared `state_kv` snapshot | Local SQLite | Synced from broker, readable offline | +| Personal files (`files`) | Local blobs + MinIO mirror | Content-addressed local; 
upload to MinIO on share | +| Tasks (claims + status) | Local SQLite + sync | Offline claim with reconciliation | +| **Shared SQL tables** | **Broker Postgres** | Cross-peer SQL requires central DB | +| **Shared graph** | **Broker Neo4j** | Cross-peer Cypher requires central DB | +| **Cross-peer vector search** | **Broker Qdrant** | Shared index across peers | +| **Large file object store** | **Broker MinIO** | Per-mesh bucket, ephemeral vs persistent paths | +| **Shared MCP tool calls** | **Broker (routes or hosts)** | Tool lives on peer or in broker sandbox | +| **URL watch polling** | **Broker** | Central poller, push notifications to peers | +| **Vault credentials** | **Broker** (encrypted) | Injected into deployed MCPs at runtime | + +### The rule + +**If a feature requires reading another peer's data, it's broker-backed.** If it only needs your own data, it's local. This is the clean boundary. + +--- + +## 2. Shared-infrastructure inventory + +Confirmed present in v1 code: + +| Service | v1 file | v1 client lib | +|---|---|---| +| Postgres (shared SQL) | `apps/broker/src/broker.ts` (embedded) | `pg` (node-postgres) | +| Neo4j (graph) | `apps/broker/src/neo4j-client.ts` | `neo4j-driver` | +| Qdrant (vectors) | `apps/broker/src/qdrant.ts` | `@qdrant/js-client-rest` | +| MinIO (files) | `apps/broker/src/minio.ts` | `minio` | +| MCP runtime (Docker) | `apps/broker/src/broker.ts` (sandbox spawner) | Docker socket / `dockerode` | +| Vault (encrypted creds) | `apps/broker/src/broker.ts` | AES-GCM + KMS key | +| URL watch | `apps/broker/src/broker.ts` | Node fetch + scheduler | + +v2 keeps all seven services and ports them into the new architecture without redesign. The broker continues to run them; the v2 CLI consumes them through WebSocket calls routed through `services/broker/ws-client.ts`. + +--- + +## 3. Per-mesh isolation models + +Each backend uses a different isolation strategy. This is intentional — each tool's semantics match a different model. 
+ +| Backend | Isolation strategy | Naming scheme | Multi-tenancy options | +|---|---|---|---| +| **Postgres** | Schema-per-mesh | `mesh_` schema | Default: one schema per mesh. Optional: single-owner mode (user's own schema), or Row-Level Security for fine-grained cross-mesh sharing. | +| **Neo4j** | Database-per-mesh | `mesh_` database | Enterprise: multi-database. Community: single default DB with label-based filtering (`mesh_id` label on every node). | +| **Qdrant** | Collection-per-mesh (per-collection) | `mesh__` | Single Qdrant instance, collection-level ACLs via broker. | +| **MinIO** | Bucket-per-mesh | `mesh-` | Single MinIO cluster, IAM policies per bucket. | +| **MCP sandboxes** | Container-per-deployment | `cm-mcp--` | Docker network isolation, read-only filesystem by default, network allowlist. | +| **Vault** | Row-per-peer | `vault(mesh_id, peer_id, key)` | AES-GCM with per-mesh wrapping key. | +| **URL watches** | Row-per-watch | `watches(mesh_id, peer_id, watch_id)` | Broker-level rate limiting per peer. | + +### The RBAC layer + +Every broker-backed operation goes through a common authorization check: + +```ts +// apps/broker/src/authz.ts +export async function checkAccess( + user: User, + meshId: string, + resource: Resource, + action: Action, +): Promise { + // 1. Is the user a member of the mesh? + const membership = await getMembership(user.id, meshId); + if (!membership) return { allowed: false, reason: 'not_a_member' }; + + // 2. Does the user's role include this action? + const role = membership.role; // 'owner' | 'admin' | 'member' | 'guest' + if (!roleAllows(role, resource, action)) { + return { allowed: false, reason: 'insufficient_role' }; + } + + // 3. Does the resource's scope include the user? 
+ if (resource.scope === 'peer' && resource.owner_id !== user.id) { + return { allowed: false, reason: 'not_resource_owner' }; + } + + return { allowed: true }; +} +``` + +Role capabilities (complete matrix): + +| Action | guest | member | admin | owner | +|---|---|---|---|---| +| **SQL** | | | | | +| `mesh_query` (read) | ✓ (read-only) | ✓ | ✓ | ✓ | +| `mesh_execute` (write/DDL) | — | ✓ | ✓ | ✓ | +| `mesh_schema` | ✓ | ✓ | ✓ | ✓ | +| **Graph** | | | | | +| `graph_query` | ✓ (read-only) | ✓ | ✓ | ✓ | +| `graph_execute` | — | ✓ | ✓ | ✓ | +| **Vectors** | | | | | +| `vector_search scope=self` | ✓ | ✓ | ✓ | ✓ | +| `vector_search scope=all` | — | ✓ | ✓ | ✓ | +| `vector_search scope={peer}` | — | own peer only | ✓ | ✓ | +| `vector_store` | ✓ (own namespace) | ✓ | ✓ | ✓ | +| `vector_delete` | own only | own only | any | any | +| **Files** | | | | | +| `share_file` | ✓ | ✓ | ✓ | ✓ | +| `get_file` (download) | own + shared-with-self | own + shared-with-self + mesh-wide | any | any | +| `grant_file_access` (re-share) | — | own files only | any file | any file | +| `revoke_file_access` | — | own files only | any file | any file | +| `delete_file` | own only | own only | any | any | +| **MCP registry tier 1 (peer-hosted)** | | | | | +| `mesh_mcp_register` | — | ✓ | ✓ | ✓ | +| `mesh_mcp_list` | ✓ | ✓ | ✓ | ✓ | +| `mesh_mcp_remove` | — | own only | any | any | +| `mesh_tool_call` | ✓ (subject to scope) | ✓ | ✓ | ✓ | +| **MCP registry tier 2 (broker-deployed)** | | | | | +| `mesh_mcp_deploy scope=peer` | — | ✓ (own peer only) | ✓ | ✓ | +| `mesh_mcp_deploy scope=mesh` | — | — | ✓ | ✓ | +| `mesh_mcp_deploy scope=group` | — | — | ✓ | ✓ | +| `mesh_mcp_scope widen` (peer→mesh/group) | — | — | ✓ | ✓ | +| `mesh_mcp_scope narrow` | — | own deployments only | any | any | +| `mesh_mcp_undeploy` | — | own only | any | any | +| `mesh_mcp_logs` | — | own deployments only | any | any | +| `mesh_mcp_update` | — | own only | any | any | +| `mesh_mcp_catalog` | ✓ | ✓ | ✓ | ✓ | +| **Vault** | | | 
| | +| `vault_set` (own) | — | ✓ | ✓ | ✓ | +| `vault_list` (own metadata) | — | ✓ | ✓ | ✓ | +| `vault_delete` (own) | — | ✓ | ✓ | ✓ | +| `vault_read` (by deployed MCP) | — | — | — | — (broker-only, injected at container start) | +| **URL watch** | | | | | +| `mesh_watch` create | ✓ | ✓ | ✓ | ✓ | +| `mesh_unwatch` (own) | ✓ | ✓ | ✓ | ✓ | +| `mesh_unwatch` (any) | — | — | ✓ | ✓ | +| `mesh_watches` list | own only | own only | all | all | +| **Mesh lifecycle** | | | | | +| `mesh_rename` | — | — | ✓ | ✓ | +| `mesh_delete` | — | — | — | ✓ | +| `set_role` on another peer | — | — | ✓ (member↔guest) | ✓ (any transition) | +| **Catalog control** | | | | | +| `catalog enable tier=extended` | — | — | ✓ | ✓ | + +**Key principles**: + +1. **Guests are read-mostly** but can create vectors in their own namespace and watch URLs. They cannot write to shared SQL/graph, cannot deploy MCPs, cannot re-share files. +2. **Members can do everything in their own scope** (own files, own vectors, own vault, own tier-1 MCP registrations, own tier-2 peer-scoped deployments). They cannot widen scope to `mesh` or manage other peers' resources. +3. **Admins can manage mesh-wide resources** — scope changes, deployments affecting other members, role transitions for guests/members. They cannot delete the mesh or change the owner's role. +4. **Owners have full control** including mesh deletion and role reassignment. There is exactly one owner per mesh at any time; ownership transfer is a two-step process (invite new owner → current owner steps down). +5. **Vault read is broker-only** — no tool exposes the raw secret value. Deployed MCPs receive secrets via container env var injection at startup, scoped to the deployer's vault. +6. **MCP scope escalation path** (peer → mesh) requires admin role at the moment of escalation. A member cannot deploy as peer and then escalate themselves; an admin must approve the scope change. 
+ +Roles are assigned at invite time or via `claudemesh advanced set-role` (admin+ required). + +--- + +## 4. Shared SQL (Postgres) + +### 4.1 Overview + +Each mesh has its own Postgres schema in the broker's cluster. Peers can run DDL (CREATE TABLE) and DML (INSERT/UPDATE/DELETE/SELECT) inside that schema. Cross-mesh access is blocked because each schema is owned by a dedicated Postgres role that has no privileges outside it (see §4.3). + +### 4.2 Tools + +``` +mesh_query(sql) → SELECT-only, returns rows +mesh_execute(sql) → DDL + DML, returns affected rows +mesh_schema() → Lists tables and columns in this mesh's schema +``` + +Inputs are raw SQL strings. The broker parses them (via `pg-parser` or similar) and rejects any statement that involves: +1. **Catalog / foreign-schema access**: queries that touch `pg_catalog`, `information_schema` beyond the mesh's scope, or other schemas +2. **Cross-schema qualified references**: the parser walks the AST and rejects any `TableRef` whose schema is not the caller's mesh schema (catches `SELECT * FROM "mesh_other".bugs`) +3. **File / system access**: `COPY ... FROM PROGRAM`, `COPY ... FROM '/path'`, `COPY ... TO PROGRAM`, `pg_read_file`, `pg_read_binary_file`, `pg_ls_dir`, `lo_import`, `lo_export` +4. **Cross-database access**: `dblink_connect`, `dblink`, any `postgres_fdw` operations +5. **Schema / extension management**: `CREATE SCHEMA`, `DROP SCHEMA`, `ALTER SCHEMA`, `CREATE EXTENSION`, `DROP EXTENSION` +6. **Role management**: `CREATE USER`, `CREATE ROLE`, `ALTER ROLE`, `DROP ROLE`, `GRANT`, `REVOKE` +7. **System catalog access** beyond a minimal allowlist (`pg_tables`, `pg_views`, `pg_indexes` scoped to the mesh's own schema) +8. **Volatile privilege functions**: `pg_backend_pid`, `pg_signal_backend`, `pg_terminate_backend`, `current_setting`/`set_config` with sensitive keys + +**The parser is a secondary defense**. Primary isolation is enforced by dedicated Postgres roles per mesh (see §4.3), which means even if the parser misses a pattern, role-based access control prevents cross-mesh reads at the Postgres layer. 
+ +### 4.3 Schema lifecycle — role-per-mesh isolation + +**`search_path` alone is NOT a security boundary.** `SET search_path` only affects unqualified name resolution; a malicious query can still write `SELECT * FROM "mesh_other".bugs` and bypass the default. Isolation is enforced via **dedicated Postgres roles per mesh**. + +```sql +-- On mesh creation +CREATE ROLE "mesh_<slug>_role" WITH LOGIN NOINHERIT; +CREATE SCHEMA "mesh_<slug>" AUTHORIZATION "mesh_<slug>_role"; + +-- Revoke public defaults that would allow cross-schema reads +REVOKE ALL ON SCHEMA "mesh_<slug>" FROM PUBLIC; +REVOKE ALL ON DATABASE claudemesh_shared FROM PUBLIC; +REVOKE ALL ON SCHEMA pg_catalog FROM "mesh_<slug>_role"; +REVOKE ALL ON SCHEMA information_schema FROM "mesh_<slug>_role"; + +-- Grant only to the mesh's own role +GRANT USAGE, CREATE ON SCHEMA "mesh_<slug>" TO "mesh_<slug>_role"; +ALTER DEFAULT PRIVILEGES FOR ROLE "mesh_<slug>_role" IN SCHEMA "mesh_<slug>" + GRANT ALL ON TABLES TO "mesh_<slug>_role"; +ALTER DEFAULT PRIVILEGES FOR ROLE "mesh_<slug>_role" IN SCHEMA "mesh_<slug>" + GRANT ALL ON SEQUENCES TO "mesh_<slug>_role"; + +-- Explicitly revoke dangerous function access +REVOKE EXECUTE ON FUNCTION pg_read_file(text) FROM "mesh_<slug>_role"; +REVOKE EXECUTE ON FUNCTION pg_read_binary_file(text) FROM "mesh_<slug>_role"; +REVOKE EXECUTE ON FUNCTION lo_import(text) FROM "mesh_<slug>_role"; +REVOKE EXECUTE ON FUNCTION lo_export(oid, text) FROM "mesh_<slug>_role"; + +-- On mesh deletion +DROP SCHEMA "mesh_<slug>" CASCADE; +DROP ROLE "mesh_<slug>_role"; +``` + +The broker holds **one pool per mesh role** via PgBouncer (see §4.4) and dispatches queries to the correct pool. Even if a malicious query bypasses the parser, the Postgres role has zero privileges outside its own schema, so cross-mesh reads are denied at the database layer. 
+ +```ts +async function meshQuery(meshSlug: string, sql: string): Promise<unknown[]> { + // Pool is pre-configured with role "mesh_<slug>_role" — primary isolation + const pool = await getPoolForMesh(meshSlug); + const client = await pool.connect(); + try { + // Secondary defense: unqualified names default to mesh schema + await client.query(`SET search_path TO "mesh_${meshSlug}"`); + const result = await client.query(sql); + return result.rows; + } finally { + await client.query('RESET search_path'); + client.release(); + } +} +``` + +### 4.4 Connection pooling via PgBouncer (mandatory) + +**Scaling problem**: 10 connections per mesh × 1000 meshes = 10,000 Postgres connections. Default `max_connections` is ~100. Direct connection-per-mesh does not scale. + +**Mandatory architecture**: + +``` +broker → PgBouncer (transaction mode) → Postgres + │ + └─ one logical pool per mesh role + max 10 connections per pool + shared underlying Postgres connections via multiplexing +``` + +Reference configuration in `apps/broker/pgbouncer.ini`: + +```ini +[databases] +* = host=postgres port=5432 + +[pgbouncer] +pool_mode = transaction +max_client_conn = 10000 +default_pool_size = 10 +reserve_pool_size = 2 +server_reset_query = DISCARD ALL +server_reset_query_always = 1 +``` + +**`server_reset_query = DISCARD ALL` is mandatory** — it resets `search_path`, session variables, temporary tables, and prepared statements between transactions to prevent state leakage across meshes reusing the same underlying Postgres connection. By default PgBouncer runs `server_reset_query` only in session pooling mode, so `server_reset_query_always = 1` must be set alongside it for the reset to take effect in transaction mode. + +### 4.5 Tool call flow + +``` +Claude Code → claudemesh-cli MCP server → mesh_query(sql) + ↓ + services/broker/facade.ts + ↓ WS + broker: checkAccess → switch search_path → execute → return rows + ↑ WS + services/broker/facade.ts → MCP response + ↑ +Claude Code ← rows +``` + +### 4.6 Multi-tenancy option: Row-Level Security + +For meshes that want cross-peer row isolation (e.g. 
"each peer sees only their own bug reports"), RLS can be enabled: + +```sql +ALTER TABLE "mesh_<slug>".bugs ENABLE ROW LEVEL SECURITY; +CREATE POLICY peer_isolation ON "mesh_<slug>".bugs + FOR ALL + USING (peer_id = current_setting('claudemesh.peer_id')::text); +``` + +The broker sets `claudemesh.peer_id` as a session variable before executing queries. Opt-in via `mesh_execute("SET claudemesh.rls_enabled = true")` — a mesh-level flag that future table creations use. + +### 4.7 Single-owner mode + +For personal meshes that become shared, the default schema ownership is the mesh owner (the creator). Other members have read access by default; write access is granted via role. + +### 4.8 Resource limits + +Per-mesh Postgres limits: +- **Max connections**: 10 (pooled through broker) +- **Max query time**: 30 seconds (`statement_timeout`) +- **Max schema size**: 1 GB (soft limit; warns at 80%, blocks writes at 100%) +- **Max tables per mesh**: 100 + +Limits are enforced at the broker level, not Postgres-native. + +--- + +## 5. Graph database (Neo4j) + +### 5.1 Overview + +Each mesh has either: +- A dedicated Neo4j database (Enterprise edition), OR +- A shared default database with `mesh_id` label filtering (Community edition) + +### 5.2 Tools + +``` +graph_query(cypher) → Read-only MATCH +graph_execute(cypher) → Write CREATE, MERGE, DELETE +``` + +### 5.3 Enterprise mode + +```cypher +// On mesh creation +CREATE DATABASE mesh_<slug> IF NOT EXISTS; + +// On mesh deletion +DROP DATABASE mesh_<slug> IF EXISTS; +``` + +The broker opens a session against the mesh-specific database: + +```ts +const session = neo4jDriver.session({ database: meshDbName(meshSlug) }); +``` + +### 5.4 Community mode — `graph_*` tools refused + +Neo4j Community edition does not support multi-database isolation. Label-based filtering (Cypher AST rewriting to inject `mesh_id` labels) has known bypass patterns via APOC procedures, `CALL { ... }` subqueries, and future syntax that the rewriter can't anticipate. 
This is NOT a production-grade security boundary. + +**In Community mode, the broker refuses `graph_query` and `graph_execute` tools entirely**, returning a clear error: + +``` + Graph tools (graph_query, graph_execute) require Neo4j Enterprise edition. + This broker is running Neo4j Community, which does not support multi-mesh isolation. + Contact your administrator to upgrade. +``` + +The broker detects the edition at startup via Neo4j's `CALL dbms.components()` RPC and sets a feature flag. Community mode is valid for **personal meshes and development** (where the user owns all data and isolation isn't a concern), but the `graph_*` tools are disabled whenever a mesh has >1 peer OR is `shared_owner`/`shared_guest`. + +**Enterprise is required for any shared mesh with graph features.** The reference Docker Compose (§13) documents this explicitly and defaults to Community (safer default — fail closed, not open). + +### 5.5 Resource limits + +- **Max query time**: 30 seconds +- **Max nodes per mesh**: 100,000 (soft limit) +- **Max relationships per mesh**: 500,000 + +--- + +## 6. Vector search (Qdrant) + +### 6.1 Overview + +Each mesh has one or more named collections in Qdrant, prefixed with the mesh ID. Collections are created on first insert. + +### 6.2 Tools + +``` +vector_store(collection, text, metadata?) → embed + upsert +vector_search(collection, query, limit?) 
→ embed query + nearest neighbors +vector_delete(collection, id) → delete by ID +list_collections() → list this mesh's collections +``` + +### 6.3 Collection naming and creation + +From v1's `qdrant.ts`: + +```ts +export function meshCollectionName(meshId: string, collection: string): string { + return `mesh_${meshId}_${collection}`.toLowerCase().replace(/[^a-z0-9_]/g, "_"); +} + +export async function ensureCollection(name: string, vectorSize = 1536): Promise<void> { + try { + await qdrant.getCollection(name); + } catch { + await qdrant.createCollection(name, { + vectors: { size: vectorSize, distance: "Cosine" }, + }); + } +} +``` + +Default vector size is 1536 (OpenAI `text-embedding-3-small` or `ada-002`). v2 extends this with explicit model fingerprinting (see below) so peers using different embedding models don't corrupt each other's index. + +### 6.4 Embedding provider + +The broker runs an embedding service as part of the `vector_store` flow: + +1. Peer calls `vector_store(collection, text)` +2. Broker receives the text and looks up the mesh's embedding-model setting (stored in the mesh config) +3. Broker calls the embedding provider (OpenAI, Voyage, local sentence-transformers, etc.) +4. Broker upserts the vector into Qdrant with metadata `{ peer_id, text, model_id, timestamp }` +5. Returns the vector ID to the peer + +Embedding model is per-mesh, not per-peer, to ensure search results are comparable across peers. Set via `claudemesh advanced set-embedding-model <provider>:<model>` (mesh admin only). + +### 6.5 Cross-peer search with explicit scope + +``` +vector_search(collection, query, { scope: "self" | "all" | { peer: <peer_id> }, limit: 10 }) +``` + +**Scope is mandatory** — the caller must specify whether they want their own vectors, all peers' vectors, or a specific peer's vectors. There is no default "search everything" mode because that silently leaks vectors from other peers. + +- `scope: "self"` — Qdrant filter `peer_id == self.peer_id`. 
Private search across the caller's own vectors. +- `scope: "all"` — Qdrant filter `peer_id IN mesh.members`. Cross-peer search with `peer_id` in results. +- `scope: { peer: "alice" }` — Qdrant filter `peer_id == "alice"`. Read another peer's specific vectors (requires the caller's role to allow it). + +The scope filter is applied **server-side** in Qdrant via the collection's metadata filter, not client-side after the response. A malicious caller cannot bypass scope by editing the filter client-side. + +Results always include `peer_id` metadata so the caller knows who contributed each result, even under `scope: "self"` (for audit). + +### 6.6 Resource limits + +- **Max collections per mesh**: 20 +- **Max vectors per collection**: 100,000 +- **Max vector dimension**: 4096 + +--- + +## 7. Object storage (MinIO) + +### 7.1 Overview + +Each mesh has a dedicated MinIO bucket. Files uploaded via `share_file` land there. Small files (< 64 KB) still go through the local SQLite blob store; large files go to MinIO. + +### 7.2 Bucket naming and creation + +From v1's `minio.ts`: + +```ts +export function meshBucketName(meshId: string): string { + return `mesh-${meshId.toLowerCase().replace(/[^a-z0-9-]/g, "-")}`; +} + +export async function ensureBucket(name: string): Promise { + const exists = await minioClient.bucketExists(name); + if (!exists) await minioClient.makeBucket(name); +} +``` + +### 7.3 Key paths + +Two categories: +- **Persistent**: `shared/{fileId}/{originalName}` — survives until explicit delete +- **Ephemeral**: `ephemeral/{YYYY-MM-DD}/{fileId}/{originalName}` — auto-deleted after 7 days + +The CLI's `share_file` tool takes a `persistence` flag (default: `persistent`). Ephemeral files are for temporary artifacts (screenshots, test output, pasted snippets) that don't need long-term storage. 
+ +### 7.4 E2E encryption with per-mesh long-term keys + +Files shared to a specific peer (`share_file(to: "jordan")`) are end-to-end encrypted using each peer's **per-mesh long-term keypair** (not the ephemeral session key — session keys rotate, and a re-wrap operation after rotation would be expensive). + +Every peer maintains two keypairs per mesh: +- **Session keypair** — rotates per session, used for transient messages (crypto_box) +- **Long-term keypair** — stable per `(mesh, peer)` pair, used for file encryption and vault envelopes + +The long-term keypair is generated at first mesh join and the private key is persisted at `~/.claudemesh/keys/<mesh-slug>.key` (0600). Its public half is registered with the broker in the mesh member list so senders can look it up. + +Flow: +1. Sender generates a random symmetric key (32 bytes, AES-256-GCM) +2. Sender encrypts the file content with the symmetric key +3. Sender looks up the recipient's **long-term public key** from the mesh member list +4. Sender wraps the symmetric key with the recipient's long-term public key (crypto_box or sealed-box) +5. Wrapped key + file ciphertext uploaded to MinIO +6. Recipient downloads, unwraps the key with their long-term private key, decrypts the content + +The broker cannot read the content. `grant_file_access` adds another recipient by re-wrapping the symmetric key with the new recipient's long-term public key. + +### 7.5 Download URLs (chunked for large files) + +The broker returns presigned URLs from MinIO. Two modes: + +- **Small files (< 10 MB)**: single presigned URL, 10-minute expiry, one request +- **Large files (>= 10 MB)**: multipart download with per-chunk presigned URLs, each chunk up to 10 MB, 60-minute expiry total. The broker returns a list of URLs `{ chunks: [{ url, range: "0-10485759" }, ...] }`. The CLI downloads chunks in sequence (or in parallel for faster total throughput) and concatenates them. + +For encrypted files, the URL delivers the ciphertext; the recipient decrypts locally after downloading all chunks. 
+ +**Resume on interruption**: if a chunk download fails, the CLI re-requests a new presigned URL for just that chunk. The broker regenerates the URL with a fresh expiry. Total download attempts capped at 3 per chunk. + +### 7.6 Resource limits + +- **Max file size**: 100 MB +- **Max total storage per mesh**: 10 GB (soft limit) +- **Ephemeral file retention**: 7 days +- **Persistent file retention**: until explicit delete + +--- + +## 8. MCP registry — tier 1: peer-hosted + +### 8.1 Overview + +A peer can register their **local** MCP server (e.g. their personal Postgres connector, their internal API wrapper) with the mesh. Other peers discover it and call it via `mesh_tool_call`. The call is routed through the broker to the hosting peer, executed locally by the hosting peer's CLI, and the result is returned. + +**Credentials never leave the hosting peer's machine.** The hosting peer's MCP server sees the real secrets; the broker only forwards requests and responses. + +### 8.2 Tools + +``` +mesh_mcp_register(server_name, description, tools) → announce +mesh_mcp_list() → discover +mesh_tool_call(server_name, tool_name, args) → invoke +mesh_mcp_remove(server_name) → unregister +``` + +### 8.3 Registration + +```ts +mesh_mcp_register({ + server_name: "postgres-prod", + description: "Production postgres connector", + tools: [ + { name: "query", description: "Run SELECT", inputSchema: {...} }, + { name: "tables", description: "List tables", inputSchema: {...} }, + ], + persistent: true, +}); +``` + +`persistent: true` means other peers see the registration even when the hosting peer is offline. The hosting peer's status is shown as "offline" in `mesh_mcp_list` but the entry itself persists. + +### 8.4 Tool call routing + +``` +Peer A (caller) Broker Peer B (host) + │ │ │ + ├─ mesh_tool_call ───────────────►│ │ + │ server_name: "postgres" │ │ + │ tool_name: "query" │ │ + │ args: { sql: "..." 
} │ │ + │ ├─ mcp_invoke ──────────────►│ + │ │ routed via WS │ + │ │ ├─ execute locally + │ │◄── mcp_result ─────────────┤ + │◄── mesh_tool_call_result ───────┤ │ + │ { result: [...] } │ │ +``` + +Timeout: 30 seconds. If the hosting peer doesn't respond in time, the caller gets a `{ status: 'timeout' }` error. + +### 8.5 Rate limiting + +Per-caller-per-host: 100 requests/minute. Per-mesh total: 1000 requests/minute. Enforced at the broker. + +### 8.6 Use cases + +- **Database access**: Peer B has credentials for the prod DB; Peer A queries it without ever seeing the credentials +- **Internal APIs**: Peer B's company-internal API is firewall-bound; Peer A calls it through B's machine as a proxy +- **GPU-accelerated tools**: Peer B has a local GPU; Peer A runs inference on B's machine + +--- + +## 9. MCP registry — tier 2: broker-deployed + +### 9.1 Overview + +Distinct from tier 1, the broker can **host** MCP servers directly in sandboxed containers. A peer uploads (or references) an MCP server package, and the broker runs it as a long-lived process on the VPS. Other peers call its tools via `mesh_tool_call` (same call path as tier 1, but the "host" is the broker itself). + +**This is how the marketing page's headline feature works**: `mesh_mcp_deploy("postgres-prod")` runs the actual MCP server on the broker, so the credentials are in the broker's vault (not the uploader's machine), and the server stays up even when the uploader is offline. + +### 9.2 Tools + +``` +mesh_mcp_deploy(server_name, { file_id | git_url | npx_package }, env, runtime, scope, ...) +mesh_mcp_undeploy(server_name) +mesh_mcp_update(server_name) → pull latest + restart +mesh_mcp_logs(server_name, lines) → tail recent logs +mesh_mcp_scope(server_name, scope?) → get/set visibility +mesh_mcp_schema(server_name, tool?) → inspect tool definitions +mesh_mcp_catalog() → list all deployed services in the mesh +``` + +### 9.3 Deployment sources + +Three ways to provide the MCP server code: + +1. 
**File upload**: `mesh_mcp_deploy({ file_id: "..." })` — the `file_id` comes from `share_file` with a `.zip` or `.tar.gz` archive +2. **Git clone**: `mesh_mcp_deploy({ git_url: "https://github.com/...", git_branch: "main" })` +3. **npm package**: `mesh_mcp_deploy({ npx_package: "@upstash/context7-mcp" })` + +### 9.4 Runtime sandbox + +Each deployed MCP server runs in a Docker container with strict limits + writable working directory + egress-controlled network: + +```ts +// apps/broker/src/mcp-runtime.ts (excerpt) +const containerConfig = { + Image: runtimeImage(runtime), // 'node:20-alpine' | 'python:3.12-alpine' | 'oven/bun:1' + Env: { + ...env, + HOME: '/workspace/home', + XDG_CONFIG_HOME: '/workspace/home/.config', + XDG_CACHE_HOME: '/workspace/home/.cache', + XDG_DATA_HOME: '/workspace/home/.local/share', + }, + HostConfig: { + Memory: memory_mb * 1024 * 1024, + MemorySwap: memory_mb * 1024 * 1024, + CpuShares: 256, + PidsLimit: 100, + ReadonlyRootfs: true, + Tmpfs: { + '/tmp': 'size=100m,mode=1777', + '/workspace/home': 'size=50m,mode=700,uid=65534,gid=65534', // writable HOME for XDG paths + }, + Binds: [ + // Per-mesh persistent workspace (opt-in via deploy config) + `/broker/data/mesh-${meshSlug}/mcp-${serverName}:/workspace/data:rw`, + ], + NetworkMode: mcpNetworkName(meshSlug, serverName), // egress-controlled network, see below + CapDrop: ['ALL'], + SecurityOpt: ['no-new-privileges', 'seccomp=default'], + ReadonlyPaths: ['/etc', '/usr'], + }, + User: '65534:65534', // explicit nobody:nogroup UID/GID + WorkingDir: '/workspace', +}; +``` + +**Security posture**: +- Read-only root filesystem with explicit `ReadonlyPaths` for `/etc`, `/usr` +- `/tmp` writable with 100 MB limit (mode 1777 for standard behavior) +- `/workspace/home` writable tmpfs with 50 MB limit — provides `$HOME`, `$XDG_CONFIG_HOME`, `$XDG_CACHE_HOME`, `$XDG_DATA_HOME` for npm/python packages that expect writable user dirs +- `/workspace/data` optional persistent bind mount per `(mesh, 
server_name)` — used by `filesystem` MCP and similar +- Memory cap (default 256 MB, max 1 GB) +- **Egress network isolation** (see §9.4.1 below) — not bare `bridge` mode +- All capabilities dropped +- Default seccomp profile (plus optional stricter custom profile per catalog entry) +- Runs as explicit UID `65534` (nobody), NOT `User: 'nobody'` (which depends on the base image's `/etc/passwd`) +- `no-new-privileges` prevents setuid escalation + +### 9.4.1 Network isolation for deployed MCPs + +**`NetworkMode: 'bridge'` is NOT acceptable** for sandboxed MCPs. Bridge mode gives the container full egress to the internet and to other containers on the default bridge network. + +Instead, the broker creates a **per-deployment Docker network** with no external access by default: + +```ts +// Create a network with no egress +await docker.createNetwork({ + Name: mcpNetworkName(meshSlug, serverName), + Driver: 'bridge', + Internal: true, // ← no NAT to the host/internet + IPAM: { Config: [{ Subnet: '172.28.0.0/24' }] }, + Options: { 'com.docker.network.bridge.enable_icc': 'false' }, // no inter-container comms +}); +``` + +When `network_allow` is non-empty, the broker attaches an **egress proxy** (a tiny sidecar container running `envoy` or `mitmproxy` in allowlist mode) to the MCP's network. The proxy accepts outbound connections only to hosts in `network_allow`, rejects everything else, and blocks private IP ranges by default: + +- `10.0.0.0/8`, `172.16.0.0/12`, `192.168.0.0/16` (RFC1918 private) +- `127.0.0.0/8` (loopback) +- `169.254.0.0/16` (link-local — blocks cloud metadata endpoints) +- `fc00::/7`, `fe80::/10` (IPv6 private + link-local) +- `::1/128` + +Even if `network_allow` contains a hostname that resolves to a private IP, the proxy rejects the connection at the IP layer. + +**Docker socket access is forbidden.** No MCP can mount `/var/run/docker.sock`. The `docker` MCP is explicitly excluded from the default catalog (§12). 
+ +### 9.5 Environment variables and vault + +MCP servers often need credentials (API keys, DB passwords). These live in the **vault**, not in the deploy command: + +```ts +// Step 1: store the secret +vault_set({ key: "github_token", value: "ghp_xxx..." }); + +// Step 2: reference it in deploy +mesh_mcp_deploy({ + server_name: "github", + npx_package: "@modelcontextprotocol/server-github", + env: { + GITHUB_PERSONAL_ACCESS_TOKEN: "$vault:github_token", + }, + scope: "mesh", +}); +``` + +The broker resolves `$vault:<key>` at container start by decrypting the value with the per-mesh wrapping key. The raw value is injected as an environment variable and never appears in logs or command history. + +### 9.6 Scope (visibility) + +Each deployed MCP has a visibility scope controlling which peers can call it: + +| Scope | Meaning | +|---|---| +| `"peer"` (default) | Only the deployer can call it | +| `"mesh"` | Every member of the mesh can call it | +| `{ group: "frontend" }` | Only members of `@frontend` group | +| `{ groups: ["frontend", "backend"] }` | Union of multiple groups | +| `{ role: "admin" }` | Members with the `admin` role | +| `{ peers: ["alice", "bob"] }` | Explicit peer allowlist | + +Change scope later with `mesh_mcp_scope(server_name, new_scope)`. + +### 9.7 Logs and observability + +Every deployed MCP server's stdout and stderr are captured by the broker: + +``` +mesh_mcp_logs("postgres-prod", lines=50) +``` + +Logs are retained for 7 days in the broker's own storage. Logs older than that are dropped. + +### 9.8 Catalog + +``` +mesh_mcp_catalog() +``` + +Returns a list of every deployed MCP server in the mesh, with: +- Server name +- Status (`running`, `starting`, `stopped`, `crashed`) +- Scope +- Tool count +- Uptime +- Last log timestamp + +Filtered by the caller's visibility: you only see servers whose scope includes you. 
+ +### 9.9 Cold start and lifecycle + +- **First deploy**: ~10–30 seconds (Docker image pull if not cached, container start, MCP handshake) +- **Cached deploy**: ~2–5 seconds +- **Undeploy**: ~1 second (SIGTERM with 10s grace, then SIGKILL) +- **Update**: undeploy + deploy with same config +- **Auto-restart**: if the container crashes, the broker restarts it up to 5 times in 60 seconds. Beyond that, it's marked `crashed` and requires manual `mesh_mcp_update`. + +### 9.10 Resource limits per mesh + +- **Max deployed servers per mesh**: 20 +- **Max total memory per mesh**: 4 GB +- **Max total containers on broker**: 200 (across all meshes) + +--- + +## 10. Vault (encrypted credentials) + +### 10.1 Overview + +Per-peer encrypted storage for secrets used by deployed MCP servers. Secrets are encrypted at rest with AES-GCM, keys wrapped with a per-mesh KMS key. + +### 10.2 Tools + +``` +vault_set(key, value, type?, mount_path?, description?) → store +vault_list() → list keys + metadata (no values) +vault_delete(key) → remove +``` + +### 10.3 Types + +- `type: "env"` (default) — a string, injected as an environment variable via `$vault:` +- `type: "file"` — a file, written to `mount_path` inside the deployed container. Used for TLS certs, SSH keys, JSON credential files + +### 10.4 Per-peer, per-mesh + +Each peer has their own vault entries per mesh. Peer A's `github_token` and Peer B's `github_token` are two separate values. When a deployed MCP references `$vault:github_token`, the broker looks up the secret owned by **the user who deployed the server** (the deployer's vault, not the caller's vault — tool calls from other peers execute with the deployer's credentials). + +### 10.5 `mount_path` validation for `type: file` + +When a vault entry is of `type: file`, it's written to `mount_path` inside the deployed container. 
The broker **validates `mount_path` before accepting the vault entry**: + +```ts +function validateMountPath(mountPath: string): void { + if (!mountPath.startsWith('/run/secrets/')) { + throw new Error('mount_path must be under /run/secrets/'); + } + if (mountPath.includes('\0')) throw new Error('null byte in mount_path'); + if (mountPath.includes('..')) throw new Error('parent reference forbidden'); + if (!/^\/run\/secrets\/[a-zA-Z0-9._-]+$/.test(mountPath)) { + throw new Error('invalid mount_path format'); + } +} +``` + +All vault files are written under `/run/secrets/` inside the container. Path traversal via `mount_path: "../../etc/passwd"` is rejected at `vault_set` time, not at container start time. + +Inside the container, `/run/secrets/` is a dedicated tmpfs mount separate from the writable `$HOME` tmpfs. Files are owned by the deployment runtime user (UID 65534), mode `0400` (read-only by owner). + +### 10.6 Security + +- AES-256-GCM for row encryption +- Per-mesh wrapping key derived from the broker's KMS +- Secrets never logged (scrubbed from broker stdout, container stdout piped through a secret-masking filter before being written to `mcp_logs`) +- Never returned by `vault_list` (only metadata: key, type, created_at, description) +- Revocation: `vault_delete` immediately breaks any deployed server using that key (the broker sends SIGTERM to affected containers) +- RBAC: only the deployer can `vault_set` for keys referenced by their own deployments. Admins can revoke but not read other peers' vault entries. + +--- + +## 11. URL watch + +### 11.1 Overview + SSRF policy + +The broker polls an HTTP URL on a schedule and notifies the requesting peer when the response changes. Useful for monitoring external status pages, build progress, PR states, etc. 
+ +**`mesh_watch` is a server-side HTTP fetch primitive and therefore an SSRF vector.** The broker enforces a destination policy on every watch URL: + +**Rejected destinations** (checked at watch creation AND on every poll, after DNS resolution): +- Private IP ranges: `10.0.0.0/8`, `172.16.0.0/12`, `192.168.0.0/16`, `100.64.0.0/10` (CGNAT) +- Loopback: `127.0.0.0/8`, `::1/128` +- Link-local (cloud metadata): `169.254.0.0/16`, `fe80::/10` — blocks AWS/GCP/Azure IMDS endpoints +- IPv6 private: `fc00::/7` +- Broadcast: `255.255.255.255` +- Unspecified: `0.0.0.0`, `::` +- Broker's own hostname / internal container network + +**DNS rebinding protection**: the broker resolves the URL's hostname BEFORE fetching, rejects any resolution into a blocked range, and pins the IP for subsequent polls. If a subsequent poll resolves to a different IP (rebinding attempt), the watch is disabled and the creator is notified. + +**Allowed schemes**: `https://` only (no `http://`, no `file://`, no `gopher://`, no `ftp://`, no `data:`). `http://` is allowed in development mode only via `CLAUDEMESH_DEV=1` on the broker. + +**Per-watch egress limit**: max 1 MB response body per poll. Responses larger than 1 MB are truncated and hashed at 1 MB. + +### 11.2 Tools + +``` +mesh_watch({ + url: "https://status.example.com", + mode: "hash" | "json" | "status", + extract?: "data.status", // for json mode + interval: 30, // seconds, min 5 + notify_on?: "change" | "match:up" | "not_match:down", + headers?: { Authorization: "..." 
}, + label?: "Example status" +}) + +mesh_unwatch(watch_id) +mesh_watches() → list own watches +``` + +### 11.3 Detection modes + +- **`hash`** — SHA-256 of the response body; notify on any change +- **`json`** — extract a jsonpath from the response; notify on change at that path +- **`status`** — HTTP status code only; notify on code change + +### 11.4 Notification + +When a change is detected, the broker pushes a message to the watching peer via the normal `send_message` channel with `subtype: watch`: + +```json +{ + "subtype": "watch", + "watch_id": "abc123", + "label": "Example status", + "url": "https://status.example.com", + "old_value": "up", + "new_value": "down", + "at": 1712800000000 +} +``` + +### 11.5 Resource limits and worker pool + +- **Max watches per peer per mesh**: 10 +- **Min interval**: 5 seconds +- **Max interval**: 86400 seconds (24h) +- **Max response body for hash/json**: 1 MB +- **Broker-side total concurrent watches**: 10,000 per broker instance + +**Worker pool architecture**: 10,000 watches at 5-second intervals = 2,000 requests/second sustained. A single polling loop cannot handle this. The broker runs a **worker pool** with backpressure: + +- 50 worker goroutines (or Node.js async workers) +- Shared priority queue sorted by next-poll timestamp +- Each worker pulls the next due watch, fetches, applies change detection, emits notification, re-queues +- If the pool can't keep up, poll intervals stretch (warning logged, watch marked "degraded") + +For >10,000 concurrent watches, horizontal scaling is required: multiple broker instances with a shared queue (Redis-backed). This is a v1.1+ feature; v1.0.0 caps at 10,000 per broker. + +--- + +## 12. Default bundled MCP catalog + +### 12.1 Purpose + +Users should be able to add common infrastructure tools (GitHub, Slack, filesystem, etc.) to their mesh with a single command, without hunting for the right package name or writing a config file. 
The broker ships with a curated catalog of **official Anthropic MCP reference servers** pre-approved for one-command deployment. + +### 12.2 The tiered catalog + +The catalog is **tiered by risk**. Tier 1 (core) ships enabled by default. Tier 2 (extended) requires explicit opt-in by a mesh admin via `claudemesh advanced catalog enable tier=extended`. Tier 3 (dangerous) is **never** available via catalog — users who want these MCPs must deploy them via `npx_package` or `git_url` with full awareness of the risks. + +**Tier 1 — Core (default, low-risk)** + +Version-pinned, SHA256-locked, signature-verified, quarterly audited: + +| Alias | Package | Risk profile | Env vars | +|---|---|---|---| +| `git` | `@modelcontextprotocol/server-git` | local git ops, read-only by default | none | +| `memory` | `@modelcontextprotocol/server-memory` | in-container KV, no network | none | +| `sequential-thinking` | `@modelcontextprotocol/server-sequential-thinking` | no I/O, pure reasoning aid | none | +| `time` | `@modelcontextprotocol/server-time` | timezone lookup, no network | none | +| `filesystem` | `@modelcontextprotocol/server-filesystem` | scoped to `/workspace/data` per mesh | none | + +**Tier 2 — Extended (opt-in, medium-risk)** + +Require explicit admin enablement per mesh. 
Egress-controlled via `network_allow` to specific host lists per entry: + +| Alias | Package | Risk | Env vars | Egress allowlist | +|---|---|---|---|---| +| `github` | `@modelcontextprotocol/server-github` | API key in vault | `GITHUB_PERSONAL_ACCESS_TOKEN` | `api.github.com`, `github.com` | +| `gitlab` | `@modelcontextprotocol/server-gitlab` | API key in vault | `GITLAB_PERSONAL_ACCESS_TOKEN` | `gitlab.com`, `*.gitlab.com` (configurable) | +| `slack` | `@modelcontextprotocol/server-slack` | bot token in vault | `SLACK_BOT_TOKEN`, `SLACK_TEAM_ID` | `slack.com`, `*.slack.com` | +| `linear` | `linear-mcp` | API key in vault | `LINEAR_API_KEY` | `api.linear.app` | +| `notion` | `@notionhq/notion-mcp-server` | API key in vault | `NOTION_API_KEY` | `api.notion.com` | +| `google-maps` | `@modelcontextprotocol/server-google-maps` | API key in vault | `GOOGLE_MAPS_API_KEY` | `maps.googleapis.com` | +| `google-drive` | `@modelcontextprotocol/server-gdrive` | OAuth flow | OAuth tokens in vault | `googleapis.com`, `*.googleusercontent.com` | +| `stripe` | `@stripe/mcp` | live API key — highest risk of T2 | `STRIPE_SECRET_KEY` | `api.stripe.com` | +| `postgres` | `@modelcontextprotocol/server-postgres` | external DB connection | `POSTGRES_CONNECTION_STRING` | (user-specified host + egress proxy validates) | +| `sqlite` | `@modelcontextprotocol/server-sqlite` | scoped to `/workspace/data` | `SQLITE_PATH` | none | +| `fetch` | `@modelcontextprotocol/server-fetch` | arbitrary HTTP — SSRF vector | none | (user-specified, validated per-request by egress proxy) | +| `puppeteer` | `@modelcontextprotocol/server-puppeteer` | browser automation — can leak data | none | (user-specified) | +| `playwright` | `@playwright/mcp` | browser automation | none | (user-specified) | + +**Tier 3 — Dangerous (never in catalog)** + +| Alias | Why excluded | +|---|---| +| `docker` / any MCP requiring Docker socket access | Socket access = root on host VPS = container escape | +| Shell/exec MCPs that 
run arbitrary commands | No sandbox tight enough; equivalent to RCE | +| Any MCP requiring `CAP_SYS_ADMIN`, `CAP_NET_ADMIN`, or privileged mode | Escalation risk | + +Users who need tier-3 functionality deploy via `npx_package` or `git_url` and take responsibility for the security review. + +**Source of truth**: the catalog lives in `apps/broker/src/mcp-catalog.ts` and is pinned to specific versions with SHA256 lockfile entries. + +### 12.3 Catalog audit process (documented, not one-liner) + +Every catalog entry goes through a **mandatory audit checklist** before inclusion and at quarterly review: + +**On inclusion**: +1. **Provenance check** — package published by a verified Anthropic partner or a well-known vendor (e.g. Stripe, Notion, Slack) +2. **Source audit** — review the package source for: filesystem access, network hosts, env var reads, spawned processes, native dependencies +3. **Version pin** — exact version + SHA256 hash in `mcp-catalog-lockfile.json`. No `latest` tags, no version ranges. +4. **Signature verification** — if the package is signed (sigstore/cosign), verify the signature. If not, document the risk. +5. **Permission review** — document the minimum set of permissions, env vars, and network hosts required. Mismatch with catalog entry = audit fail. +6. **Risk tier assignment** — Tier 1 (zero external I/O), Tier 2 (known-host egress), Tier 3 (excluded) +7. **Approval** — two reviewers (one engineering, one security) sign off + +**Quarterly re-review** (every 3 months): +1. Check for upstream version updates, CVEs, or publisher changes +2. Re-run source audit against the new version if any updates are pending +3. Update `mcp-catalog-lockfile.json` with new pins if approved +4. Document changes in `CHANGELOG-catalog.md` + +**Compromise response** (if an upstream package is compromised): +1. Broker revokes the catalog entry immediately (pushed via broker config reload) +2. Running deployments using that catalog entry are SIGTERMed +3. 
Users notified via `mesh_info` and the next `claudemesh` launch +4. Post-mortem documented at `docs/incidents/` + +The audit checklist and current lockfile live in `apps/broker/src/mcp-catalog.ts` and `apps/broker/mcp-catalog-lockfile.json`. All tier-1 and tier-2 catalog entries are subject to this process. Third-party `npx_package` / `git_url` deployments bypass the catalog entirely and are the user's responsibility. + +### 12.4 One-command deployment + +``` +mesh_mcp_deploy({ + server_name: "github", + catalog: "github", // ← references the catalog alias + env: { + GITHUB_PERSONAL_ACCESS_TOKEN: "$vault:github_token" + }, + scope: "mesh" +}) +``` + +When `catalog` is set, the broker: +1. Looks up the catalog entry +2. Uses the pinned package/version, not a free-form `npx_package` +3. Validates required env vars are present (from vault) +4. Applies the catalog's default sandboxing rules +5. Deploys + +### 12.5 Custom MCP deployments still work + +The catalog doesn't replace `npx_package` / `git_url` / `file_id` deployments. It's a fast path for common cases. Custom deployments retain full control but require more careful configuration. + +### 12.6 Catalog discovery from CLI + +``` +$ claudemesh mcp catalog + +Available MCP servers (official Anthropic catalog): + + filesystem read/write files in a scoped directory + github GitHub API: issues, PRs, commits, files + git local git ops: log, diff, blame + postgres run SQL against a Postgres database + slack Slack: channels, messages, users + fetch HTTP fetch any URL + memory reference memory MCP (simple KV) + sequential-thinking structured step-by-step reasoning + time timezone-aware time queries + puppeteer browser automation + ... + +Deploy with: claudemesh mcp deploy <alias> +``` + +An advanced CLI command (not in the main 8). The actual deployment is through the normal `mesh_mcp_deploy` tool surface; this command is a convenience wrapper.
+ +### 12.7 Security review for catalog updates + +Before a new package is added to the catalog: +1. Source code audit (the publisher, the package, recent updates) +2. Permissions review (what env vars, what network hosts, what filesystem paths) +3. Version pinning (never `latest`, always explicit) +4. Bundled in the next broker release (no runtime catalog updates) + +**The catalog is an opinionated list, not a marketplace.** Users who want bleeding-edge or third-party MCPs use `npx_package` or `git_url` with the understanding that they're taking on the security review themselves. + +--- + +## 13. Broker deployment requirements + +### 13.1 Services the broker depends on + +| Service | Version | Purpose | Isolation | +|---|---|---|---| +| PostgreSQL | 15+ | Broker metadata + per-mesh shared SQL schemas | schema-per-mesh | +| Neo4j | 5.15+ | Per-mesh graph databases | database-per-mesh (Enterprise) or labeled (Community) | +| Qdrant | 1.7+ | Per-mesh vector collections | collection-per-mesh | +| MinIO | latest | Per-mesh object storage buckets | bucket-per-mesh | +| Docker | 24+ | MCP runtime sandboxes | container-per-deployment | +| KMS | any cloud KMS or local | Vault key wrapping | per-mesh key | + +### 13.2 Docker Compose reference + +**Default ships with Neo4j Community** (safer default, no license acceptance). Meshes that need `graph_*` tools must override to Enterprise and accept the license separately.
+ +```yaml +# apps/broker/docker-compose.yml (reference deployment — Community default) +services: + broker: + image: claudemesh/broker:1.0.0 + depends_on: [postgres, pgbouncer, neo4j, qdrant, minio] + environment: + POSTGRES_URL: postgresql://broker:${POSTGRES_PASSWORD}@pgbouncer:6432/broker + NEO4J_URL: bolt://neo4j:7687 + NEO4J_USER: neo4j + NEO4J_PASSWORD: ${NEO4J_PASSWORD} + NEO4J_EDITION: community # or 'enterprise' (see docker-compose.enterprise.yml) + QDRANT_URL: http://qdrant:6333 + MINIO_ENDPOINT: minio:9000 + MINIO_ACCESS_KEY: ${MINIO_ACCESS_KEY} + MINIO_SECRET_KEY: ${MINIO_SECRET_KEY} + KMS_KEY_ID: ${KMS_KEY_ID} + DOCKER_SOCKET: /var/run/docker.sock + volumes: + - /var/run/docker.sock:/var/run/docker.sock + - mcp-workspaces:/broker/data # per-mesh MCP workspaces (see §13.3) + + pgbouncer: + image: edoburu/pgbouncer:latest + depends_on: [postgres] + environment: + DB_HOST: postgres + DB_USER: broker + DB_PASSWORD: ${POSTGRES_PASSWORD} + POOL_MODE: transaction + MAX_CLIENT_CONN: 10000 + DEFAULT_POOL_SIZE: 10 + volumes: + - ./pgbouncer.ini:/etc/pgbouncer/pgbouncer.ini + + postgres: + image: postgres:15 + environment: + POSTGRES_PASSWORD: ${POSTGRES_PASSWORD} + command: > + postgres + -c max_connections=200 + -c statement_timeout=30000 + volumes: + - postgres-data:/var/lib/postgresql/data + + neo4j: + # Community edition by default — graph_* tools are DISABLED for shared meshes + # To enable graph tools for shared meshes, switch to neo4j:5.15-enterprise + # and accept the Enterprise license (user responsibility): + # NEO4J_ACCEPT_LICENSE_AGREEMENT: "yes" + image: neo4j:5.15-community + environment: + NEO4J_AUTH: neo4j/${NEO4J_PASSWORD} + volumes: + - neo4j-data:/data + + qdrant: + image: qdrant/qdrant:v1.7.4 + volumes: + - qdrant-data:/qdrant/storage + + minio: + image: minio/minio:latest + command: server /data --console-address ":9001" + environment: + MINIO_ROOT_USER: ${MINIO_ACCESS_KEY} + MINIO_ROOT_PASSWORD: ${MINIO_SECRET_KEY} + volumes: + - 
minio-data:/data + +volumes: + postgres-data: + neo4j-data: + qdrant-data: + minio-data: + mcp-workspaces: +``` + +**Enterprise Neo4j** (for meshes needing `graph_*`) lives in `apps/broker/docker-compose.enterprise.yml` as an overlay file: + +```yaml +services: + neo4j: + image: neo4j:5.15-enterprise + environment: + NEO4J_ACCEPT_LICENSE_AGREEMENT: "yes" # USER RESPONSIBILITY — Neo4j Enterprise license terms + NEO4J_EDITION: enterprise +``` + +Apply with `docker compose -f docker-compose.yml -f docker-compose.enterprise.yml up`. The user must review and accept the Neo4j Enterprise license independently — claudemesh does not bundle a license grant. + +### 13.3 Per-mesh MCP workspaces (filesystem MCP mount protocol) + +Deployed MCPs that need filesystem access (e.g. `filesystem`, `sqlite`) get a **per-mesh persistent workspace** mounted at `/workspace/data` inside the container: + +``` +Host path: /broker/data/mesh-<mesh_id>/mcp-<server_name>/ +Container path: /workspace/data +Permissions: owned by UID 65534 (container's nobody), mode 0700 +``` + +The broker creates the workspace on first deployment, validates the path (no `..`, no absolute outside `/broker/data`), and cleans it up on `mesh_mcp_undeploy`. Workspaces are **not shared** across MCP deployments — each `(mesh, server_name)` pair gets its own isolated directory. + +**Quota**: 1 GB per workspace (enforced via tmpfs size or disk quota). Larger requirements need explicit admin approval. + +Example: the `filesystem` MCP is deployed with: +```json +{ + "server_name": "fs", + "catalog": "filesystem", + "env": { "ALLOWED_DIRS": "/workspace/data" }, + "scope": "mesh" +} +``` +Peers then call `mesh_tool_call("fs", "read_file", { path: "/workspace/data/notes.md" })` and the MCP returns the file content from the per-mesh workspace. + +The `sqlite` MCP is similar — it stores the SQLite database file at `/workspace/data/mesh.sqlite` and uses the mount for persistence across container restarts.
+ +### 13.4 Minimum broker VPS specs + +For a small mesh (< 10 peers, < 5 deployed MCPs): + +| Resource | Minimum | Recommended | +|---|---|---| +| CPU | 2 vCPU | 4 vCPU | +| RAM | 4 GB | 8 GB | +| Disk | 20 GB | 100 GB | +| Network | 100 Mbps | 1 Gbps | + +For a large mesh (50+ peers, 20+ deployed MCPs): + +| Resource | Minimum | Recommended | +|---|---|---| +| CPU | 8 vCPU | 16 vCPU | +| RAM | 16 GB | 32 GB | +| Disk | 200 GB | 1 TB SSD | +| Network | 1 Gbps | 10 Gbps | + +### 13.5 Scaling notes + +- **Postgres**: vertical scaling first, read replicas for read-heavy meshes +- **Neo4j**: Enterprise clustering for large graphs +- **Qdrant**: horizontal scaling via sharding (collections per node) +- **MinIO**: distributed mode (4+ nodes) for high availability +- **Broker process**: single-node for v1.0.0; horizontal scaling via sticky sessions in v1.1+ + +### 13.6 Official Docker image + +`claudemesh/broker:1.0.0` — built from `apps/broker/Dockerfile`, includes: +- Broker binary (Bun-compiled) +- All CLI dependencies for the catalog MCPs (node, python, bun runtimes for sandbox containers) +- `sqlite-vec` extension for embedded use +- Default seccomp profile for container sandboxing + +### 13.7 Broker availability posture (single-node for v1.0.0) + +**v1.0.0 ships a single-node broker per mesh.** There is no high-availability failover, no load balancing across broker instances, no multi-region replication. The broker is a single point of failure for all shared-infrastructure operations (SQL, graph, vectors, MCP tool calls, URL watches). + +**Client behavior on broker outage**: +- **Local-first tools** (memory, state, tasks, personal files, recall on local vectors) continue to work from SQLite. Users experience no interruption for per-peer data operations. +- **Broker-backed tools** (mesh_query, graph_query, vector_search, mesh_tool_call, mesh_watch) return a clear error: `"Can't reach the mesh broker right now. This operation requires the shared infrastructure.
Try again in a minute."` +- **The sync daemon enters reconnecting state** with exponential backoff (1s → 2s → 5s → 10s → 30s max). Outbox operations queue locally and flush on reconnect. +- **Claude Code's status line** transitions to amber/gray `◉` / `◎` to signal broker unreachable. + +**Client behavior on broker restart**: +- The broker increments its `broker_epoch` on each restart (see storage spec §5.9) +- CLIs reconnect and receive the new epoch in the first ack +- Inbox dedupe uses `(mesh_epoch, broker_seq)` so seq numbers starting from 1 after restart don't collide with prior deliveries +- Postgres connections held via PgBouncer are reset via `server_reset_query = DISCARD ALL` to clear any leaked session state + +**In-flight Postgres state on broker disconnect**: +- PgBouncer-pooled connections in transaction mode: any in-flight transaction aborts on broker crash and releases the connection back to the pool +- `SET search_path` state is cleared by `DISCARD ALL` on reset, so reused connections start clean for the next mesh +- A connection validator (`SELECT 1` on checkout) catches any connections that survived in broken state + +**Clean error surfaces on outage**: +| Operation | User-visible message | +|---|---| +| `claudemesh launch` to shared mesh | "Can't reach the mesh. Your Claude Code session will start, but broker-backed tools will be unavailable until we reconnect." | +| `mesh_execute` during outage | "Can't reach the mesh broker. Try again in a minute." | +| `claudemesh share` during outage | "Can't publish right now. claudemesh.com is unreachable. Try again in a minute." | +| Background sync daemon | Silent retry with status line dot transition. No modal. | + +**HA is a v1.1+ feature.** v1.0.0 treats the broker as a dependency similar to any other self-hosted database — run it on reliable infrastructure, monitor it, accept that outages are possible. For mission-critical deployments, document the limitation explicitly in the operator runbook.
+ +### 13.8 Broker observability (v1.0.0 minimum) + +- **Structured logs** to stdout in JSON format, ingestible by any log collector +- **`/health`** HTTP endpoint returning `{ status, postgres_ok, neo4j_ok, qdrant_ok, minio_ok, uptime_s, version }` +- **`/metrics`** Prometheus-format endpoint with: request counts, latencies (p50/p99), error rates by category, active connections per pool, sync daemon outbox/inbox lag, deployed MCP container count +- **Audit log** (§14.2) retained for 90 days, accessible via `claudemesh advanced audit --mesh <mesh_id>` + +v1.1+ will add traces, alerts, and a dashboard template. v1.0.0 ships with enough to diagnose outages via `curl /health` and log tailing. + +--- + +## 14. Security model + +### 14.1 Threat model + +| Threat | Mitigation | +|---|---| +| Cross-mesh data leak via SQL | Schema isolation + search_path enforcement + parser-level rejection of cross-schema queries | +| Cross-mesh data leak via Neo4j | Enterprise database isolation (preferred) or labeled queries (community) | +| Cross-mesh data leak via Qdrant | Collection-level naming + broker-enforced ACL | +| Cross-mesh data leak via MinIO | Bucket-per-mesh + IAM policies | +| Deployed MCP escaping sandbox | Docker with read-only root, dropped caps, seccomp, no-new-privileges | +| Vault secret leak in logs | Secrets never appear in stdout/stderr; env injection happens at container start | +| Deployed MCP making outbound network calls | Default: no network. Explicit `network_allow` required.
| +| Peer calling another peer's local MCP | Tier-1 MCP calls are routed through broker with auth check | +| Malicious MCP from catalog | Catalog entries are version-pinned and audited before inclusion | +| Malicious custom MCP (`npx_package`/`git_url`) | User takes responsibility; broker enforces sandbox regardless | +| Broker compromise | Per-mesh KMS wrapping keys; root compromise still exposes ciphertext but not KMS keys without separate credentials | + +### 14.2 Audit logging + +Every shared-infrastructure operation is logged to the broker's audit log: + +```json +{ + "timestamp": 1712800000000, + "mesh_id": "alejandro-mbp", + "peer_id": "alice", + "action": "mesh_mcp_deploy", + "resource": "server_name=github", + "result": "success", + "source_ip": "1.2.3.4" +} +``` + +Logs are retained for 90 days. Accessible via `claudemesh advanced audit --mesh <mesh_id>` (admin-only). + +### 14.3 Rate limiting + +Limits are applied **per peer, per mesh** (not per mesh total, which would let one abusive peer starve the quota for everyone). An additional per-mesh aggregate cap applies at 10× the per-peer limit to cap total mesh load. + +| Operation | Per peer/mesh | Per mesh aggregate | +|---|---|---| +| `mesh_execute` | 100/min | 1000/min | +| `graph_execute` | 100/min | 1000/min | +| `vector_store` | 500/min | 5000/min | +| `vector_search` | 1000/min | 10000/min | +| `mesh_mcp_deploy` | 5/hour | 50/hour | +| `mesh_tool_call` | 1000/min | 10000/min | +| `mesh_watch` (create) | 10/hour | 100/hour | +| `share_file` | 100/hour | 1000/hour | + +Rate limits are enforced via token buckets in the broker, keyed by `(mesh_id, peer_id, operation)`. Excess requests return a `rate_limited` error with `retry_after_seconds` in the response. + +**Per-IP rate limit** (separate from per-peer): 2000 requests/minute per source IP to protect against anonymous abuse of unauthenticated endpoints (device-code polling, invite claim). + +--- + +## 15.
Tool surface summary + +All ~30 tools from the gap analysis, organized by family, with their broker-side requirements: + +| Family | Tools | Backend | Isolation | +|---|---|---|---| +| **SQL** | `mesh_query`, `mesh_execute`, `mesh_schema` | Postgres | schema-per-mesh | +| **Graph** | `graph_query`, `graph_execute` | Neo4j | database-per-mesh | +| **Vectors** | `vector_store`, `vector_search`, `vector_delete`, `list_collections` | Qdrant | collection-per-mesh | +| **Files (large)** | `share_file`, `get_file`, `grant_file_access`, `read_peer_file`, `list_peer_files`, `list_files`, `file_status`, `delete_file` | MinIO | bucket-per-mesh | +| **MCP registry (peer-hosted)** | `mesh_mcp_register`, `mesh_mcp_list`, `mesh_mcp_remove`, `mesh_tool_call` | in-memory on broker | per-mesh registry | +| **MCP registry (broker-deployed)** | `mesh_mcp_deploy`, `mesh_mcp_undeploy`, `mesh_mcp_update`, `mesh_mcp_logs`, `mesh_mcp_scope`, `mesh_mcp_schema`, `mesh_mcp_catalog` | Docker | container-per-deployment | +| **Vault** | `vault_set`, `vault_list`, `vault_delete` | Postgres + AES-GCM | row-per-peer | +| **URL watch** | `mesh_watch`, `mesh_unwatch`, `mesh_watches` | broker scheduler | row-per-watch | +| **Mesh clock (write)** | `mesh_set_clock`, `mesh_pause_clock`, `mesh_resume_clock` | in-memory on broker | per-mesh | +| **Streams** | `create_stream`, `publish`, `subscribe`, `list_streams` | Redis / in-memory pub-sub | per-mesh | +| **Webhooks** | `create_webhook`, `list_webhooks`, `delete_webhook` | broker HTTP server | per-mesh | +| **Contexts** | `share_context`, `get_context`, `list_contexts` | Postgres | schema-per-mesh | +| **Skills** | `share_skill`, `get_skill`, `list_skills`, `remove_skill`, `mesh_skill_deploy` | Postgres + MinIO | schema-per-mesh | + +**Tools not needing shared infrastructure** (local-first, already in the storage spec): +- Memory: remember, recall, forget +- State: set_state, get_state, list_state +- Tasks: create_task, claim_task, complete_task, 
list_tasks +- Messaging: send_message, list_peers, check_messages, message_status +- Profile: set_summary, set_status, set_visible, set_profile +- Groups: join_group, leave_group +- Scheduling: schedule_reminder, list_scheduled, cancel_scheduled +- Mesh meta: mesh_info, mesh_stats, mesh_clock (read), ping_mesh +- Small files (< 64 KB) fallback to local blobs + +--- + +## 16. Migration from v1 + +### 16.1 What stays unchanged + +- `apps/broker/src/qdrant.ts` — port verbatim +- `apps/broker/src/minio.ts` — port verbatim +- `apps/broker/src/neo4j-client.ts` — port verbatim +- Postgres schema management logic in `apps/broker/src/broker.ts` — port verbatim +- MCP runtime sandbox logic — port verbatim +- Vault encryption logic — port verbatim +- URL watch scheduler — port verbatim +- Tool definitions in `apps/cli/src/mcp/tools.ts` — ported into `apps/cli-v2/src/mcp/tools/<family>.ts` files + +### 16.2 What changes + +- **CLI side**: tool handlers move from `apps/cli/src/mcp/` (monolithic) to `apps/cli-v2/src/mcp/tools/<family>.ts` (one file per family) +- **Broker client**: `apps/cli-v2/src/services/broker/ws-client.ts` is a typed wrapper with schemas for each remote tool call +- **Tool dispatch**: broker-backed tools go through `services/broker/facade.ts`, local tools go through the appropriate feature facade (per the facade pattern spec) +- **Authz enforcement**: v2 explicitly calls `services/auth/facade.ts::whoAmI()` before any shared-infrastructure operation, then the broker re-validates on its side +- **Default MCP catalog**: new file `apps/broker/src/mcp-catalog.ts` with the curated list (documented in §12) + +### 16.3 What's added + +- Curated default MCP catalog (§12) +- Explicit RBAC model with role matrix (§3) +- Structured audit logging (§14.2) +- Explicit rate limits per operation (§14.3) +- Resource limit enforcement per mesh +- v2 broker Docker Compose reference deployment (§13.2) + +### 16.4 Phase plan integration + +This spec adds **~3-4 days** to the v2 phased plan: +
+| Phase | New work | +|---|---| +| Phase 4 (Mesh core) | +1 day — port broker client WS wrappers for SQL/graph/vector/files | +| Phase 5 (Sync daemon) | unchanged | +| Phase 7 (MCP server) | +2 days — implement `mcp/tools/sql.ts`, `graph.ts`, `vectors.ts`, `files.ts`, `mcp-registry.ts`, `watch.ts`, `vault.ts`, `mesh-clock.ts`; all delegating to broker facade | +| Phase 8 (Commands) | +0.5 day — add `claudemesh mcp catalog` and `claudemesh mcp deploy <alias>` advanced commands | +| Phase 9 (Migration) | +0.5 day — document broker deployment requirements in new docs section | + +Total v2 phased plan revises from ~28-37 days to **~32-41 days** realistic, or ~11-13 days aggressive with Opus 4.6 1M. + +--- + +## 17. Open questions + +1. **Neo4j Enterprise licensing**: community edition is free but lacks multi-database. v1 silently falls back to labeled queries. Should v2.0.0 require Enterprise, or document both paths? Recommendation: document both, warn community users of the security implications. + +2. **Embedding provider for Qdrant**: the broker needs to call an embedding model. Options: (a) use the user's OpenAI key from their vault, (b) run a local sentence-transformers container, (c) require the caller to pre-compute embeddings. v1 uses option (a). Recommendation: keep (a), add (b) as a config option for air-gapped deployments. + +3. **Docker socket access**: the broker mounts `/var/run/docker.sock` to spawn MCP sandboxes. This is a significant privilege. Alternative: use `docker-in-docker` or a separate sandbox runner with a minimal API. Recommendation: stick with Docker socket for v1.0.0, add hardening notes in the security runbook. + +4. **MinIO vs S3**: should v2 default to MinIO or support S3-compatible backends generically? Recommendation: MinIO is the reference; any S3-compatible backend works via the same `minio` client library. + +5. **Per-mesh Postgres connection pooling**: 10 connections per mesh can exhaust a Postgres cluster with 1000 meshes.
Should the broker use PgBouncer or a shared connection pool with search_path switching? Recommendation: shared pool with search_path switching, already implemented in v1. + +6. **Vault KMS**: v1 uses a local key file. v2 should use a cloud KMS (AWS KMS, GCP KMS, Azure Key Vault) or HashiCorp Vault in production. Local key file remains as a dev fallback. + +7. **Tier-2 MCP execution fairness**: if one mesh deploys 20 MCPs and consumes all the broker's container resources, other meshes suffer. Need per-mesh quotas and a fairness scheduler. Recommendation: document as v1.1 feature; for v1.0.0 use static per-mesh limits. + +--- + +**End of spec.** diff --git a/.artifacts/specs/2026-04-10-cli-v2-pass2-ux-design.md b/.artifacts/specs/2026-04-10-cli-v2-pass2-ux-design.md new file mode 100644 index 0000000..886663b --- /dev/null +++ b/.artifacts/specs/2026-04-10-cli-v2-pass2-ux-design.md @@ -0,0 +1,1702 @@ +# claudemesh-cli v2 Pass 2 — UX Design Reference + +> ⚠️ **This document describes v2 Pass 2 — the longer-term UX vision, NOT the Pass 1 scope.** +> +> For the v2 Pass 1 implementation target, see **`2026-04-11-cli-v2-pass1.md`**. +> +> Pass 1 preserves v1's existing CLI interactions verbatim (welcome wizard, launch flow, status prompts). The design system described here (six semantic color roles, delight beats, session_kind enum, accessibility matrix, ICU MessageFormat, trust surfaces) is the Pass 2 interactive redesign, not the Pass 1 scope. +> +> This document is retained as reference for future Pass 2 UX work. 
+ +**Status:** Pass 2 future reference — NOT the Pass 1 implementation target +**Created:** 2026-04-10 +**Consolidated:** 2026-04-10 (35 amendments merged into body, no appendices) +**Companion to:** `2026-04-10-cli-v2-final-vision.md` (authoritative for architecture; defers to this doc for design) +**Related specs:** +- `2026-04-10-cli-v2-local-first-storage.md` — storage canonical +- `2026-04-10-cli-v2-facade-pattern.md` — boundary canonical +- `2026-04-10-cli-v2-shared-infrastructure.md` — broker-backed services canonical +**Purpose:** Single source of truth for every pixel, every string, every interaction in the v2 CLI. When implementation questions about *how* something should look, read, or feel arise, this doc wins. + +--- + +## Table of contents + +1. Design philosophy +2. The fourteen inviolable rules +3. Voice and tone +4. First-run onboarding script +5. Session kinds and output budgets +6. Microcopy catalog +7. Error patterns +8. Trust surfaces (distinct from delight) +9. Picker rules +10. Keyboard conventions +11. Progressive disclosure +12. Accessibility (testable matrix) +13. Dark/light terminal compatibility +14. Browser→terminal continuity +15. Claude Code status-line integration +16. Delight beats (four total) +17. Anti-patterns (forbidden behaviors) +18. Locked copy governance +19. Visual audit checklist +20. Copy review checklist +21. Stable JSON API versioning + +--- + +## 1. Design philosophy + +claudemesh-cli v2 is designed as if **Apple shipped a developer tool under Anthropic's brand**. The combination matters: + +- **Apple** gives us: opinionated defaults, zero configuration theater, delight in first seconds, restraint in visual language, one canonical path, and the reflex to delete every friction point. +- **Anthropic** gives us: honesty about limitations, respect for the user's competence, safety without fussiness, beautiful prose, and a voice that treats users as thoughtful adults. 
+ +The combination gives us: **a CLI that makes a technical user feel like the tool understood them before they even typed anything.** + +### What we're NOT designing + +- Not a command-line utility in the Unix tradition (terse, assume-you-know-what-you're-doing, unhelpful on error) +- Not a "friendly" CLI in the modern TUI trend (cartoonish, over-animated, cluttered with emoji, treats users like children) +- Not a dashboard wrapped in a terminal (heavy frames, tables everywhere, screen-sized output) + +We are designing **a terminal tool that respects the medium and the user equally**. + +### The two users we're designing for + +**User A — "Fresh install Alejandro"** +- Heard about claudemesh from a tweet or a colleague +- Has Claude Code installed +- Runs `claudemesh` expecting something to happen +- Has zero patience for setup, pickers, or prompts +- Will abandon the tool within 30 seconds if they don't see value + +**User B — "Daily driver Alexis"** +- Uses claudemesh 20+ times per day +- Has muscle memory for `claudemesh` and `claudemesh peers` +- Notices every extra millisecond of overhead +- Notices every change to output format (scripts depend on it) +- Will write angry GitHub issues about regressions + +Design for both simultaneously. User A's delight must not come at User B's expense, and User B's speed must not come at User A's confusion. + +--- + +## 2. The fourteen inviolable rules + +These are the design constraints every PR, every screen, every string gets checked against. Violation = revision. + +### Rule 1 — One question, or none + +> A CLI question exists only when the machine genuinely cannot guess. Every other question is a bug. + +**Test**: for every prompt in the code, ask "could the machine have guessed this from context?" If yes, delete the prompt and use the guess. + +**Applied**: first-run creates a mesh named `` without asking. Display name is the account's real name. Role is `member`. Template is `solo`. 
The user types `claudemesh` and answers zero questions. + +### Rule 2 — Silence is the interface (for daily use) + +> A daily-use command produces zero user-facing output before the handoff to Claude Code. + +**Test**: does the command print anything before `exec claude`? If yes, can it be skipped? + +**Applied**: `claudemesh` on a returning machine (session kind = `daily_launch`) goes straight from bash prompt to Claude Code's TUI. No banner, no "Continue?" beat, no status line. The terminal appears to simply become Claude Code. + +Exceptions to Rule 2 are **explicit and named**: first-run, recovery mode, and silent auth refresh (which shows one status line because the user's action in the browser is required, and zero output would be deceptive). + +### Rule 3 — Picker only on genuine ambiguity + +> A picker appears only when the user has 2+ valid choices AND no `--flag` AND no cached preference AND no clipboard hint. + +**Test**: four conditions. If any is false, no picker. If all four are true, show the picker. + +**Applied**: first run with one mesh → no picker. Daily use with a cached preference → no picker. `--mesh <name>` flag → no picker. Clipboard has an invite → one-option picker that's really just a confirm. + +### Rule 4 — Default is the primary action + +> When a picker does appear, the first option is always the action the user most likely wants, and Enter is always the accept key. + +**Test**: what happens on one keypress of Enter? + +**Applied**: mesh picker preselects last-used by default. Confirm screen defaults to "Continue". Invite detection defaults to "Join". Never make the user arrow-down to select what they already want. + +### Rule 5 — Remember everything, announce nothing + +> Cache every decision the user makes. Never brag about remembering. + +**Test**: does the next run of the same command feel shorter than the previous one? Without the user being told anything?
+ +**Applied**: last-used mesh, name, role are written to `~/.claudemesh/state.json` silently. Next run uses them. No "Using your last mesh: platform-team" message. No "(last used)" annotation in pickers. The selected row is indicated by position (first) and emphasis (bold + arrow), not by a label. + +### Rule 6 — Errors are Anthropic essays, not Unix stacks + +> Every error is 1–3 sentences in full English. Line 1: what happened. Line 2 (optional): what we know about why. Line 3: what to do next. **Exactly one primary recovery action per error**; if the recovery can fail, its next step is surfaced as a chained follow-up error, not inlined. + +**Test**: print the error to a non-technical colleague. Do they understand what to do next? + +**Applied**: see §7 for the full error pattern catalog. + +**Chained recovery**: for multi-step recoveries where the first action can fail and needs a second step, the error displayed is always the *current* one-action recovery. If that action fails, the next error in the chain becomes the new one-action error. Example: + +``` +Step 1 (first error): + The local mesh store is corrupt. + Run `claudemesh doctor --repair` to attempt automatic repair. + +[user runs doctor --repair, which fails] + +Step 2 (next error, shown when repair fails): + Automatic repair failed. Your local store has unrecoverable corruption. + Run `claudemesh doctor --reset` to back up the corrupt data and initialize + a fresh store. Your shared-mesh data on the broker is unaffected. +``` + +Each error in the chain still follows the one-action rule. The user is never presented with two competing actions in a single message. This is how the rule scales to real failure modes without adding UI complexity or changing the structural contract. + +### Rule 7 — No animation unless meaningful + +> Spinners for operations under 200ms are motion noise. Loading lines for predictable ops are disrespectful. Transitions for their own sake are insulting to the reader's time. 
+ +**Test**: would removing the animation change what the user knows or can do? + +**Applied**: no spinner on sub-200ms ops. No fake typing. No "loading mesh…" when the mesh loads in 12ms. A single `⠋` spinner appears only when an operation genuinely takes time (network I/O, device-code polling, browser round-trip). + +### Rule 8 — Six semantic color roles (five in monochrome), ten icons, one typeface + +> The visual system is deliberately constrained. Constraint is consistency, and consistency is trust. In monochrome mode (`NO_COLOR=1`), `dim` gracefully collapses into `muted`, leaving five distinguishable roles — this is an accepted degradation, not a contradiction. + +**Test**: does this screen introduce a color or icon not in the system? Reject. + +**Applied**: `ui/styles.ts` exports exactly **six semantic color roles**: + +```ts +export const Colors = { + primary: 'cyan', // default interactive / affirmative / brand + success: 'green', // confirmation of completed operations + error: 'red', // failures that block progress + warning: 'yellow', // degraded but non-blocking + muted: 'gray', // metadata, annotations, secondary info + dim: 'blackBright', // tertiary info (collapses to muted in monochrome) +} as const; +``` + +No custom hex colors. No purple brand tint. No `accent` or `title` as separate tokens. `primary` is the brand color, the selection color, and the heading color — one role, three uses. This works in any terminal theme (Solarized, Dracula, Nord, Tokyo Night, default macOS/Windows Terminal) without per-theme testing. + +Ten icons from BMP Unicode: `✔ ✘ ⚠ ▶ ▸ • ◆ █ ◉ ◎`. ASCII fallback mapping exists for old terminals (see §13.2). + +A `biome-lint-rule: no-inline-colors` catches violations at CI. A parallel `no-raw-glyphs` rule catches inline unicode. + +### Rule 9 — Typography, not decoration + +> Lines of text with alignment and whitespace. No boxes. No borders. No ASCII art. No tables unless displaying tabular data. 
+ +**Test**: does this screen use `│`, `─`, `╔`, or similar? Reject unless rendering a structured table. + +**Applied**: status rows align by column math, not by drawing boxes. Picker menus are plain lists with a gutter. **No brand mark.** The first-run welcome uses the product name in `primary` color, the tagline in `muted`, and nothing else. See §4 for the exact first-run copy. + +### Rule 10 — One primary action per screen + +> Every screen has exactly one "recommended" action — the first option in a picker with emphasis, or the only option implied by context. Cancel via Escape/Ctrl-C is always available. Non-cancel alternative actions (e.g. "pick a different option") are allowed as secondary rows in confirm pickers only. + +**Test**: can you point at the "do this" action on every screen in under 1 second? + +**Applied**: no screen shows two actions with equal visual weight. The primary is always distinguishable by position (first row), weight (bold), color (`primary`), and the `▸` gutter arrow. Secondary alternatives in confirm pickers (e.g. "Pick a different mesh" on the launch confirm) are allowed because they're navigational alternatives, not cancels — cancel is Escape/Ctrl-C. + +**Clarification on confirm pickers**: a row labeled "Pick a different mesh" in a confirm picker is NOT a cancel — it's a navigation back to the picker. A row labeled "Cancel" or "Go back" would violate this rule. 
The distinction is: +- **Cancel** (abort the entire flow, exit the CLI): Escape / Ctrl-C only +- **Navigate back** (return to a previous screen in the same flow): allowed as a picker row, clearly labeled with the destination ("Pick a different mesh", "Edit name") +- **Secondary action** (do a different thing than the primary): allowed as a picker row if and only if it's an alternative way to accomplish the user's goal, not a way out + +### Rule 11 — Progressive disclosure at the filesystem level + +> The main `--help` shows 8 commands plus a "When something's wrong" section (with `doctor` and `whoami`). Advanced commands are hidden behind `help advanced`. + +**Test**: does a new user's first `--help` overwhelm them? + +**Applied**: `commands/advanced/` folder is hidden from main citty help output. `claudemesh help advanced` is the only discovery path for less common commands. Survival commands (`doctor`, `whoami`) stay visible in the main help even though they're "advanced" by nature, because hiding them would be hostile to users in broken states. + +### Rule 12 — Context-aware primary action + +> The main command `claudemesh` behaves differently based on state, but the user always sees one obvious thing happen. + +**Test**: document every state `claudemesh` (bare) handles, confirm the outcome is "obvious". + +**Applied**: +- No config → bootstrap personal mesh, drop into Claude Code +- Config + last-used mesh → drop into Claude Code in that mesh +- Config + 2+ meshes + no last-used → picker +- Config + invite in clipboard → offer join (preselected) +- Config + expired token → silent refresh (one status line), then drop in +- Config + broker down → drop in with amber connection indicator in status line + +One command, many states, always obvious. + +### Rule 13 — Honest restraint with delight + +> We do not sprinkle emojis, jokes, or personality. We include exactly **four delight beats per major version**. 
Trust surfaces (compliance, telemetry, audit) are a distinct category and do not count against the delight budget. + +**Test**: count the delight beats in the catalog. If greater than 4, cut one. + +**Applied**: see §16 for the full locked catalog of 4 delight beats. The first-run closing sentence is `"You're in."` — three syllables, one complete thought. The 100th-session easter egg is `"Nice to see you again."` — acknowledging the relationship, not the count. + +### Rule 14 — The return-to-terminal contract + +> After any successful action, the user's terminal is left in exactly the state they'd expect. No leftover ANSI. No hidden cursor. No alt-screen artifact. No "press enter to continue" that requires a keypress. + +**Test**: after any command, does `echo $?` work immediately? Does the cursor blink? + +**Applied**: `ui/terminal.ts::resetTerminal()` is called exactly once per session exit, inside `HandoffScreen` for the wizard path and inside `cli/exit.ts` for non-interactive paths. It's the single choke point for ANSI teardown. Non-interactive commands never boot Ink, so there's nothing to reset. + +--- + +## 3. Voice and tone + +### 3.1 The claudemesh voice + +**Informed, restrained, competent, warm-but-not-familiar.** + +- Like a senior engineer explaining something to a peer, not a bootcamp instructor explaining to a student +- Complete sentences, proper punctuation, no contraction abuse, no corporate "we" overload +- Uses "you" when addressing the user directly; uses "your" for possession +- Uses active voice almost always. Passive voice only when the actor is irrelevant ("Your token was revoked") +- Never refers to itself in third person as "claudemesh-cli" or "the tool" or "the CLI". Say "we" when a first-person voice is needed, but use it sparingly + +### 3.2 Forbidden concepts (not just words) + +The blocklist is a list of **concepts**, each with per-locale word lists. CI lints every locale file against the concept's word list for that locale. 
Raw word blocklists for translations are theater — a translator could commit the same sin using a different word. + +```ts +// lint/forbidden-concepts.ts +export const FORBIDDEN_CONCEPTS = { + successTheater: { + description: 'Declaring success in a way that feels like celebration rather than confirmation', + en: ['successfully', 'awesome', 'hooray', 'woohoo', 'yay'], + es: ['exitosamente', 'estupendo', 'genial', 'bravo'], + }, + fakeApology: { + description: 'Pseudo-empathetic opener that delays the actual message', + en: ['oops', 'whoops', 'unfortunately', 'sorry, but'], + es: ['vaya', 'lamentablemente', 'desafortunadamente'], + }, + patronizing: { + description: 'Telling the user how to feel', + en: ["don't worry", 'no need to panic', 'relax'], + es: ['no se preocupe', 'tranquilo', 'sin prisa'], + }, + vagueFailure: { + description: 'Error messages that hide what broke', + en: ['something went wrong', 'an error occurred', 'oops something broke'], + es: ['algo salió mal', 'ocurrió un error', 'hubo un problema'], + }, + fillerPolite: { + description: 'Filler words that pretend politeness but add noise', + en: ['please', 'kindly'], + es: ['por favor'], // context-sensitive in Spanish — softer rule + }, +}; +``` + +CI rule: every PR that touches `locales/*.ts` is linted against `FORBIDDEN_CONCEPTS` for every locale present. Violations block merge. Adding a new locale requires adding its forbidden concept entries. 
+ +### 3.3 Required patterns + +- **Verbs first** when giving instructions: "Run `claudemesh share`" not "You should run `claudemesh share`" +- **State before suggest**: explain what happened before telling the user what to do +- **Specific over vague**: "Mesh creation failed because the slug `test` is already taken" not "Something went wrong with mesh creation" +- **One sentence per idea**: don't cram three thoughts into one compound sentence +- **Second person singular**: "your mesh", not "the user's mesh" +- **Active voice**: "We created your mesh" not "Your mesh has been created" + +### 3.4 Verbosity budget + +| Context | Max length | +|---|---| +| First-run welcome header | 2 lines | +| First-run welcome description | 1 sentence | +| Command success confirmation | 1 line | +| Error message | 3 lines | +| Onboarding closing sentence | 1 sentence, ≤ 5 words | +| `--help` command descriptions | 1 line each, under 60 chars in English | +| Status line (in Claude Code) | 1 line, under 60 chars in English | + +**Per-locale length budgets**: Spanish expands ~30% vs English. Hard-capped strings have explicit per-locale limits: + +```ts +export const budgets = { + 'help.description': { en: 60, es: 80, mode: 'hard' }, // hard cap — CI fails on exceed + 'status.line': { en: 60, es: 75, mode: 'hard' }, + 'error.message': { en: 150, es: 195, mode: 'soft' }, // soft — warning at 150% of English + 'button.label': { en: 30, es: 40, mode: 'hard' }, + 'picker.option': { en: 60, es: 80, mode: 'soft' }, +}; +``` + +**CI enforcement**: +- **Hard cap exceeded** → CI build **fails** with a clear error: `locales/es.ts: help.description "inicia una sesión en tu malla (crea una si es necesario)" exceeds 80 char limit (83 chars)` +- **Soft cap exceeded** → CI emits a **warning** but does not fail: `locales/es.ts: error.message "..." exceeds 150% of English length (195 vs 130 chars). 
Consider tightening.` +- **Missing translation** → CI fails: every key in `en.ts` must have a corresponding entry in `es.ts`. Fallback to English at runtime is allowed for gradual rollout but CI flags the gap. + +**ICU plural category support**: EN and ES have simple plural rules (one, other). Future locales with complex rules (Polish, Russian, Arabic) have additional categories (few, many, etc.). The ICU MessageFormat library handles these automatically, but the catalog entries must cover all categories for each supported locale. When v1.1+ adds Russian, every plural-sensitive key gains `few` and `many` categories in `ru.ts`. The English catalog is always the source of truth for keys; translated locales add whatever plural categories their grammar requires. + +--- + +## 4. First-run onboarding script + +This is the most important UX surface in the entire product. Every word is reviewed by three people before it ships. + +### 4.1 Scenario A — Fresh machine, no network + +``` +$ claudemesh + + claudemesh + Peer mesh for Claude Code sessions. + + Creating your mesh… + + ✔ Your mesh "alejandro-mbp" is ready. + + You're in. +``` + +**Elapsed time**: ~300ms (SQLite init + local mesh bootstrap). +**Words on screen**: ~16. +**Decisions required**: 0. + +After the closing sentence, the terminal transitions to Claude Code (invisible handoff via process replace). + +### 4.2 Scenario B — Fresh machine, with network + +Phase 1: initial render +``` +$ claudemesh + + claudemesh + Peer mesh for Claude Code sessions. + + Opening browser for sign-in… +``` + +Browser opens to `claudemesh.com/cli-auth?code=ABCD-EFGH`. User sees the Better Auth login screen if needed, then a single approval card: + +``` +Link this CLI session? + + Code ABCD-EFGH + Device Alejandro's MacBook Pro (darwin/arm64) + Expires in 9:47 + + [Approve] [Deny] +``` + +User clicks Approve. Browser shows: + +``` +✔ You're linked. + +Return to your terminal to continue. 
+``` + +Phase 2: back in the terminal +``` + ⠋ Waiting for browser confirmation… + + ✔ Signed in as Alejandro Gutiérrez. + ✔ Your mesh "alejandro-mbp" is ready. + + You're in. +``` + +**Elapsed time**: 4–8 seconds depending on browser speed. +**Words on terminal**: ~24. +**Decisions required**: 1 (click Approve). + +After the closing sentence, Claude Code takes over invisibly. + +### 4.3 The closing sentence (locked, first-run only) + +Exactly one sentence is the emotional payoff of the first run: + +> **"You're in."** + +Three syllables. One complete thought. Locked — do not change without a design review. + +This sentence was chosen because: +- "You're in" is short and declarative +- It frames the moment as arrival, not achievement +- Zero exclamation marks +- Zero emoji +- Zero "welcome" + +**Locked to first-run only** — this sentence is the first delight beat and is shown exactly once per machine (see §16). It is NOT reused for silent auth refresh, which has its own different message (see §6.4 `auth.refresh.done`). Reusing "You're in." for refresh would dilute the first-run impact. + +The longer version that used to appear here — `"Your mesh is ready for you and anyone you invite."` — was cut because it diluted the beat. The extra words turned delight into onboarding instruction, and onboarding instruction belongs on day 2, not day 1. + +### 4.4 The second-invocation hint (deferred, not first-run) + +On the **second time** a user runs `claudemesh` (the literal second invocation, not the second calendar day), if `session_count === 2`, a one-line muted-color hint appears after the handoff signal: + +> "Type `claudemesh peers` in another terminal to see who's around." + +This is onboarding spread over time. Shown exactly once per machine. Tracked in `state.milestoneShown.secondInvocationHint`. + +**"Second-invocation", not "day-2"**: the trigger is purely counter-based (`session_count === 2`). 
If a user runs `claudemesh` twice in the same five minutes, they see the hint on invocation #2. If they skip three weeks and then run a second time, they still see it on invocation #2. "Day-2" was a misleading earlier name — the trigger has nothing to do with the calendar. + +### 4.5 What we do NOT show on first run + +Explicitly forbidden: + +- A menu of things to do +- A tour of the features +- A list of available commands +- A request for feedback +- A "follow us on Twitter" prompt +- A "star us on GitHub" prompt +- A changelog +- A "click here to learn more" link +- The output of `claudemesh --help` +- Any ASCII art (no brand mark, no logo) +- Any "Welcome!" / "Hello!" / "Hi there!" framing + +--- + +## 5. Session kinds and output budgets + +Rule 2 says "silence is the interface" for daily use but §4 shows verbose output for first run. The distinction is machine-readable via `session_kind`: + +```ts +// ui/session-kind.ts +export enum SessionKind { + FirstRun = 'first_run', // no prior state — verbose welcome OK + Recovery = 'recovery', // post-error or post-migration — 1-line status + DailyLaunch = 'daily_launch', // normal case — silent handoff + Interactive = 'interactive', // `new`, `invite`, `list`, etc. — normal TTY + NonInteractive = 'non_interactive', // CI, pipe, --json — machine output only + Rescue = 'rescue', // `doctor`, `--help`, `whoami` — explicit diagnosis +} +``` + +### 5.1 Output budget per kind + +| Kind | Pre-handoff output | Frames rendered | Example | +|---|---|---|---| +| `first_run` | Up to 8 lines (welcome + status rows + closing) | 1 Ink frame | §4.1, §4.2 | +| `recovery` | 1 status line | 0 frames | "Your sign-in expired. 
Refreshing in browser…" | +| `daily_launch` | 0 lines | 0 frames | bare `claudemesh` | +| `interactive` | Flow pipeline, no budget | N frames | `claudemesh new`, `claudemesh invite` | +| `non_interactive` | Structured output only | 0 frames | `claudemesh list --json` | +| `rescue` | Full diagnostic output | 0 frames | `claudemesh doctor` | + +### 5.2 Detection logic + +In `entrypoints/cli.ts`: + +- `first_run` → no `~/.claudemesh/state.json` exists +- `recovery` → previous session ended with non-zero exit code AND cache exists +- `daily_launch` → cache exists AND no flags specifying new behavior AND `process.stdout.isTTY` AND not `-y` with missing required args +- `non_interactive` → `!process.stdout.isTTY` OR `--json` flag OR `CI` env var +- `interactive` → explicit subcommand (`new`, `invite`, `list`, etc.) +- `rescue` → explicit `doctor` / `--help` / `whoami` / `--version` + +### 5.3 Session kind is not user-visible + +Users never see "session_kind" in output. It's purely internal routing — different modes pick different flows in `ui/flows.ts` and different output renderers in `cli/output/`. + +### 5.4 Session kind is immutable post-boot + +`session_kind` is determined once in `entrypoints/cli.ts` before Ink boots, and it **does not change during runtime**. A session that starts as `interactive` (running `claudemesh new`) cannot transition to `rescue` mid-flight when a tool call fails — instead, the failure surfaces as an **overlay** within the current session kind. + +If an interactive command hits a corrupt-database error that requires `doctor`, the overlay says "Local store is corrupt. Exit and run `claudemesh doctor` to repair." The user exits, re-runs with `doctor`, and the new process starts with `session_kind = rescue`. There is no runtime re-classification path. + +**Rationale**: re-classifying mid-flight would require tearing down Ink and re-bootstrapping, which fights Rule 14 (return-to-terminal contract) and Rule 2 (silence). 
A clean exit + re-exec is simpler and more predictable. + +**Implementation note**: overlays can still push runtime errors to the user without changing the session kind. The overlay stack (see §9) is the mechanism for runtime interruptions within a fixed session kind. + +--- + +## 6. Microcopy catalog + +Every user-visible string in v2. Centralized in `locales/en.ts` (and per-locale translations). Uses **ICU MessageFormat** for all pluralization and locale-sensitive grammar. + +### 6.1 ICU MessageFormat is mandatory + +Flat key-value catalogs break for Spanish and any language with plural/gender agreement. Every string with count-sensitive grammar uses ICU syntax: + +```ts +// locales/en.ts +export const en = { + whoami: { + meshCount: '{owned, plural, =0 {no meshes owned} one {# mesh owned} other {# meshes owned}}, {guest, plural, =0 {no guest meshes} one {# as guest} other {# as guest}}', + }, + broker: { + reconnected: 'Reconnected.', // plain — peer count lives in status line, not message + }, + peers: { + empty: "No one else is here yet. Invite teammates with `claudemesh invite`.", + }, +}; + +// locales/es.ts +export const es = { + whoami: { + meshCount: '{owned, plural, =0 {sin mallas propias} one {# malla propia} other {# mallas propias}}, {guest, plural, =0 {ninguna como invitado} one {# como invitado} other {# como invitado}}', + }, + broker: { + reconnected: 'Reconectado.', + }, +}; +``` + +Library: `@formatjs/intl-messageformat` or equivalent lightweight ICU implementation. 
+ +### 6.2 First-run keys + +| Key | String | +|---|---| +| `firstRun.brandLine` | `claudemesh` | +| `firstRun.tagline` | `Peer mesh for Claude Code sessions.` | +| `firstRun.creating` | `Creating your mesh…` | +| `firstRun.openingBrowser` | `Opening browser for sign-in…` | +| `firstRun.waitingBrowser` | `Waiting for browser confirmation…` | +| `firstRun.signedIn` | `✔ Signed in as {name}.` | +| `firstRun.meshReady` | `✔ Your mesh "{slug}" is ready.` | +| `firstRun.closing` | `You're in.` | + +### 6.3 Second-invocation hint + +| Key | String | +|---|---| +| `dayTwo.peersHint` | `Type \`claudemesh peers\` in another terminal to see who's around.` | + +### 6.4 Authentication + +| Key | String | +|---|---| +| `auth.deviceCode.manual` | `If your browser didn't open, visit:\n {url}` | +| `auth.deviceCode.timedOut` | `Sign-in timed out. Run \`claudemesh\` to try again.` | +| `auth.deviceCode.denied` | `Sign-in canceled. Run \`claudemesh\` to try again.` | +| `auth.deviceCode.networkError` | `Can't reach claudemesh.com. Check your connection and try again.` | +| `auth.token.saved` | `Token saved to {path}.` | +| `auth.token.invalid` | `That doesn't look like a claudemesh token. Expected something starting with \`cm_\`.` | +| `auth.token.rejected` | `Token rejected by the server. It may have been revoked or it's from a different environment.` | +| `auth.refresh.silent` | `Your sign-in expired. Refreshing in browser…` | +| `auth.refresh.done` | `✔ Done.` | +| `auth.logout.success` | `Logged out. Removed {path}.` | +| `auth.logout.serverFailed` | `Logged out locally. The server revocation failed — the token is still valid on the server. Revoke it manually at {url}.` | + +### 6.5 Mesh operations + +| Key | String | +|---|---| +| `mesh.bootstrap.success` | `Your mesh "{slug}" is ready.` | +| `mesh.create.prompt` | `Name?` | +| `mesh.create.success` | `Created "{slug}".` | +| `mesh.create.joined` | `You're in.` | +| `mesh.create.slugCollision` | `A mesh called "{slug}" already exists. 
Try "{suggestion}" instead.` | +| `mesh.publish.confirm` | `Your personal mesh is local-only. Publish it to claudemesh.com?` | +| `mesh.publish.needsAuth` | `We'll sign you in first if you haven't already.` | +| `mesh.publish.success` | `Published as "{slug}".` | +| `mesh.publish.inviteCopied` | `Invite URL copied to clipboard:\n {url}` | +| `mesh.join.success` | `Joined "{slug}".` | +| `mesh.join.fromClipboard` | `✔ Joined "{slug}" from the dashboard link.` | +| `mesh.leave.confirm` | `Leave "{slug}"? You won't lose your local data.` | +| `mesh.leave.success` | `Left "{slug}".` | +| `mesh.rename.success` | `Renamed to "{newSlug}".` | +| `mesh.rename.permissionDenied` | `Only the mesh owner can rename it.` | + +### 6.6 Invites + +| Key | String | +|---|---| +| `invite.generate.success` | `Invite URL copied to clipboard:\n {url}\n\nShare the link with anyone. Expires in {duration}.` | +| `invite.email.sent` | `✔ Sent to {email}.` (utility confirmation) | +| `invite.email.sentFirst` | `Sent. They'll see it when they check their inbox.` (first-time only — delight beat #3) | +| `invite.email.alsoCopied` | `✔ Also copied to clipboard.` | +| `invite.clipboard.detected` | `Detected invite in clipboard.` | +| `invite.expired` | `That invite expired on {date}. Ask whoever sent it for a new one.` | +| `invite.malformed` | `That doesn't look like a claudemesh invite. Expected:\n https://claudemesh.com/i/` | +| `invite.alreadyMember` | `You're already in "{slug}". Running launch instead.` | + +### 6.7 Broker state + +| Key | String | +|---|---| +| `broker.disconnected` | `Connection lost. Reconnecting in {seconds}s…` | +| `broker.reconnected` | `Reconnected.` | +| `broker.unreachable` | `Can't reach the mesh right now. Your Claude Code session is still running. Messages will queue until the connection returns.` | + +### 6.8 List / peers / whoami (non-interactive renderers) + +| Key | String | +|---|---| +| `list.empty` | `You're not in any meshes yet. 
Run \`claudemesh new\` to create one.` | +| `peers.empty` | `No one else is here yet. Invite teammates with \`claudemesh invite\`.` | +| `whoami.notLoggedIn` | `Not signed in. Run \`claudemesh login\` when you're ready to share a mesh.` | +| `whoami.signedIn` | `Signed in as {name} ({email})` | +| `whoami.tokenSource` | `Token source: {source}` | +| `whoami.meshCount` | `Meshes: {owned, plural, one {# owned} other {# owned}}, {guest, plural, one {# as guest} other {# as guest}}` | + +### 6.9 Typo recovery + +Typo recovery prompts are a **distinct exception class** from errors (§7). They're 1-line "did you mean?" interactive prompts, not 3-sentence essays. + +| Key | String | +|---|---| +| `typo.meshSuggestion` | `No mesh called "{attempt}". Did you mean "{suggestion}"?` | +| `typo.commandSuggestion` | `Unknown command "{attempt}". Did you mean "{suggestion}"?` | +| `typo.noSuggestion` | `Unknown command "{attempt}". Run \`claudemesh --help\` to see all commands.` | + +### 6.10 Clipboard handoff (dashboard → terminal) + +When a dashboard "Launch in CLI" button changes local state (joins a mesh), a one-line confirmation is shown before launch. This is an exception to Rule 2 because disk state changed — silence would be deceptive. + +| Key | String | +|---|---| +| `clipboard.joinSuccess` | `✔ Joined "{slug}" from the dashboard link.` | +| `clipboard.alreadyMember` | `Already in "{slug}". Launching…` | + +### 6.11 100th-session milestone (the second-invocation hint is in §6.3) + +| Key | String | +|---|---| +| `milestone.hundredth` | `Nice to see you again.` | + +--- + +## 7. Error patterns (full taxonomy) + +### 7.1 Error structure + +Every error message has three parts: + +``` +{ACTIVE VOICE: WHAT HAPPENED} + +{OPTIONAL: WHAT WE KNOW ABOUT WHY} + +{EXACTLY ONE ACTION TO TAKE} +``` + +**Exactly one primary action.** If an error has two verbs competing for the user's attention (e.g. "try again" + "check the status page"), one must be demoted to `claudemesh doctor` output or a documentation link. 
+ +### 7.2 Network errors + +**Can't reach claudemesh.com** +``` + Can't reach claudemesh.com right now. + + The broker may be down or there's a network issue. Check your + connection and try again in a minute. +``` + +**Can't reach the broker during a session** +``` + Lost connection to the mesh. Your Claude Code session is still + running — messages will queue until we reconnect. + + Retrying in 3s… +``` +(Shown as a status-line transition, auto-dismissed on reconnect.) + +**Dashboard up but broker down** +``` + The dashboard is reachable but the mesh broker isn't. + This usually means a broker restart is in progress. + + Retrying in 10s… +``` + +**Timeout during first-run device code** +``` + Sign-in timed out. Run `claudemesh` to try again. +``` + +### 7.3 Authentication errors + +**Token expired (interactive)** +``` + Your sign-in expired. Refreshing in browser… +``` +(Recovery is silent; the user doesn't need to do anything.) + +**Token revoked (non-interactive / PAT)** +``` + Your access token was revoked. Generate a new one at + claudemesh.com/dashboard/settings/cli-tokens and run: + + claudemesh login --token <token> +``` + +**Malformed token** +``` + That doesn't look like a claudemesh token. Expected something + starting with `cm_`. +``` + +**Token from wrong environment** +``` + That token is for a different claudemesh environment. Use a + token from claudemesh.com. +``` + +### 7.4 Mesh errors + +**Slug collision on create** +``` + A mesh called "platform-team" already exists in your account. + Try a different name. +``` + +**Slug not found on launch** (recovery prompt, not error — see §7.8) + +**Not a member** +``` + You're not a member of "platform-team" (or it doesn't exist). + To join, get an invite from someone who is. +``` + +**Not the owner (rename/archive)** +``` + Only the owner of "platform-team" can {action} it. Ask whoever + created the mesh. +``` + +### 7.5 Invite errors + +**Expired** +``` + That invite expired on Apr 7. 
Ask whoever sent it for a new one. +``` + +**Malformed URL** +``` + That doesn't look like a claudemesh invite. Expected: + + https://claudemesh.com/i/<code> +``` + +**Invalid code** +``` + This invite is no longer valid. It may have been revoked. + Ask whoever sent it for a new one. +``` + +**Uses exhausted** +``` + This invite has reached its usage limit. Ask whoever sent it + for a new one. +``` + +### 7.6 `claudemesh <url>` error matrix + +Positional URL routing handles every edge case: + +| Input | Behavior | +|---|---| +| Valid invite, not yet joined | Join flow, then launch | +| Valid invite, already a member | Recovery prompt (§7.8): "You're already in '{slug}'. Launch it instead?" | +| Valid invite, expired | Error: `invite.expired` | +| Valid URL format, code doesn't exist | Error: "This invite is no longer valid." | +| Valid URL format, different env | Error: "That invite is for a different claudemesh environment." | +| Malformed URL | Error: `invite.malformed` | +| URL without `/i/` path | Recovery prompt: "That looks like a claudemesh URL but not an invite. Did you mean the dashboard?" | +| URL for a different domain | Error: "That's not a claudemesh URL." | + +### 7.7 Environment errors + +**No Claude Code installed** +``` + Claude Code isn't installed on this machine. + + Install it from https://claude.ai/code and run `claudemesh` + again. +``` + +**Permission denied on ~/.claudemesh/** +``` + Can't write to ~/.claudemesh/ — check the directory's + permissions. It should be owned by you and mode 700. + + To fix: + chmod 700 ~/.claudemesh +``` + +**Disk full** +``` + Can't write to ~/.claudemesh/data.db — disk is full. Free some + space and try again. +``` + +**Corrupt SQLite** +``` + The local mesh store is corrupt. This is rare and usually + recoverable. Run: + + claudemesh doctor --repair +``` + +### 7.8 Recovery prompts (distinct exception class) + +Typo recovery and similar interactive recovery prompts are NOT subject to the 3-sentence error structure. 
They're 1-line "did you mean?" questions that immediately offer a picker: + +``` + No mesh called "plataform-team". Did you mean "platform-team"? +▸ Yes, use "platform-team" + No, cancel +``` + +Rules for recovery prompts: +- One line of prompt text +- A picker with 2–3 options +- First option is the recommended action +- No "why" explanation — the mismatch is self-explanatory +- Triggered by levenshtein distance ≤ 2 for typo cases, or by clear user intent mismatches (e.g. URL that looks like a dashboard URL, not an invite URL) + +### 7.9 CLI usage errors + +**Missing required flag in non-interactive mode** +``` + Missing --mesh (required with -y when you're in 2+ meshes). + Available meshes: platform-team, alejandro-mbp, claudefarm +``` + +**Unknown command** (recovery prompt, not error) +``` + Unknown command "lanch". Did you mean "launch"? +``` + +**Conflicting flags** +``` + --mesh and --new can't be used together. Pick one. +``` + +### 7.10 MCP server errors + +**Can't start stdio server** +``` + The MCP server failed to start: {reason} + + Run `claudemesh doctor` to diagnose. +``` + +**Tool call failed** (returned to Claude Code via MCP protocol) +``` + {tool_name} failed: {reason} +``` + +These errors reach Claude Code's TUI via the MCP protocol, not the CLI directly. + +--- + +## 8. Trust surfaces (distinct from delight) + +Delight and trust are different UX categories. Delight is emotional payoff; trust is compliance, disclosure, and user control. Mixing them is cynical. v2 treats them as distinct surface categories with different voices. + +### 8.1 The category + +Trust surfaces are neutral-informational, never warm. They use: +- A leading `~` marker to mark them as system notices (distinct from product messages) +- Muted color +- Single-line format +- No decorative elements + +**The `~` marker convention is documented** in two places: +1. 
`claudemesh help conventions` — a short advanced help topic explaining every visual convention (`✔` for success, `✘` for error, `▸` for picker selection, `~` for trust surfaces, `◉` for connection status) +2. The first trust surface a user ever sees (the telemetry disclosure on first run) includes a brief gloss: `~ claudemesh collects anonymized usage data. (System notices start with "~" — run \`claudemesh help conventions\` to learn more.)` — shown only on the first occurrence, not every time. + +Users who want to dig deeper can run the help command; users who ignore it still understand the notice because the text is self-explanatory. The `~` is not load-bearing semantically — removing it wouldn't break comprehension, it just signals category. + +### 8.2 Trust surface catalog + +**First-run telemetry disclosure** (shown exactly once, after the handoff transition): + +``` +~ claudemesh collects anonymized usage data. Run `claudemesh advanced telemetry off` to disable. +``` + +**Audit log access**: + +``` +~ Showing audit events from the last 30 days. Older events are in ~/.claudemesh/logs/. +``` + +**Data deletion confirmation**: + +``` +~ Local data deleted. Server-side data remains until you log out. +``` + +### 8.3 Rules + +- Trust surfaces do NOT count against the delight beat budget (§16) +- They have their own voice: neutral, factual, never cheerful, never apologetic +- They're never blocked — the user doesn't need to acknowledge to proceed +- They scroll by once and are marked "shown" in `~/.claudemesh/state.json` +- The `~` marker is a system-notice convention, used only in this category + +--- + +## 9. Picker rules + +### 9.1 When a picker shows + +A picker MUST show when ALL of the following are true: +1. The user has 2+ valid choices +2. No CLI flag specifies the choice +3. No cached preference exists (see §9.4 for cache invalidation) +4. The user is in an interactive (TTY) context +5. `-y` was not passed +6. No clipboard hint (e.g. 
invite URL) implies a default + +A picker MUST NOT show when any of those is false. + +### 9.2 Picker visual structure + +``` + {optional question on one line} +▸ {first option — bold, primary color} + {second option} + {third option} +``` + +- No header like "Choose one:" unless the context isn't obvious +- No separators between options +- No "Cancel" as a menu item (Escape/Ctrl-C handles cancel) +- First option is always the recommended default, rendered in **bold + `primary` color + gutter arrow `▸`** +- Non-selected rows use default weight in `dim` color + +### 9.3 Selection indication uses three signals + +Per accessibility rule (§12.1): selection is indicated by icon, text weight, AND **position-as-rendered** (i.e. where the gutter arrow currently sits, not where in the list). + +1. **Icon**: `▸` in the gutter (or `>` in monochrome mode) — moves with the selection as the user navigates +2. **Text**: bold weight for the selected row +3. **Position-as-rendered**: the selected row has the gutter arrow in its leftmost column; non-selected rows have two spaces. "Position" here means "the row where the arrow is currently drawn" — not "first row in the list". If the user arrow-downs to row 3, row 3 becomes the "position-signaled" row. + +In color mode, `primary` color is added as a fourth signal. At least two signals are legible in any a11y configuration. + +**Clarification**: earlier drafts said "first row" which was ambiguous. The rule is "the row currently rendered with the gutter arrow" — which starts as the first row by default (preselection) but moves as the user navigates. + +### 9.4 Cache invalidation rules + +The cache `state.lastUsedMesh` is considered stale when ANY of the following is true: +1. The referenced mesh no longer exists in local state (user ran `claudemesh leave`) +2. The referenced mesh's broker URL is unreachable AND the mesh is shared (not personal) — fall through to picker +3. The cache was written by a different CLI major version +4. 
The user explicitly ran `claudemesh advanced state clear-last-used` +5. The cache is older than 30 days + +**Behavior on stale cache**: clear the stale entry, fall through to normal picker logic. Never silently use a stale value. + +**Auto-invalidation triggers**: +- `claudemesh leave <slug>` where slug matches → clear entry +- `claudemesh logout` → clear all cache +- `claudemesh advanced migrate` → clear cache to force fresh selection +- Server-side mesh deletion detected on next connect → clear entry + +### 9.5 The mesh picker + +``` + Which mesh? +▸ alejandro-mbp + platform-team · 7 peers + claudefarm · 12 peers +``` + +- Last-used is preselected by position (first row) + emphasis — **no "(last used)" annotation** +- Shared meshes show peer count in `muted` color after `·` +- Personal mesh shows no annotation (it's yours, count is 1) + +### 9.6 The confirm picker + +``` + Continue to "alejandro-mbp"? +▸ Yes, launch + Pick a different mesh +``` + +Only two options. First is the recommended action. No "Cancel" — Escape cancels. + +### 9.7 The invite-detected picker + +``` + Detected invite in clipboard. +▸ Join "platform-team" + Continue to "alejandro-mbp" +``` + +Always two options: the detected invite OR the last-used mesh. Detected invite wins preselection (fresh user intent trumps cached preference). + +### 9.8 First-letter jumping with cycling + +Pickers support first-letter jumping: press `p` to jump to the first option starting with P. If multiple options start with the same letter, subsequent presses cycle through matches. Resets after 1 second of inactivity or when a different letter is pressed. + +### 9.9 Maximum visible options + +If a picker has >7 options, it shows 7 with arrow indicators `⌃` / `⌄` at top/bottom. The list scrolls as the user navigates. No pagination dialog. No numbered selection. + +--- + +## 10. 
Keyboard conventions + +| Key | Action | Notes | +|---|---|---| +| `↑` / `↓` | Navigate picker | Wraps at ends | +| `←` / `→` | (unused in v1.0.0) | Reserved for future multi-column pickers | +| `Enter` / `Return` | Accept current selection | Always | +| `Escape` | Cancel / go back | Exits to previous screen, or exits CLI at root | +| `Ctrl-C` | Exit immediately | Skips confirmation, resets terminal | +| `Ctrl-D` | Exit immediately | Alias for Ctrl-C | +| `Tab` | No-op (explicit) | Reserved for future autocomplete; currently does nothing (no bell, no hint) | +| `?` | Show keybindings overlay | On any interactive screen | +| `q` | Quit (list screens only) | See §10.2 | +| `/` | Filter (long lists only) | Only on screens with `filterable: true` | +| `[a-z]` | First-letter jump | Pickers only; cycles on collision (§9.8) | + +### 10.1 No hidden shortcuts + +Every keyboard shortcut is either: +- Listed in the `?` overlay +- A universal convention (Ctrl-C, arrows, Enter, Escape) + +No easter eggs. No hidden dev shortcuts. No "press 5 to skip". + +### 10.2 `q` quit key scope + +`q` quits only on "list screens" — screens whose primary purpose is displaying a list (`peers`, `list`, `doctor` results). The screen's component declares `quitKey: 'q'` in its props; the global keymap checks this flag before binding `q`. On non-list screens (pickers, text inputs, flows), `q` is forwarded as a literal keystroke (used for first-letter jump in pickers). + +### 10.3 The `?` keybindings overlay + +``` + Keyboard + + ↑ ↓ Navigate + Enter Accept + Escape Cancel / back + Ctrl-C Exit + a-z Jump to option by first letter + ? Show this overlay + + Press any key to dismiss. +``` + +Brief. Fits in 8 lines. Dismisses on any keypress. Accessible from every interactive screen. + +--- + +## 11. 
Progressive disclosure + +### 11.1 Four levels of help + +``` +claudemesh --help # 8 primary commands + "When something's wrong" section +claudemesh <command> --help # per-command flags + examples +claudemesh help advanced # advanced + internal commands +claudemesh help all # complete, stable, grep-able dump +``` + +### 11.2 The main `--help` output + +``` +$ claudemesh --help + +claudemesh — peer mesh for Claude Code sessions +v1.0.0 + +USAGE + claudemesh start a session in your mesh (creates one if needed) + claudemesh <url> join a mesh from an invite link + claudemesh new create a new mesh + claudemesh invite [email] generate an invite (copies to clipboard) + claudemesh list see your meshes + claudemesh rename rename the current mesh + claudemesh leave [mesh] leave a mesh + claudemesh peers see who's in the current mesh + +When something's wrong + claudemesh doctor diagnose install/config/connection issues + claudemesh whoami show current identity + +More: claudemesh help advanced +``` + +Exactly 8 primary verbs in the USAGE section. The "When something's wrong" section surfaces `doctor` and `whoami` so users in broken states can find them without drilling into advanced help. + +The main command description `start a session in your mesh (creates one if needed)` is true in every state — fresh install, daily use, or recovery. + +### 11.3 The advanced help output + +``` +$ claudemesh help advanced + +claudemesh advanced + + login re-authenticate (usually automatic) + logout revoke session and clear local credentials + share publish personal mesh as shared + publish alias for share + install register MCP server with Claude Code + uninstall remove MCP server registration + migrate run config/data migrations manually + connect link external bridges (telegram, etc.) 
+ disconnect unlink external bridges + telemetry on|off manage telemetry opt-in + mcp catalog browse default MCP catalog + mcp deploy deploy an MCP from the catalog + +Internal (for Claude Code and scripts): + + mcp start MCP server on stdio + hook handle Claude Code hook events + seed-test-mesh developer tool + +Full reference: claudemesh help all +``` + +### 11.4 The full reference + +`claudemesh help all` prints a complete, stable, grep-able dump of every command and every flag. This is what power users and script-writers read. It's longer than the main help and it's OK for it to be — that's why it's hidden. + +### 11.5 Per-command help + +``` +$ claudemesh invite --help + +claudemesh invite — generate an invite URL + +USAGE + claudemesh invite [email] + +OPTIONS + --mesh mesh to invite to (default: current) + --expires expiry duration (default: 7d) + --uses max uses (default: unlimited) + --role role for the invitee (default: member) + --json machine-readable output + +EXAMPLES + claudemesh invite + claudemesh invite alice@example.com + claudemesh invite --mesh platform-team --expires 30d +``` + +Three sections: usage, options, examples. Examples are not optional — every command has at least one. + +--- + +## 12. Accessibility (testable matrix) + +Accessibility is specified as a testable matrix, not principles. Every state has three cues; at least two must be legible in any a11y configuration. + +### 12.1 Token-signal matrix + +| State | Icon cue | Text cue | Position cue | VoiceOver announcement pattern | +|---|---|---|---|---| +| Picker row selected | `▸` in gutter | Bold weight | Row currently rendered with the gutter arrow (§9.3) | `"{label}, selected, {index} of {total}"` | +| Picker row unselected | ` ` (two spaces) | Default weight | Any other row | `"{label}, {index} of {total}"` | +| Success confirmation | `✔` | "Done" / "Ready" / "Sent" | After action | `"{label}, completed"` | +| Error | `✘` | Error message | On error surface | `"Error: {message}. 
{action}"` | +| Warning | `⚠` | Warning message | On warning surface | `"Warning: {message}"` | +| In-progress | `⠋` | Progress text | Same line | `"Working: {label}"` | +| Connected | `◉` | Mesh name | Status position | `"Connected to {mesh}. {peer_count} peers."` | +| Disconnected | `◎` | Mesh name | Status position | `"Disconnected from {mesh}. Reconnecting in {seconds} seconds."` | + +Every screen is tested against this matrix. CI runs an `ink-render` smoke test asserting the **announcement string** for each screen matches the expected pattern. + +**Ink does not ship with native VoiceOver integration.** The "VoiceOver announcement pattern" column describes a *contract*: the screen must render an announcement string that a screen-reader can read. The delivery mechanism is a CLI-owned shim at `ui/accessibility/announce.ts` that: + +- On macOS: writes the announcement to a hidden Ink `<Text>` element that VoiceOver picks up through standard terminal accessibility APIs (VoiceOver reads terminal content line-by-line; the hidden text becomes part of the reading stream) +- On Linux with `orca`: writes the announcement via `brltty`/AT-SPI bridge if available, else falls back to plain terminal text +- On Windows with NVDA: writes the announcement via a hidden Ink element that NVDA's terminal reader picks up +- When no screen reader is detected: no-op (the visible UI is already sufficient for sighted users) + +**v1.0.0 delivery**: the shim ships as a thin Ink component that renders an announcement string to the terminal in a form screen-readers can consume. It is a **best-effort implementation**, not a full a11y platform. True native VoiceOver integration (via NSAccessibility APIs, Windows UI Automation, etc.) is v1.1+ work. + +The matrix is therefore an **implementation contract for the announcement strings**, not a promise that every platform delivers perfect screen-reader output. Platforms where the shim is weak are documented in `docs/accessibility.md` with workarounds. 
+ +### 12.2 Monochrome (NO_COLOR=1) rendering + +| Role | Monochrome rendering | +|---|---| +| `primary` (emphasis) | Bold weight | +| `success` | Bold weight + `✔` prefix | +| `error` | Bold weight + `✘` prefix | +| `warning` | Bold weight + `⚠` prefix | +| `muted` | Default weight | +| `dim` | Default weight (collapses into `muted` in monochrome) | + +In monochrome, `dim` collapses into `muted`. Accepted tradeoff — without color, one level of tertiary distinction is lost, but no critical state becomes illegible. + +Monochrome picker example: +``` + Which mesh? +> alejandro-mbp + platform-team · 7 peers + claudefarm · 12 peers +``` + +`▸` becomes `>` in monochrome mode. Selected row is bold. + +### 12.3 Contrast targets + +For terminals with theme support (Solarized, Dracula, Nord, Tokyo Night, default macOS, default Windows Terminal), the CLI is tested on each: + +- `primary` on default background: ≥ 4.5:1 (WCAG AA) +- `error` on default background: ≥ 7:1 (WCAG AAA — errors must never be subtle) +- `success` on default background: ≥ 4.5:1 +- `muted` on default background: ≥ 3:1 + +Contrast is measured using the terminal's reported theme via OSC 10/11 escape sequences when available; defaults are used otherwise. A CI test renders each token against each theme's background and computes the contrast ratio. + +### 12.4 Focus order + +Interactive screens declare a tab order (even though Tab is a no-op in v1.0.0). The order is used for screen-reader navigation via arrow keys: +- Top to bottom +- Left to right within a row +- Picker items navigable with arrow keys +- No focus trap across the alt-screen boundary + +### 12.5 Terminal width compatibility + +- **Minimum supported width**: 60 columns. Below that, reflow rules apply (§12.6). +- **Below 40 columns**: CLI refuses to render interactive screens. Suggests running in a wider terminal or with `--json`. 
+- **60–100 columns**: normal rendering +- **Above 100 columns**: content is NOT stretched; caps at 100 columns for readability +- **Above 120 columns**: right-aligned annotations (like "7 peers") appear in the same row; below 120 they move to a new line + +### 12.6 Sub-60-column reflow rules + +1. **Status rows**: split label and value onto separate lines: + ``` + Account + ✔ Alejandro + Mesh + ✔ alejandro-mbp + ``` +2. **Pickers**: unchanged — already single-column +3. **List commands**: drop all right-annotations +4. **Help output**: truncate command descriptions at `width - 4`, append `…` +5. **Error messages**: reflow at the actual width instead of hard 60-col default +6. **Status-line integration**: compress to the most compact form (§15.3) + +### 12.7 Font compatibility + +All Unicode characters used (`✔ ✘ ⚠ ▸ • ◆ █ ◉ ◎`) are in the BMP and supported by every modern terminal font. No emoji. No private-use-area glyphs — no Powerline characters, no Nerd Font characters. + +**ASCII fallback detection**: at startup, the CLI checks `TERM` env var against a known-good list (xterm-256color, xterm-color, alacritty, iterm, kitty, tmux-256color). If not in list OR `CLAUDEMESH_NO_UNICODE=1`, ASCII fallback is used: + +| Unicode | ASCII fallback | +|---|---| +| `✔` | `[OK]` | +| `✘` | `[X]` | +| `⚠` | `[!]` | +| `▸` | `>` | +| `⠋` | `*` (static) | +| `◉` | `(*)` | +| `◎` | `( )` | + +### 12.8 Locale support + +- `CLAUDEMESH_LOCALE=<locale>` switches the CLI locale +- Fallback: `en` if the locale isn't supported +- Detection: `LANG` env var on first run, stored in config +- Strings live in `locales/<locale>.ts` +- v1.0.0 ships with `en` and `es` +- Date/time/number formatting respects the locale via ICU + +### 12.9 Timezone + +Timestamps are shown in the user's local timezone. ISO format for machine output (`--json`), human format for interactive display: + +- Machine: `2026-04-10T21:50:00Z` +- Human: `Apr 10 at 9:50 PM` (local) + +--- + +## 13. 
Dark/light terminal compatibility + +### 13.1 Approved palette + +Only colors that pass contrast in both dark and light themes: + +- `primary` (cyan) — safe on both, brand color, selection color, heading color +- `success` (green) — safe on both +- `error` (red) — safe on both +- `warning` (yellow) — visible on both (use sparingly on light) +- `muted` (gray) — blackBright terminal value, works on both +- `dim` — reduces contrast for tertiary text + +### 13.2 Forbidden colors + +- Pure white (#FFFFFF) — invisible on light +- Pure black (#000000) — invisible on dark +- Low-saturation pastels — invisible on both +- **Custom hex colors beyond the six semantic roles** + +The purple brand tint (`#7C3AED`) that appeared in earlier drafts is retired. The dashboard and marketing site keep the purple; the terminal does not. + +### 13.3 Test matrix + +Every PR with visual changes is tested on: +- macOS Terminal (default light, default dark) +- iTerm2 (Solarized Dark, Solarized Light) +- Alacritty (default) +- Windows Terminal (default) +- VS Code integrated terminal + +If it's illegible on any of those, it doesn't ship. + +--- + +## 14. Browser→terminal continuity + +The missing feature in every CLI tool. v1.0.0 ships the clipboard handoff path; v1.1+ may add deep linking. + +### 14.1 Clipboard handoff (v1.0.0) + +Dashboard has a "Launch in CLI" button per mesh. Clicking it: +1. Generates a one-time handoff token server-side (60-second TTL) +2. Copies `claudemesh launch --mesh {slug}` to the clipboard (plus the token as an env var) +3. Shows a toast: "Copied. Paste in your terminal to join." + +User pastes and runs. The CLI: +1. Resolves the mesh from `--mesh <slug>` +2. If the mesh isn't already joined locally, silently claims the one-time token and joins +3. Shows a one-line confirmation if state changed (see §14.2) +4. 
Launches Claude Code + +### 14.2 Confirmation line for state-changing handoffs + +When the clipboard handoff triggers a join (disk state changed), a single confirmation line appears: + +``` +✔ Joined "platform-team" from the dashboard link. +``` + +If the user was already a member: + +``` +Already in "platform-team". Launching… +``` + +These lines are exceptions to Rule 2 because changing state silently would be deceptive. + +### 14.3 "Launch in CLI" button design (dashboard side) + +``` +┌──────────────────────────────────────┐ +│ platform-team │ +│ 7 peers · 2 online │ +│ │ +│ [Launch in CLI] [Settings] │ +└──────────────────────────────────────┘ +``` + +"Launch in CLI" in the brand `primary` color, "Settings" in muted. Click → toast → done. + +### 14.4 Browser copy catalog alignment + +All browser-side copy related to CLI flows lives in a shared catalog at `packages/shared-copy/cli-auth/en.ts` and is imported by both `apps/web/` (for rendering) and `apps/cli-v2/` (for displaying "return to your terminal" hints and verifying backend responses match expected text). CI fails if the catalogs drift. + +--- + +## 15. 
Claude Code status-line integration + +### 15.1 What Claude Code sees + +The MCP server exposes a mesh-status tool that Claude Code polls (or subscribes to): + +```json +{ + "mesh_slug": "platform-team", + "mesh_name": "Platform team", + "peer_count": 7, + "peers_online": 2, + "broker_connected": true, + "sync_pending": 0, + "schema_version": "1.0" +} +``` + +### 15.2 Status line rendering + +Claude Code's status line reads this and renders a single line at the bottom-right: + +``` +◉ platform-team · 2 peers +``` + +### 15.3 Responsive widths + +Depending on available width: + +- Full: `◉ platform-team · 2 peers` +- Medium: `◉ platform-team` +- Compact: `◉ ·2` +- Minimal: `◉` + +When `peers_online === 0` (you're alone in the mesh): + +- Full: `◉ platform-team · solo` +- Medium: `◉ platform-team` +- Compact: `◉ solo` +- Minimal: `◉` + +ICU plural rules handle the `1 peer` / `2 peers` distinction for English and per-locale rules for Spanish. + +### 15.4 Dot states + +- `◉` (green via `success`) — broker connected, sync caught up +- `◉` (amber via `warning`) — broker connected, sync pending > 0 +- `◉` (yellow via `warning`) — broker connecting (during reconnect) +- `◎` (gray via `muted`) — broker disconnected (queueing locally) +- (nothing) — not in a mesh or in personal mode without sync + +### 15.5 Click or slash-command interaction + +Clicking the status line (if Claude Code supports click) or running `/mesh` as a slash command opens a compact overlay: + +``` +◉ platform-team (owned) + +Peers (7, 2 online) + alice working launching CI + bob idle — + carol offline (last seen 2m ago) + … + +[Invite] [Leave] +``` + +Dismissible. Actions at the bottom. + +--- + +## 16. Delight beats (four total) + +**Exactly four delight beats per major version.** Not six, not five, not one per screen. Four. + +Trust surfaces (§8) are a distinct category and do NOT count against this budget. + +### 16.1 The locked catalog + +1. 
**First-run closing sentence** + > `"You're in."` + + Shown exactly once per machine, in the first-run flow. State: `milestoneShown.firstRunClosing = true`. + +2. **First publish success** + > `"Your mesh is live. Anyone with the invite can join."` + + Shown when a personal mesh is successfully published as shared. State: `milestoneShown.firstPublish = true`. + +3. **First invite sent** + > `"Sent. They'll see it when they check their inbox."` + + Shown when the user successfully sends their first invite by email (not clipboard — the clipboard flow has its own confirmation in §6.6 that's utility, not delight). State: `milestoneShown.firstInvite = true`. + + **Why two sentences**: a single-word "Sent." is too minimal to register as delight — it reads as a confirmation checkmark, not an emotional payoff. The second sentence completes the beat with a calm acknowledgment of what happens next. Still under the 1-sentence verbosity budget because the two are parts of one thought (the payoff + the implication). + +4. **100th session milestone** + > `"Nice to see you again."` + + Shown exactly once, at the 100th `daily_launch` session (see §16.3 for counter semantics). State: `milestoneShown.hundredth = true`. + +### 16.2 The 5th slot + +Slot #5 is **reserved**. Not a placeholder — if no genuinely delightful moment is found for v1.0.0, the product ships with 4. Better to ship fewer good beats than to pad the count. + +### 16.3 Counter semantics for the 100th-session milestone + +- **What counts**: every successful `daily_launch` session that reaches the handoff to Claude Code. `--help`, `doctor`, `whoami`, first-run, and failed launches don't count. +- **Storage**: `~/.claudemesh/state.json` → `state.sessionCount: number`. Incremented atomically inside the handoff transaction. +- **Trigger**: when `sessionCount === 100` exactly (not ≥). Shown once. Never shown again even if state is reset. +- **Shown-flag**: `state.milestoneShown.hundredth: boolean` to prevent re-showing. 
+- **Reset behavior**: `claudemesh advanced telemetry off` does NOT reset the counter. `rm -rf ~/.claudemesh` does (effectively a new machine). Explicit `claudemesh advanced reset-milestones` exists for testing. +- **No network**: counter is purely local, never transmitted. + +### 16.4 Growth across versions + +**Four delight beats per major version.** v1.0.0 ships with 4. v1.1–1.9 can each add at most 1 new beat (minor version cap +1, total cap 13 in the v1 lifetime). v2.0 resets the counter. + +**Strict rule**: a minor release cannot add more than 1 new delight beat. Additions require design review. + +--- + +## 17. Anti-patterns (forbidden behaviors) + +Literal blocklist. Every one of these has appeared in other CLIs and been painful. + +### 17.1 Prompts we will never show + +- "Do you want to continue? [Y/n]" — if yes is always correct, don't ask +- "Are you sure?" — use typed confirmation for destructive operations +- "Is this your first time?" — we know from the filesystem +- "What's your name?" — we know from the account +- "Would you like to install shell completions?" — ship them automatically +- "Please rate your experience" +- "We noticed you haven't used us in a while. Everything OK?" 
+ +### 17.2 Outputs we will never produce + +- ASCII art logos on every command +- Emojis in log output +- Unicode box drawing around error messages +- Color-only state indication +- Rainbow gradients +- Blinking text +- Sound (bell character `\a`) +- Claiming success before success is confirmed +- Hiding errors behind "debug mode" +- Forcing the user to read a TOS on first run +- **Brand mark / ASCII art on the first-run welcome** (typography only) + +### 17.3 Commands we will never add + +- `claudemesh say ` — cutesy inter-peer chat belongs in Claude Code itself +- `claudemesh games` +- `claudemesh weather` +- `claudemesh update` self-updater — `npm i -g claudemesh-cli@latest` is the update path +- `claudemesh reset --hard` — too dangerous to expose as one command +- `claudemesh sudo` +- `claudemesh agi` + +### 17.4 Behaviors we will never adopt + +- Phoning home on startup except for opt-out update check +- Auto-updating without user action +- Silently modifying files outside `~/.claudemesh/` +- Starting background daemons without telling the user +- Running `sudo` without explicit permission +- Reading env vars we don't need +- Logging PII even hashed +- Emitting `\a` bell characters +- Overriding the user's locale +- Overriding the user's terminal theme colors + +### 17.5 The "explain it in a tweet" test + +Every feature, every command, every screen must pass this test: **can you explain what it does in a single tweet without sounding silly?** If not, it's over-designed. + +--- + +## 18. Locked copy governance + +### 18.1 What "locked" means + +Some strings are marked **locked** in the microcopy catalog. A locked string cannot change without a design review. 
+ +Locked strings in v1.0.0: + +- `firstRun.closing` = `"You're in."` +- `milestone.hundredth` = `"Nice to see you again."` +- `invite.email.sent` = `"✔ Sent to {email}."` (exact form) +- The first-run scenario scripts (§4.1, §4.2) — every word and linebreak +- The main `--help` command descriptions (§11.2) + +### 18.2 Locked does not mean frozen + +Locked strings can still be: +- Translated per locale (with per-locale length budgets) +- Reformatted for accessibility (e.g. ASCII fallback for icons) +- Reformatted for terminal width (e.g. sub-60-col reflow) + +Locked means the **intent** is fixed. The English literal can change if a reviewer approves; the translated versions must preserve the intent. + +### 18.3 Adding new strings + +New user-visible strings follow a review path: +1. Draft in the microcopy catalog (`locales/en.ts`) +2. Pass the Copy Review Checklist (§20) +3. Approver (not the author) signs off +4. CI runs the forbidden-concepts lint across all locales +5. Ships + +--- + +## 19. Visual audit checklist + +Run through this before every visual change ships. 
+ +- [ ] No inline color strings — all from `ui/styles.ts` +- [ ] No inline icon unicode — all from `Icons` +- [ ] No boxes, borders, or Unicode drawing characters +- [ ] No emoji in user-visible output +- [ ] All status states distinguishable in monochrome +- [ ] Works at 60-column terminal width +- [ ] Works at 120-column terminal width without stretching +- [ ] Works in light-theme terminal +- [ ] Works in dark-theme terminal +- [ ] Works with `NO_COLOR=1` +- [ ] Works with `CLAUDEMESH_NO_UNICODE=1` (ASCII fallback) +- [ ] All spinners have >200ms minimum display time OR are removed +- [ ] Every string comes from `locales/` +- [ ] Error messages are 1–3 sentences, end with exactly one action +- [ ] Success messages are 1 line +- [ ] No forbidden concepts (§3.2) +- [ ] Picker preselects the most likely action +- [ ] Selection uses 3-signal indication (icon + bold + position) +- [ ] Keyboard conventions respected (Enter, Escape, arrows, Tab no-op) +- [ ] `?` overlay available on interactive screens +- [ ] Terminal state is clean after exit (no leftover ANSI, cursor visible) + +## 20. Copy review checklist + +Run through this before every string change ships. + +- [ ] Voice is informed, restrained, competent, warm-but-not-familiar +- [ ] Active voice +- [ ] Second person singular ("you", "your") +- [ ] Present tense +- [ ] Specific over vague +- [ ] No forbidden concepts in EN or ES +- [ ] Within verbosity budget (§3.4) +- [ ] No exclamation marks +- [ ] No rhetorical questions +- [ ] No "we" when "claudemesh" or passive would be clearer +- [ ] Errors end with exactly one primary action +- [ ] ICU interpolation handles plurals correctly +- [ ] Reads well to a non-native speaker +- [ ] Reads well when said out loud + +--- + +## 21. Stable JSON API versioning + +Scripts depend on the shape of `--json` output. Breaking changes would break user automation. 
+ +### 21.1 Every JSON output includes `schema_version` + +```json +{ + "schema_version": "1.0", + "meshes": [ + { + "slug": "alejandro-mbp", + "name": "Alejandro's Mac", + "kind": "personal", + "peer_count": 1 + } + ] +} +``` + +### 21.2 Rules + +- Breaking changes bump `schema_version` (major) +- Additive changes (new fields) do not bump (minor) +- The CLI supports the current + previous schema version for at least 6 months +- Scripts check `schema_version` and adapt + +### 21.3 Fields stable for v1.0.0 + +- `meshes[].slug`, `name`, `kind`, `peer_count`, `peers_online`, `last_used_at` +- `peers[].peer_id`, `display_name`, `status`, `summary`, `last_seen_at` +- `whoami.signed_in`, `user.id`, `user.display_name`, `user.email`, `token_source` + +Adding new fields is safe. Renaming or removing fields requires a major bump. + +--- + +**End of spec.** diff --git a/.artifacts/specs/2026-04-11-cli-v2-pass1.md b/.artifacts/specs/2026-04-11-cli-v2-pass1.md new file mode 100644 index 0000000..0bfe9ab --- /dev/null +++ b/.artifacts/specs/2026-04-11-cli-v2-pass1.md @@ -0,0 +1,1157 @@ +# claudemesh-cli v2 — Pass 1 + +**Status:** spec (active implementation target) +**Created:** 2026-04-11 +**Scope:** immediate v2 work — refactor + CLI user flows + v1 preservation +**Ships as:** `claudemesh-cli@1.0.0` + +**Companion documents** (read these first): + +- `2026-04-11-v1-feature-inventory.md` — every v1 feature that must keep working +- `2026-04-11-v2-parity-test-plan.md` — how we verify v2 preserves v1 +- `2026-04-10-cli-v2-pass2-facade-pattern.md` — architectural rules (authoritative for both passes) + +**Pass 2 references** (future work, not implementation targets): + +- `2026-04-10-cli-v2-pass2-final-vision.md` — longer-term architecture vision +- `2026-04-10-cli-v2-pass2-ux-design.md` — interactive UX redesign +- `2026-04-10-cli-v2-pass2-local-first-storage.md` — local SQLite + Lamport + sync daemon +- `2026-04-10-cli-v2-pass2-shared-infrastructure.md` — broker hardening + 
+--- + +## 1. Scope + +### 1.1 What Pass 1 delivers + +Three things, exactly: + +1. **Folder structure refactor.** `apps/cli/src/` becomes `apps/cli-v2/src/` with a cleaner layout: `entrypoints/`, `cli/`, `commands/`, `services/` (feature-folders with facades), `ui/`, `mcp/`, `constants/`, `types/`, `utils/`, `locales/`, `templates/`. ESLint + dependency-cruiser enforce the facade pattern. The scaffold replaces v1's ad-hoc organization. +2. **CLI user flows** that remove the dependency on the web dashboard for common operations: + - `claudemesh login` — device-code OAuth against claudemesh.com + - `claudemesh register` — account creation flow (browser handoff for email verification) + - `claudemesh logout` — revoke session + - `claudemesh whoami` — identity check + - `claudemesh new` — create a mesh from the CLI (was dashboard-only in v1) + - `claudemesh invite` — generate an invite from the CLI (was dashboard-only in v1) +3. **Every v1 feature preserved** — the 79 MCP tools, the 85 broker WS message types, the welcome wizard, the launch flow, the status hooks, the crypto primitives, the invite parsing, the doctor checks. See `2026-04-11-v1-feature-inventory.md` for the complete list. + +### 1.2 What Pass 1 does NOT deliver + +Everything in the Pass 2 specs. Concretely: + +- No local SQLite source of truth. v2 Pass 1 still hits the broker for every read/write (same as v1). +- No Lamport clock, no sync daemon, no outbox/inbox, no publish transaction, no NFC normalization, no write queue with state machine. +- No broker changes. The broker runs unchanged. Postgres, Neo4j, Qdrant, MinIO, Docker sandboxes — all stay on their v1 configurations. +- No hardened Postgres role isolation. v1's `search_path`-based scoping is preserved unchanged. Improving this is Pass 2. +- No MCP catalog tiering, no catalog audit process, no egress proxy for Docker sandboxes, no SSRF policy for URL watch. v1 behavior is preserved. +- No Telegram bridge changes. 
`apps/broker/src/telegram-bridge.ts` stays exactly as it is. The v2 CLI **preserves `claudemesh connect telegram`** as an interactive wizard (see §5.7) — this was a regression risk in an earlier draft of this spec. The v1 `disconnect telegram` subcommand is dropped because teardown is handled by the bridge itself (revoke inside Telegram) or by leaving the mesh. +- No new MCP catalog. Deployed MCPs still come from zip upload / git URL / npx package, same as v1. The bundled catalog (`apps/broker/src/mcp-catalog.ts`) is Pass 2 work. +- No ICU MessageFormat. v2 Pass 1 uses simple string templates. ICU plurals + per-locale length budgets are Pass 2. +- No `session_kind` enum with six output budgets. v2 Pass 1 keeps v1's imperative output patterns. +- No accessibility token-signal matrix. v2 Pass 1 keeps v1's TUI primitives. Pass 2 introduces the new design system. +- No new visual design system. v1's colors and icons stay. Pass 2 introduces the six-role palette. +- No trust surfaces / delight beats budget. Pass 2 concern. +- No new error structure with one-action recovery. v1's existing error messages stay. +- No 14 inviolable rules from the Pass 2 UX spec. Pass 2 concern. + +If it's in a Pass 2 document, it is **not** part of Pass 1 unless this document explicitly says so. + +### 1.3 The two compatibility contracts + +**Contract A — the broker's WS protocol.** v2's CLI must speak the broker's v1 WS protocol byte-for-byte. Every one of the 85 WS message types in inventory §3 keeps the same envelope shape, the same field names, the same signature format. Production brokers deployed today must continue to serve v2 CLIs without any broker-side changes. Together with Contract B below, this is one of the **only** two compatibility contracts that matter. + +**Contract B — v1 feature parity (user-facing behavior).** Every item in the v1 inventory §12 "must preserve" list must keep working. 
This includes the 79 MCP tools, the 85 WS messages, the 18 HTTP endpoints, the backend integrations, the status engine, the scheduled message delivery, the URL watch, and the crypto round-trips. The v2 parity test suite is the verification. + +**What is NOT a compatibility contract**: + +- v1's CLI command names (`launch`, `disconnect telegram`, etc.) — v2 picks new names where better ones exist. +- v1's config file shape (`~/.claudemesh/config.json`) — v2 picks a new shape if needed. +- v1's JSON output shape (`--json` outputs) — v2 locks a new shape via `schema_version: "1.0"`. +- v1's env var names — v2 picks new ones. +- v1's exit codes — v2 uses the cleaner scheme from Pass 2 §11.2. +- v1's stdout text — v2 rewrites for consistency and future localization. + +No deprecation windows, no migration runners, no aliases. There are no users to migrate. + +--- + +## 2. The three Pass 1 intents + +These are the decisions v2 Pass 1 is optimizing for. Everything else is either out of scope or inherits from v1. + +### 2.1 Intent A — scalability through folder structure + +**Problem**: v1's `apps/cli/src/` has 22 files, with business logic scattered across `commands/`, `mcp/`, `ws/`, `auth/`, `crypto/`, `invite/`, `state/`, `tui/`, and `lib/`. No enforced boundaries. `commands/launch.ts` is 775 lines. `mcp/server.ts` is 2,139 lines. `ws/client.ts` is 2,191 lines. Growing to v1.5 and beyond would compound the tangle. + +**Pass 1 fix**: feature-folder services with facade pattern. Every service (`auth`, `mesh`, `invite`, `broker`, `api`, `crypto`, `config`, `state`, `device`, `clipboard`, `spawn`, `telemetry`, `health`, `update`, `i18n`, `lifecycle`, `logger`) lives in `services/<name>/` with `facade.ts` as the single public entry point. ESLint `boundaries` plugin + dependency-cruiser enforce that `ui/`, `commands/`, `cli/`, and `mcp/` import only from `services/*/facade.ts`. 
+ +The facade pattern is described in full in `2026-04-10-cli-v2-pass2-facade-pattern.md` (which applies to both passes). Pass 1 implements it completely. + +**What this buys us**: +- Growing a service is local — add files inside its folder, extend its facade +- Cross-service calls are explicit — facades import other facades, never internal files +- Testing is trivial — mock the facade +- Code review is bounded — PRs touch single service folders +- Scaling contributors is possible — ownership can split along service lines + +### 2.2 Intent B — CLI user flows (remove dashboard dependency) + +**Problem**: v1 users must open the claudemesh.com dashboard to: +- Create an account +- Log in to the dashboard (OAuth flow) +- Create a mesh +- Generate an invite link +- Manage their profile + +Only after those web steps can they use the CLI to join (`claudemesh join <invite-url>`). This means a terminal-first developer always has to context-switch to a browser for anything beyond joining. + +**Pass 1 fix**: add CLI commands that drive all these flows end-to-end. + +| v2 Pass 1 command | Replaces | +|---|---| +| `claudemesh register` | Web dashboard signup page | +| `claudemesh login` | Web dashboard login + dashboard-to-CLI JWT sync | +| `claudemesh logout` | Web dashboard logout | +| `claudemesh whoami` | Web dashboard "who am I" profile view | +| `claudemesh new <name>` | Web dashboard "create mesh" form | +| `claudemesh invite [email]` | Web dashboard "generate invite" button | + +These are the **only** net-new CLI commands in Pass 1. Everything else inherits from v1. + +The flows use **device-code OAuth** against claudemesh.com's existing Better Auth backend. The only new backend work is the small set of web-app API routes listed in §6.3, wherever they do not already exist (the inventory §4 confirms `POST /cli-sync` exists, which is the existing dashboard-sync flow; v2 extends this to a full login/register flow). + +### 2.3 Intent C — preserve every v1 feature + +**Problem**: the scope creep risk. 
Every review round has added "while we're refactoring, let's also fix X." + +**Pass 1 fix**: the v1 feature inventory is the regression contract. The parity test suite is the verification. A PR cannot merge if it breaks any item in inventory §12. + +Pass 1 is a **refactor**, not a rewrite. v1's code gets moved into new files, its imports get updated to use facades, its business logic stays intact. Where v1's implementation is long or messy, v2 Pass 1 may split it into multiple files — but the observable behavior stays identical. + +--- + +## 3. Target source tree + +``` +apps/cli-v2/ +├── package.json # name: claudemesh-cli, version: 0.11.0-alpha.0 → 1.0.0 +├── tsconfig.json +├── bunfig.toml +├── build.ts # Bun bundler driver, enforces 1.2 MB gzipped ceiling +├── .eslintrc.cjs # boundaries plugin + 3 custom rules +├── dependency-cruiser.config.js # folder-level boundary enforcement +├── biome.json +├── CHANGELOG.md +├── README.md +├── bin/ +│ └── claudemesh # shebang → dist/entrypoints/cli.js +│ +├── src/ +│ ├── entrypoints/ +│ │ ├── cli.ts # interactive CLI entry — parses argv, dispatches +│ │ └── mcp.ts # `claudemesh mcp` → MCP stdio server +│ │ +│ ├── cli/ # non-Ink I/O plumbing +│ │ ├── argv.ts # parse process.argv, detect positional invite URLs +│ │ ├── print.ts # stdout helpers (respects NO_COLOR / FORCE_COLOR) +│ │ ├── structured-io.ts # --json output with schema_version +│ │ ├── exit.ts # exit codes + cleanup hooks +│ │ ├── update-notice.ts # "new version available" banner (npm registry poll) +│ │ ├── handlers/ +│ │ │ ├── signal.ts # SIGINT/SIGTERM graceful shutdown +│ │ │ └── error.ts # top-level error → user message +│ │ └── output/ # plain-text renderers for non-interactive commands +│ │ ├── list.ts +│ │ ├── peers.ts +│ │ ├── whoami.ts +│ │ └── version.ts +│ │ +│ ├── commands/ # one verb per file; no "advanced" prefix +│ │ ├── index.ts # command registry +│ │ ├── join.ts # join a mesh from invite URL (v1 + v2 formats) +│ │ ├── new.ts # NEW — create a 
mesh via API +│ │ ├── invite.ts # NEW — generate invite via API +│ │ ├── list.ts # list joined meshes +│ │ ├── rename.ts # rename current mesh +│ │ ├── leave.ts # leave a mesh +│ │ ├── peers.ts # list peers +│ │ ├── send.ts # send a message +│ │ ├── inbox.ts # drain inbox +│ │ ├── state.ts # get / set / list state +│ │ ├── info.ts # mesh overview +│ │ ├── remember.ts # store memory +│ │ ├── recall.ts # search memories +│ │ ├── remind.ts # schedule reminder +│ │ ├── profile.ts # view / edit profile +│ │ ├── status.ts # broker connectivity check +│ │ ├── doctor.ts # diagnostic checks (port from v1) +│ │ ├── register.ts # NEW — account creation +│ │ ├── login.ts # NEW — device-code OAuth +│ │ ├── logout.ts # NEW — revoke session +│ │ ├── whoami.ts # NEW — identity check +│ │ ├── install.ts # register MCP + hooks with Claude Code +│ │ ├── uninstall.ts # remove MCP + hooks +│ │ ├── sync.ts # sync meshes from dashboard (existing /cli-sync) +│ │ ├── welcome.ts # welcome wizard (bare command, first-run only) +│ │ ├── hook.ts # internal Claude Code hook handler +│ │ ├── mcp.ts # internal `claudemesh mcp` dispatcher +│ │ └── seed-test-mesh.ts # dev-only +│ │ +│ ├── services/ # feature-folders with facades +│ │ ├── auth/ # NEW CLI user flows + existing JWT sync +│ │ │ ├── client.ts # HTTP calls to /api/auth/cli/* endpoints +│ │ │ ├── device-code.ts # device-code flow orchestration +│ │ │ ├── token-store.ts # ~/.claudemesh/auth.json R/W with 0600 +│ │ │ ├── dashboard-sync.ts # existing /cli-sync flow (port from v1 auth/sync-with-broker.ts) +│ │ │ ├── callback-listener.ts # local HTTP listener for OAuth callback (port from v1) +│ │ │ ├── open-browser.ts # cross-platform browser launcher (port from v1) +│ │ │ ├── implementation.ts +│ │ │ ├── schemas.ts +│ │ │ ├── errors.ts +│ │ │ ├── index.ts # getAuthService() factory +│ │ │ ├── facade.ts # loginWithDeviceCode, logout, whoAmI, register +│ │ │ └── auth.test.ts +│ │ │ +│ │ ├── mesh/ # mesh CRUD + list + resolve-target +│ │ │ ├── 
client.ts # HTTP to /api/my/meshes +│ │ │ ├── list.ts # port from v1 commands/list.ts logic +│ │ │ ├── create.ts # NEW — POST /api/my/meshes +│ │ │ ├── rename.ts # PATCH /api/my/meshes/:slug +│ │ │ ├── leave.ts # port from v1 commands/leave.ts +│ │ │ ├── join.ts # port from v1 commands/join.ts +│ │ │ ├── resolve-target.ts # port from v1 launch.ts mesh picker logic +│ │ │ ├── implementation.ts +│ │ │ ├── schemas.ts +│ │ │ ├── errors.ts +│ │ │ ├── index.ts +│ │ │ └── facade.ts +│ │ │ +│ │ ├── invite/ # invite generation + parsing + claiming +│ │ │ ├── generate.ts # NEW — POST /api/my/meshes/:slug/invites +│ │ │ ├── parse-url.ts # port from v1 invite/parse.ts + lib/invite-v2.ts +│ │ │ ├── claim.ts # port from v1 invite/enroll.ts +│ │ │ ├── send-email.ts # NEW — email delivery (if backend supports) +│ │ │ ├── implementation.ts +│ │ │ ├── schemas.ts +│ │ │ ├── errors.ts +│ │ │ ├── index.ts +│ │ │ └── facade.ts +│ │ │ +│ │ ├── broker/ # WS client + peer crypto (port from v1 ws/) +│ │ │ ├── ws-client.ts # port from v1 ws/client.ts (2191 lines) +│ │ │ ├── manager.ts # port from v1 ws/manager.ts +│ │ │ ├── envelope.ts # port from v1 crypto/envelope.ts +│ │ │ ├── hello-sig.ts # port from v1 crypto/hello-sig.ts +│ │ │ ├── implementation.ts +│ │ │ ├── schemas.ts # WS message type definitions (all 85) +│ │ │ ├── errors.ts +│ │ │ ├── index.ts +│ │ │ ├── facade.ts # typed methods for each WS op +│ │ │ └── broker.test.ts +│ │ │ +│ │ ├── api/ # base HTTP client +│ │ │ ├── client.ts # typed fetch wrapper +│ │ │ ├── my.ts # /api/my/* endpoint helpers +│ │ │ ├── public.ts # /api/public/* (invite claim) +│ │ │ ├── errors.ts +│ │ │ ├── index.ts +│ │ │ └── facade.ts +│ │ │ +│ │ ├── crypto/ # Ed25519, NaCl, AES-GCM +│ │ │ ├── keypair.ts # port from v1 crypto/keypair.ts +│ │ │ ├── file-crypto.ts # port from v1 crypto/file-crypto.ts +│ │ │ ├── box.ts # NaCl crypto_box wrappers +│ │ │ ├── random.ts # secure random helpers +│ │ │ ├── index.ts +│ │ │ ├── facade.ts +│ │ │ └── crypto.test.ts +│ 
│ │ +│ │ ├── config/ # ~/.claudemesh/config.json +│ │ │ ├── read.ts # port from v1 state/config.ts +│ │ │ ├── write.ts +│ │ │ ├── schemas.ts +│ │ │ ├── index.ts +│ │ │ └── facade.ts +│ │ │ +│ │ ├── state/ # last-used cache (not mesh state — that's via broker) +│ │ │ ├── last-used.ts # remember last mesh, name, role for faster launch +│ │ │ ├── schemas.ts +│ │ │ ├── index.ts +│ │ │ └── facade.ts +│ │ │ +│ │ ├── device/ # hostname, os, arch +│ │ │ ├── info.ts +│ │ │ ├── index.ts +│ │ │ └── facade.ts +│ │ │ +│ │ ├── clipboard/ # pbpaste / xclip / wl-paste (optional) +│ │ │ ├── read.ts +│ │ │ ├── index.ts +│ │ │ └── facade.ts +│ │ │ +│ │ ├── spawn/ # exec choke points +│ │ │ ├── claude.ts # exec claude binary (port from v1 launch.ts) +│ │ │ ├── browser.ts # open browser (port from v1 auth/open-browser.ts) +│ │ │ ├── index.ts +│ │ │ └── facade.ts +│ │ │ +│ │ ├── telemetry/ # opt-out usage events +│ │ │ ├── emit.ts +│ │ │ ├── opt-out.ts +│ │ │ ├── index.ts +│ │ │ └── facade.ts +│ │ │ +│ │ ├── health/ # doctor check implementations +│ │ │ ├── check-node-version.ts +│ │ │ ├── check-claude-binary.ts +│ │ │ ├── check-mcp-registered.ts +│ │ │ ├── check-hooks-registered.ts +│ │ │ ├── check-config-perms.ts +│ │ │ ├── check-keypairs-valid.ts +│ │ │ ├── index.ts +│ │ │ └── facade.ts # runAllChecks, runCheck +│ │ │ +│ │ ├── update/ # npm registry version check +│ │ │ ├── check.ts +│ │ │ ├── index.ts +│ │ │ └── facade.ts +│ │ │ +│ │ ├── i18n/ # simple string templates (no ICU in Pass 1) +│ │ │ ├── resolve.ts # locale detection +│ │ │ ├── format.ts # {placeholder} substitution +│ │ │ ├── index.ts +│ │ │ └── facade.ts +│ │ │ +│ │ ├── lifecycle/ # start/stop long-running services +│ │ │ ├── service-manager.ts +│ │ │ ├── index.ts +│ │ │ └── facade.ts +│ │ │ +│ │ └── logger/ # structured logger +│ │ ├── logger.ts +│ │ ├── index.ts +│ │ └── facade.ts +│ │ +│ ├── ui/ # Ink-based TUI (port from v1 tui/) +│ │ ├── styles.ts # colors + icons (port from v1 tui/colors.ts — v1 values, not Pass 2 
redesign) +│ │ ├── screen.ts # port from v1 tui/screen.ts +│ │ ├── spinner.ts # port from v1 tui/spinner.ts +│ │ ├── welcome/ # welcome wizard screens (port from v1 commands/welcome.ts) +│ │ │ ├── WelcomeScreen.tsx +│ │ │ ├── RegisterStep.tsx # NEW for Pass 1 +│ │ │ ├── LoginStep.tsx # NEW for Pass 1 +│ │ │ ├── MeshPickerStep.tsx +│ │ │ └── index.ts +│ │ └── launch/ # launch flow (port from v1 commands/launch.ts interactive bits) +│ │ ├── LaunchFlow.tsx +│ │ └── index.ts +│ │ +│ ├── mcp/ # stdio MCP server (port from v1 mcp/server.ts) +│ │ ├── server.ts # entry from entrypoints/mcp.ts +│ │ ├── router.ts # tool dispatch +│ │ ├── tools/ # one file per tool family (79 tools total) +│ │ │ ├── memory.ts # remember, recall, forget +│ │ │ ├── state.ts # set_state, get_state, list_state +│ │ │ ├── messaging.ts # send_message, list_peers, check_messages, message_status +│ │ │ ├── profile.ts # set_profile, set_status, set_summary, set_visible +│ │ │ ├── groups.ts # join_group, leave_group +│ │ │ ├── files.ts # share_file, get_file, list_files, file_status, delete_file, grant_file_access, read_peer_file, list_peer_files +│ │ │ ├── vectors.ts # vector_store, vector_search, vector_delete, list_collections +│ │ │ ├── graph.ts # graph_query, graph_execute +│ │ │ ├── sql.ts # mesh_query, mesh_execute, mesh_schema +│ │ │ ├── streams.ts # create_stream, publish, subscribe, list_streams +│ │ │ ├── contexts.ts # share_context, get_context, list_contexts +│ │ │ ├── tasks.ts # create_task, claim_task, complete_task, list_tasks +│ │ │ ├── scheduling.ts # schedule_reminder, list_scheduled, cancel_scheduled +│ │ │ ├── mesh-meta.ts # mesh_info, mesh_stats, mesh_clock, ping_mesh +│ │ │ ├── clock-write.ts # mesh_set_clock, mesh_pause_clock, mesh_resume_clock +│ │ │ ├── skills.ts # share_skill, get_skill, list_skills, remove_skill, mesh_skill_deploy +│ │ │ ├── mcp-registry-peer.ts # mesh_mcp_register, mesh_mcp_list, mesh_tool_call, mesh_mcp_remove +│ │ │ ├── mcp-registry-broker.ts # 
mesh_mcp_deploy, undeploy, update, logs, scope, schema, catalog +│ │ │ ├── vault.ts # vault_set, vault_list, vault_delete +│ │ │ ├── url-watch.ts # mesh_watch, mesh_unwatch, mesh_watches +│ │ │ ├── webhooks.ts # create_webhook, list_webhooks, delete_webhook +│ │ │ └── index.ts +│ │ ├── middleware/ +│ │ │ ├── logging.ts +│ │ │ └── error-handler.ts +│ │ └── handlers/ +│ │ ├── stdio.ts +│ │ └── jsonrpc.ts +│ │ +│ ├── constants/ +│ │ ├── paths.ts # XDG paths for ~/.claudemesh/ +│ │ ├── urls.ts # default API / broker URLs +│ │ ├── timings.ts # polling intervals, timeouts +│ │ ├── exit-codes.ts # numeric exit code constants +│ │ └── index.ts +│ │ +│ ├── types/ +│ │ ├── api.ts # request/response types +│ │ ├── mesh.ts # mesh, member, invite types +│ │ ├── peer.ts +│ │ └── index.ts +│ │ +│ ├── utils/ +│ │ ├── levenshtein.ts # typo recovery +│ │ ├── slug.ts # hostname → slug +│ │ ├── url.ts # URL parsing +│ │ ├── format.ts # bytes, durations +│ │ ├── semver.ts # version comparison +│ │ ├── retry.ts # backoff helpers +│ │ └── index.ts +│ │ +│ ├── locales/ +│ │ ├── en.ts # simple string templates (no ICU in Pass 1) +│ │ └── index.ts +│ │ +│ └── templates/ # mesh templates (port from v1 templates/index.ts) +│ ├── dev-team.ts +│ ├── research.ts +│ ├── ops-incident.ts +│ ├── simulation.ts +│ ├── personal.ts +│ └── index.ts +│ +└── tests/ # See parity test plan for full layout + ├── unit/ # colocated tests under src/services/*/ + ├── parity/ # v1/v2 behavioral equivalence (~70 files) + ├── contract/ # WS protocol compatibility (~85 files) + ├── mcp-tools/ # per-tool handler parity (79 files) + ├── e2e/ # full journeys against real broker (~28 files) + ├── golden/ # --json output shape (~12 files) + ├── fixtures/ + └── helpers/ +``` + +**What's NOT in this tree** (versus Pass 2): + +- No `src/services/store/` — no local SQLite source of truth +- No `src/services/broker/sync-daemon.ts` — no outbox/inbox sync +- No `src/services/broker/peer-crypto.ts` (beyond hello-sig + envelope 
that v1 already has) +- No `src/migrations/` — no config migrations because there's no v1 user base +- No `src/ui/accessibility/` — no announcement shim +- No `src/ui/session-kind.ts` — no output budget enum +- No `src/locales/es.ts` — Pass 1 is English-only (Spanish is Pass 2) +- No `src/services/broker/mcp-catalog.ts` — no CLI-side catalog mirror + +The scaffold for Pass 1 creates ~200 files (similar to Pass 2 scaffold but leaner on the `services/store/`, `services/broker/sync-*`, `ui/accessibility/`, and Pass 2-specific areas). + +--- + +## 4. CLI command surface + +### 4.1 Main `--help` output + +``` +$ claudemesh --help + +claudemesh — peer mesh for Claude Code sessions +v1.0.0 + +USAGE + claudemesh start a session in your mesh (creates one if needed) + claudemesh join a mesh from an invite link + claudemesh new create a new mesh + claudemesh invite [email] generate an invite (copies to clipboard) + claudemesh list see your meshes + claudemesh rename rename the current mesh + claudemesh leave [mesh] leave a mesh + claudemesh peers see who's in the current mesh + + claudemesh send send a message + claudemesh inbox drain pending messages + claudemesh state ... get, set, or list shared state + claudemesh remember store a memory + claudemesh recall search memories + claudemesh remind ... 
schedule a reminder + claudemesh profile view or edit your profile + +When something's wrong + claudemesh doctor diagnose install/config/connection issues + claudemesh whoami show current identity + claudemesh status check broker connectivity + +Authentication + claudemesh register create a claudemesh.com account from the CLI + claudemesh login sign in via browser + claudemesh logout sign out and clear credentials + +Setup + claudemesh install register MCP server + status hooks with Claude Code + claudemesh uninstall remove MCP server + hooks + claudemesh sync refresh mesh list from your dashboard + claudemesh connect telegram link a Telegram bot to a mesh (interactive wizard) + +Internal (for Claude Code and scripts) + claudemesh mcp stdio MCP server + claudemesh hook handle Claude Code hook events + claudemesh seed-test-mesh dev-only helper +``` + +### 4.2 Flat command namespace + +There is no `advanced` prefix. There is no `launch` subcommand. `connect telegram` is preserved as an interactive wizard (see §5.7); `disconnect telegram` is dropped. Every command above is callable directly. + +The main `--help` groups commands visually (USAGE block + "When something's wrong" + "Authentication" + "Setup" + "Internal") but this is display formatting, not namespacing. `claudemesh register` is a direct invocation, not `claudemesh advanced register`. + +### 4.3 Bare command behavior + +``` +$ claudemesh +``` + +Depends on machine state: + +- **First run, no config**: runs the welcome wizard (`commands/welcome.ts`) which detects first-time use and prompts for register/login/join/exit. +- **Returning user, config present**: launches a session directly. Picks the mesh from (in priority order): `--mesh` flag → last-used cache (`services/state/last-used.ts`) → interactive picker if ambiguous → the only joined mesh if there's just one. 
+- **Returning user, flag-first invocation** (`claudemesh --resume abc`, `claudemesh --name X`, `claudemesh -y`): same as returning user launch, with the flags passed to the launch handler. +- **Returning user, mesh already determined, clipboard has invite URL**: offers the invite as an alternative option before launching. + +This is the v1 behavior preserved verbatim, minus the `launch` word. + +### 4.4 Exit codes + +``` +0 success +1 user cancelled (Ctrl-C, declined prompt) +2 authentication failed +3 invalid arguments / unknown command +4 network error (broker or API unreachable — only when network was explicitly required) +5 not found (mesh, invite, peer) +6 already exists (slug collision, duplicate command) +7 permission denied (role, scope) +8 internal error (bug) +9 claude binary missing +``` + +These are defined in `src/constants/exit-codes.ts` and imported by every command handler. + +### 4.5 Flag conventions + +- `-y` / `--yes` — skip interactive confirmations +- `-q` / `--quiet` — suppress non-essential output +- `-v` / `--verbose` — increase log detail +- `--json` / `--output-format json` — machine-readable output with `schema_version: "1.0"` +- `--mesh <slug>` — override mesh selection +- `--token <token>` — override auth token (for scripts) +- `--help` / `-h` — per-command help + +`-y` is the key flag. It enables scripted/non-interactive use by skipping every prompt. Commands that require a confirmation in interactive mode just proceed when `-y` is passed. + +--- + +## 5. CLI user flows (net new) + +The six account and mesh flows in §5.1–§5.6 run against claudemesh.com's existing Better Auth + dashboard infrastructure; §5.7 (`connect telegram`) talks to the broker instead. The broker already has `POST /cli-sync` (inventory §4) for JWT-based sync — v2 Pass 1 extends this to a full device-code OAuth flow. + +### 5.1 `claudemesh register` + +Creates a new claudemesh.com account via browser handoff (email verification is a browser-side step). 
+ +``` +$ claudemesh register + + Opening browser for account signup… + + [browser opens to claudemesh.com/register?source=cli] + [user fills in email, password, verifies email] + [browser redirects to claudemesh.com/cli-auth with success token] + + ✔ Account created: alejandro@example.com + ✔ Signed in. +``` + +Implementation: +1. CLI generates a one-time callback URL with a local port listener (`services/auth/callback-listener.ts`, ported from v1) +2. Opens browser to `claudemesh.com/register?source=cli&callback=http://localhost:<port>` +3. User completes signup + email verification on the web +4. claudemesh.com posts a session token back to the callback +5. CLI stores token in `~/.claudemesh/auth.json` via `services/auth/token-store.ts` +6. CLI calls `POST /cli-sync` to fetch any existing meshes (usually none for a new account) + +**Backend requirement**: `claudemesh.com/register?source=cli&callback=<url>` must exist. If it doesn't, Pass 1 scope expands to add it to the web app (but this is a single Next.js page, not a backend architectural change). + +### 5.2 `claudemesh login` + +Device-code OAuth against claudemesh.com for existing accounts. + +``` +$ claudemesh login + + Opening browser for sign-in… + + If your browser didn't open, visit: + https://claudemesh.com/cli-auth?code=ABCD-EFGH + + Waiting for confirmation… + + ✔ Signed in as Alejandro. + ✔ Synced 3 meshes: platform-team, alejandro-mbp, claudefarm +``` + +Implementation: +1. CLI POSTs `/api/auth/cli/device-code/new` with device info (hostname, os, arch) +2. Receives `{ device_code, user_code, expires_at, verification_url }` +3. Opens browser to `verification_url?code=<user_code>` +4. Prints the user code to terminal as fallback +5. Polls `/api/auth/cli/device-code/<device_code>` every 1.5 seconds +6. On approval, receives `{ session_token, user }` + stores in `~/.claudemesh/auth.json` +7. Calls `POST /cli-sync` to fetch meshes + +**Backend requirement**: `/api/auth/cli/device-code/*` endpoints must exist. These are net new. 
Pass 1 scope includes adding them to `apps/web/src/app/api/auth/cli/` as a thin wrapper around Better Auth's session creation. This is ~100 lines of backend TypeScript. + +### 5.3 `claudemesh logout` + +``` +$ claudemesh logout + + ✔ Revoked session on claudemesh.com + ✔ Removed ~/.claudemesh/auth.json +``` + +Implementation: +1. Calls `DELETE /api/my/sessions/current` (or equivalent Better Auth revoke endpoint) +2. Deletes `~/.claudemesh/auth.json` +3. On server revocation failure, still removes local file and warns: + ``` + ✔ Removed local credentials. + ⚠ Could not revoke session on claudemesh.com. Revoke manually at + https://claudemesh.com/dashboard/settings/sessions + ``` + +### 5.4 `claudemesh whoami` + +``` +$ claudemesh whoami + + Signed in as Alejandro (alejandro@example.com) + Token source: device-code (~/.claudemesh/auth.json) + Meshes: 3 owned, 1 guest +``` + +With `--json`: + +```json +{ + "schema_version": "1.0", + "signed_in": true, + "user": { + "id": "usr_abc123", + "display_name": "Alejandro", + "email": "alejandro@example.com" + }, + "token_source": "device-code", + "meshes": { "owned": 3, "guest": 1 } +} +``` + +Implementation: reads `~/.claudemesh/auth.json`, calls `GET /api/my/profile` to verify + refresh, prints formatted output. + +### 5.5 `claudemesh new ` + +Creates a mesh on claudemesh.com from the CLI. + +``` +$ claudemesh new "Platform team" + + ✔ Created "platform-team" (id: msh_abc123) + ✔ You're the owner + ✔ Joined locally + + Invite teammates with: claudemesh invite +``` + +Implementation: +1. Requires authenticated session (if not, triggers `login` flow first) +2. POSTs `/api/my/meshes` with `{ name, slug?: "platform-team" }` (slug auto-derived from name if not provided) +3. On slug collision, suggests alternative: `"A mesh called 'platform-team' already exists. Try 'platform-team-2'?"` +4. On success, stores the mesh in local config via `services/config/facade.ts` +5. 
Joins locally (generates Ed25519 keypair, stores in `~/.claudemesh/keys/.key`) + +Flags: +- `--template ` — use a mesh template (reuses `templates/` directory ported from v1) +- `--description ` — optional description + +**Backend requirement**: `POST /api/my/meshes` must exist. It may already exist (v1 backend has mesh creation endpoints per `member-api.ts`). If not, adding it is ~50 lines on top of existing schema patterns. + +### 5.6 `claudemesh invite [email]` + +Generates an invite URL for the current mesh, optionally sending email. + +``` +$ claudemesh invite + + ✔ Invite URL copied to clipboard: + https://claudemesh.com/i/AB12CD34 + + Expires in 7 days. Anyone with this link can join "platform-team". +``` + +With email: + +``` +$ claudemesh invite alice@example.com + + ✔ Sent to alice@example.com + ✔ Also copied to clipboard +``` + +Implementation: +1. Resolves current mesh (via `services/mesh/resolve-target.ts`) +2. POSTs `/api/my/meshes/:slug/invites` with `{ email?, expires_in?: "7d", role?: "member" }` +3. Receives `{ url, code, expires_at }` +4. Copies to clipboard via `services/clipboard/facade.ts` +5. If email provided, backend sends an invitation email (reuses existing transactional email path in the web app) + +Flags: +- `--mesh ` — target mesh (default: current) +- `--expires ` — expiry (default: `7d`) +- `--uses ` — max uses (default: unlimited) +- `--role ` — role for the invitee (default: `member`) + +**Backend requirement**: `POST /api/my/meshes/:slug/invites` must exist. v1 has the inverse flow (`POST /api/public/invites/:code/claim`). Generating an invite is presumably already there since the dashboard has an "Invite" button — this just exposes it over the CLI. + +### 5.7 `claudemesh connect telegram` + +Interactive wizard that picks a mesh, requests a connect token from the broker, and shows a QR code + `t.me` deep link for the user to scan. Replaces v1's silent auto-pick of `config.meshes[0]` (which was a multi-mesh footgun). 
+ +``` +$ claudemesh connect telegram + + Connect Telegram to a mesh + ────────────────────────── + + Select mesh: + ❯ 1) platform-team (owner, 4 members) + 2) alejandro-mbp (owner, 1 member) + 3) claudefarm (guest, 12 members) + + [enter to confirm, ↑↓ to change, q to cancel] + + ✔ Token generated for "platform-team" + + Scan with your phone camera, or tap the link: + + █▀▀▀▀▀█ ▀▀ █▄▄▀ █▀▀▀▀▀█ + █ ███ █ ██▄▄▄▄▀ █ ███ █ + ... + + https://t.me/claudemeshbot?start=abc123… + + Waiting for confirmation from Telegram… (Ctrl-C to skip) + + ✔ Connected as @alejandro_m + Messages to tg:alejandro_m now route to Telegram. +``` + +Implementation: + +1. Loads joined meshes from `services/config/facade.ts`. If zero → error with `claudemesh join <invite-url>` hint. If one → skip the picker. If multiple → render Ink mesh picker in `ui/telegram/ConnectWizard.tsx`. +2. Converts the chosen mesh's `brokerUrl` from `wss://` to `https://`, POSTs `{meshId, memberId, pubkey, secretKey}` to `/tg/token`. +3. Broker returns `{token, deepLink}`. Wizard renders `qrcode-terminal` ASCII block + the raw link so users on desktop can copy-paste. +4. Opens a short-lived WS subscription on the chosen mesh and waits for a `telegram_bridge_connected` push if the broker already emits one; otherwise falls back to polling (see "Bridge-connected push event" under the broker dependencies below). If confirmation arrives within 5 minutes, prints the success line with the resolved `tg:<username>`. +5. On Ctrl-C, prints `The link stays valid. Run 'claudemesh connect telegram --status' later to check.` and exits 0. + +Flags: +- `--mesh <slug>` — skip the picker, target a specific mesh +- `--link` — print only the deep link, no QR, no wait (for scripting) +- `--status` — check whether the current mesh already has a Telegram bridge registered, without generating a new token + +**Broker dependencies to verify before Phase 5:** + +1. **Token TTL** — `/tg/token` currently returns `{token, deepLink}`. If the broker knows the TTL (it should — `generateTelegramConnectToken` encodes an expiry), extend the response to `{token, deepLink, expiresAt}`. 
This is a 3-line broker change and is the only broker touch permitted in Pass 1. Alternatively, the wizard just prints "link stays valid for a while" without a specific duration. +2. **Bridge-connected push event** — if the broker doesn't already emit a push when a Telegram bridge claims a token, the wizard falls back to polling `GET /mesh/:id/members` every 2s for a new `tg:*` member, with a 5-minute timeout. Polling is acceptable for Pass 1 — no broker change needed. +3. **Rate limit** — `/tg/token` is capped at 10 requests/hour/IP. Wizard must catch the 429 and print `You've requested too many Telegram tokens in the last hour. Try again in a few minutes.` instead of raw HTTP errors. + +### 5.7.1 Optional broker edit (small, allowed) + +The rule "broker unchanged in Pass 1" has one permitted exception: **extending the `/tg/token` response with `expiresAt`**. This is additive, unversioned, and costs three lines. If the v2 implementer judges even this too risky, the wizard simply omits the TTL display. + +No other broker change is in scope. + +--- + +## 6. Broker compatibility + +### 6.1 The broker is unchanged + +`apps/broker/src/` stays exactly as it is in v1, apart from the single optional `/tg/token` `expiresAt` extension permitted in §5.7.1. No other files are added, modified, or removed. The broker: + +- Listens on the same WS endpoint +- Handles the same 85 WS message types +- Serves the same 18 HTTP endpoints +- Uses the same Postgres schema +- Runs the same Neo4j, Qdrant, MinIO, Docker backends +- Executes the same Telegram bridge (`telegram-bridge.ts`, 1711 lines, unchanged) +- Applies the same rate limits, audit logging, status engine, priority delivery + +**Why**: because there are no broker-side changes in scope, and touching the broker introduces risk without reward for Pass 1. Every Pass 2 improvement to the broker (role-per-mesh isolation, egress proxy, catalog audit, etc.) is deferred until its own decision point. + +### 6.2 The CLI speaks v1's WS protocol + +v2's `services/broker/ws-client.ts` is a port of v1's `ws/client.ts`. 
The port preserves: + +- Every WS message type's envelope shape +- Every field name and type +- The Ed25519 hello signature format +- NaCl crypto_box envelope wrapping for `send_message` +- Reconnect logic, message queue, request/response correlation + +The parity test layer 2 (WS contract tests) verifies this empirically by capturing v1's envelope for each message type and asserting v2's envelope matches byte-for-byte (modulo nonces and timestamps). + +### 6.3 HTTP API compatibility + +v2's `services/api/my.ts` and `services/api/public.ts` call the same endpoints v1 uses. `POST /cli-sync`, `POST /invites/:code/claim`, `POST /hook/set-status`, `POST /upload`, `GET /download/:id`, etc. — all unchanged. + +The new user flow endpoints (`/api/auth/cli/device-code/*`, `POST /api/my/meshes`, `POST /api/my/meshes/:slug/invites`, `DELETE /api/my/sessions/current`, `GET /api/my/profile`) are net new but live in the claudemesh.com web app (`apps/web/`), not the broker. They're ~200 lines of Next.js API routes total. + +### 6.4 Backend services unchanged + +Postgres, Neo4j, Qdrant, MinIO, Docker — all on their v1 configurations. No role changes, no schema changes, no hardening, no egress controls. These all stay as Pass 2 work. + +--- + +## 7. Implementation phases + +Phases are organized by what gets built in each step. No time estimates — each phase ships when its acceptance criteria are met. 
+ +### Phase 0 — Scaffold + +- Create `apps/cli-v2/` with the full source tree +- All files exist as stubs throwing `NotImplementedError` with the relevant spec reference in the header +- `package.json`, `tsconfig.json`, `bunfig.toml`, `build.ts`, `.eslintrc.cjs`, `dependency-cruiser.config.js`, `biome.json` all configured +- Custom ESLint plugin `tools/eslint-plugin-claudemesh/` scaffolded (3 rules) +- Test files for every layer of the parity plan scaffolded (~300 files) +- CI runs: lint green, type-check green, zero violations in dependency-cruiser + boundaries +- **Done when**: scaffold compiles and tests run (even if most tests are `NotImplementedError`) + +### Phase 1 — Foundation layers + +- `types/`, `constants/`, `utils/`, `locales/` fully implemented +- Pure services (`services/device/`, `services/clipboard/`, `services/spawn/`, `services/crypto/`, `services/i18n/`, `services/logger/`, `services/lifecycle/`) +- Unit tests for each (colocated) +- No user-visible behavior yet +- **Done when**: pure-layer unit tests all pass, 80%+ branch coverage on foundation services + +### Phase 2 — Config + state + API client + +- `services/config/` reads + writes `~/.claudemesh/config.json` with 0600 enforcement +- `services/state/last-used.ts` persists mesh picker cache +- `services/api/client.ts` typed fetch wrapper with retry +- `services/api/my.ts` + `services/api/public.ts` endpoint helpers +- Unit tests for each +- **Done when**: config round-trips, API client calls a mock server + +### Phase 3 — Broker client (port from v1 ws/) + +- `services/broker/ws-client.ts` ports v1's `ws/client.ts` into the facade pattern +- `services/broker/envelope.ts` and `hello-sig.ts` port v1's crypto primitives +- All 85 WS message types have typed facade methods +- **Contract test layer** (parity plan layer 2) goes green +- **Done when**: every WS contract test passes against captured v1 envelopes + +### Phase 4 — MCP server (port from v1 mcp/server.ts) + +- `mcp/server.ts` + 
`router.ts` + 21 tool family files +- Each tool family ports v1's handler logic unchanged, calling the new broker facade instead of v1's direct WS client +- **MCP tool parity test layer** (parity plan layer 3) goes green +- **Done when**: all 79 MCP tool parity tests pass + +### Phase 5 — Auth + CLI user flows + +- `services/auth/` full implementation: device-code flow, token store, dashboard sync, callback listener, browser opener +- `commands/register.ts`, `commands/login.ts`, `commands/logout.ts`, `commands/whoami.ts` +- Backend work (if needed): add `/api/auth/cli/device-code/*` endpoints to `apps/web/` +- `commands/new.ts`, `commands/invite.ts` — call the new backend endpoints +- End-to-end test of full flow: register → login → new mesh → invite → logout +- **Done when**: fresh user can register, create a mesh, invite a teammate, all from the CLI + +### Phase 6 — Mesh + invite + remaining commands + +- `services/mesh/` — list, rename, leave, join, resolve-target (port from v1) +- `services/invite/` — generate, parse-url, claim (port from v1) +- `commands/join.ts`, `commands/list.ts`, `commands/rename.ts`, `commands/leave.ts`, `commands/peers.ts`, `commands/send.ts`, `commands/inbox.ts`, `commands/state.ts`, `commands/info.ts`, `commands/remember.ts`, `commands/recall.ts`, `commands/remind.ts`, `commands/profile.ts`, `commands/status.ts`, `commands/sync.ts` +- `commands/install.ts`, `commands/uninstall.ts` — port from v1 (writes to `~/.claude.json` + `~/.claude/settings.json`) +- `commands/doctor.ts` — port from v1's 7 checks +- **Parity test layer** (parity plan layer 1) goes green for all non-launch commands +- **Done when**: every command in §4.1 main help works identically to v1 + +### Phase 7 — UI flows (welcome wizard + launch) + +- `ui/welcome/` — welcome wizard screens (port from v1 commands/welcome.ts) +- `ui/launch/` — launch flow screens (port from v1 commands/launch.ts interactive bits) +- `commands/welcome.ts` → renders `ui/welcome/WelcomeScreen` 
+- Bare `claudemesh` dispatches to welcome (first run) or launch (returning) +- **Parity test layer** for first-run + launch journeys goes green +- **Done when**: welcome wizard and launch flow work identically to v1 + +### Phase 8 — Golden JSON + E2E tests + +- `tests/golden/` — lock JSON output shape for every command that supports `--json` +- `tests/e2e/` — full journey tests against a real broker in testcontainers +- E2E tests cover the 28 flows from parity plan §6 +- **Done when**: all e2e tests green on `main`, all golden tests green on PR + +### Phase 9 — Docs + ship + +- `apps/cli-v2/README.md` — usage, install, architecture overview +- `apps/cli-v2/CHANGELOG.md` — v1.0.0 entry listing changes from v0.10.5 +- Root `README.md` updated to reference v2 +- `docs/quickstart.md` rewritten for v2 command surface +- Migration notes: none (no users to migrate) +- **Done when**: docs complete, all layers green, ready to ship + +### Phase 10 — Atomic swap + v1.0.0 publish + +Single atomic commit: +``` +rm -rf apps/cli && mv apps/cli-v2 apps/cli +git commit -m "v2: replace v1 CLI with the refactored v2 implementation" +``` + +Followed by: +``` +cd apps/cli && pnpm publish --access public --no-git-checks +git tag v1.0.0 +git push origin main --tags +``` + +No legacy preservation, no rollback window, no deprecation period. Clean break. (Feature is at `apps/cli/` after the swap, not `apps/cli-v2/`.) + +--- + +## 8. 
Acceptance criteria + +v2 Pass 1 ships when all of these are true: + +### Test gates + +- [ ] Every test file in `2026-04-11-v2-parity-test-plan.md` has a passing assertion (no `NotImplementedError` remaining) +- [ ] Layer 1 parity: ~70 test files covering v1 inventory §12 regression checks — all green +- [ ] Layer 2 contract: ~85 test files covering WS message types — all green against captured v1 envelopes +- [ ] Layer 3 MCP tool parity: 79 test files covering every MCP tool — all green +- [ ] Layer 4 e2e: 28 journey tests — all green against real broker in testcontainers +- [ ] Layer 5 golden JSON: 12 test files locking `--json` output shapes — all green +- [ ] Layer 6 facade units: colocated tests + boundary scanner — all green +- [ ] Layer 7 port-forwarded v1 tests: crypto round-trip + invite parse — all green + +### Lint / type / structure gates + +- [ ] `biome check` — zero violations +- [ ] `tsc --noEmit` — zero errors, strict mode +- [ ] ESLint `boundaries` plugin — zero violations (facade pattern holds) +- [ ] ESLint custom rules (`no-index-reexport-internal`, `type-imports-count-as-edges`, `no-dynamic-service-imports`) — zero violations +- [ ] `dependency-cruiser` — zero circular imports, zero layer violations, zero v1 imports (`no-v1-dependencies` rule) +- [ ] `no-restricted-imports` — zero violations + +### Coverage gates + +- [ ] `services/*/*.test.ts` — 80%+ branch coverage on non-broker services +- [ ] `services/broker/*` — 70%+ branch coverage via integration tests (WS client is the bulk of this) +- [ ] No file below 60% branch coverage without a documented reason in a PR comment + +### Build / ship gates + +- [ ] `bun build.ts` produces `dist/entrypoints/cli.js` and `dist/entrypoints/mcp.js` +- [ ] Gzipped JS bundle < 1.2 MB (enforced in `build.ts`) +- [ ] `bin/claudemesh` shebang works on macOS + Linux +- [ ] `pnpm publish` dry-run passes +- [ ] Published package installable via `npm i -g claudemesh-cli@1.0.0-rc.1` +- [ ] Cold start < 400 ms on 
Apple M2 Pro (benchmarked in `tests/bench/cold-start.bench.ts`) + +### Behavioral gates (against v1 feature inventory §12) + +- [ ] First-run install works on a fresh machine +- [ ] Welcome wizard shows on bare command with no config +- [ ] Launch works on returning machine (bare + flag-first) +- [ ] All 79 MCP tools dispatch correctly through the v2 MCP server +- [ ] Status priority engine (hook > manual > jsonl) unchanged +- [ ] Message queue priority delivery (now / next / low) unchanged +- [ ] Cryptographic integrity (Ed25519 signatures, NaCl envelopes, AES-GCM files) unchanged +- [ ] Scheduled reminders survive broker restart +- [ ] URL watch detects changes + survives broker restart +- [ ] Telegram bridge continues to work on the broker side (verified via e2e against broker directly, not via v2 CLI subcommand which is gone) +- [ ] Dashboard sync (`POST /cli-sync`) works +- [ ] Webhooks (`POST /hook/:meshId/:webhookId`) route external requests to mesh messages +- [ ] Doctor's 7 checks all pass on a clean install + +### New behavior gates (net new in Pass 1) + +- [ ] `claudemesh register` creates a new claudemesh.com account from the CLI +- [ ] `claudemesh login` completes the device-code flow +- [ ] `claudemesh logout` revokes the session + clears local credentials +- [ ] `claudemesh whoami` prints current identity +- [ ] `claudemesh new <name>` creates a mesh via `POST /api/my/meshes` +- [ ] `claudemesh invite` generates an invite via `POST /api/my/meshes/:slug/invites` +- [ ] `claudemesh invite <email>` sends a transactional email (if the backend supports it) +- [ ] Complete first-time user journey: `register → new → invite → send → logout` works end-to-end without opening the browser except for account signup email verification + +### Regression gates (nothing should break) + +- [ ] Zero previously-passing tests become failing +- [ ] No `--json` output shape change for any command that supported `--json` in v1 +- [ ] Broker test suite (unchanged in Pass 1) stays green + +When 
all gates are true, Pass 1 ships. + +--- + +## 9. What gets renamed vs what gets rewritten + +### Files that are ports (move + update imports, preserve logic) + +These files' business logic is preserved verbatim from v1. The only changes are folder location, import paths, and the addition of a facade wrapper. + +| v1 file | v2 location | Change | +|---|---|---| +| `cli/src/ws/client.ts` | `services/broker/ws-client.ts` | Import paths, facade wrap | +| `cli/src/ws/manager.ts` | `services/broker/manager.ts` | Import paths | +| `cli/src/crypto/envelope.ts` | `services/broker/envelope.ts` | Folder move only | +| `cli/src/crypto/hello-sig.ts` | `services/broker/hello-sig.ts` | Folder move only | +| `cli/src/crypto/keypair.ts` | `services/crypto/keypair.ts` | Facade wrap | +| `cli/src/crypto/file-crypto.ts` | `services/crypto/file-crypto.ts` | Facade wrap | +| `cli/src/auth/callback-listener.ts` | `services/auth/callback-listener.ts` | Folder move | +| `cli/src/auth/open-browser.ts` | `services/spawn/browser.ts` | Consolidated with launch spawn | +| `cli/src/auth/pairing-code.ts` | `services/auth/pairing-code.ts` | Folder move | +| `cli/src/auth/sync-with-broker.ts` | `services/auth/dashboard-sync.ts` | Folder move + facade wrap | +| `cli/src/invite/parse.ts` | `services/invite/parse-url.ts` | Folder move | +| `cli/src/invite/enroll.ts` | `services/invite/claim.ts` | Folder move | +| `cli/src/lib/invite-v2.ts` | `services/invite/parse-url.ts` (merged) | Consolidated | +| `cli/src/state/config.ts` | `services/config/` (split into read + write) | Split + facade wrap | +| `cli/src/mcp/server.ts` (2139 lines) | `mcp/server.ts` + `mcp/router.ts` + 21 tool family files | Split by family | +| `cli/src/mcp/tools.ts` | `mcp/tools/*.ts` — 21 family files | Split by family | +| `cli/src/templates/index.ts` | `templates/` — 5 individual files | Split by template name | +| `cli/src/commands/launch.ts` (775 lines) | `commands/welcome.ts` + `ui/launch/` + launch handler dispatched from 
bare command | Split | +| `cli/src/commands/welcome.ts` | `commands/welcome.ts` + `ui/welcome/` | Split | +| `cli/src/commands/install.ts` | `commands/install.ts` | Folder move, port ~538 lines | +| `cli/src/commands/doctor.ts` | `commands/doctor.ts` + `services/health/check-*.ts` | Split into health checks | +| `cli/src/commands/join.ts` | `commands/join.ts` + `services/mesh/join.ts` | Split into command + service | +| `cli/src/commands/list.ts` | `commands/list.ts` + `services/mesh/list.ts` | Split | +| `cli/src/commands/leave.ts` | `commands/leave.ts` + `services/mesh/leave.ts` | Split | +| `cli/src/commands/peers.ts` | `commands/peers.ts` | Folder move | +| `cli/src/commands/send.ts` | `commands/send.ts` | Folder move | +| `cli/src/commands/inbox.ts` | `commands/inbox.ts` | Folder move | +| `cli/src/commands/state.ts` | `commands/state.ts` | Folder move | +| `cli/src/commands/info.ts` | `commands/info.ts` | Folder move | +| `cli/src/commands/memory.ts` | `commands/remember.ts` + `commands/recall.ts` | Split into two commands | +| `cli/src/commands/remind.ts` | `commands/remind.ts` | Folder move | +| `cli/src/commands/profile.ts` | `commands/profile.ts` | Folder move | +| `cli/src/commands/status.ts` | `commands/status.ts` | Folder move | +| `cli/src/commands/sync.ts` | `commands/sync.ts` | Folder move | +| `cli/src/commands/hook.ts` | `commands/hook.ts` | Folder move | +| `cli/src/commands/create.ts` | `commands/new.ts` | **Renamed** (from `create` to `new`, matches spec) | +| `cli/src/commands/seed-test-mesh.ts` | `commands/seed-test-mesh.ts` | Folder move | +| `cli/src/commands/connect-telegram.ts` | `commands/connect-telegram.ts` + `ui/telegram/ConnectWizard.tsx` | Port + split: command stays thin, mesh picker + QR render live in Ink wizard (§5.7) | +| `cli/src/tui/colors.ts` | `ui/styles.ts` | Folder move, keep v1 values (Pass 2 redesigns) | +| `cli/src/tui/index.ts` | `ui/index.ts` | Folder move | +| `cli/src/tui/screen.ts` | `ui/screen.ts` | Folder move | 
+| `cli/src/tui/spinner.ts` | `ui/spinner.ts` | Folder move | +| `cli/src/env.ts` | `constants/urls.ts` + `constants/paths.ts` (split) | Consolidated into constants | +| `cli/src/version.ts` | `constants/version.ts` | Folder move | +| `cli/src/logo-spinner.ts` | `ui/logo-spinner.ts` | Folder move | +| `cli/src/index.ts` | `entrypoints/cli.ts` + `commands/index.ts` | Split entry point from command registry | +| `cli/src/__tests__/crypto-roundtrip.test.ts` | `tests/unit/crypto-roundtrip.test.ts` | Folder move | +| `cli/src/__tests__/invite-parse.test.ts` | `tests/unit/invite-parse.test.ts` | Folder move | + +### Files that are NET NEW in Pass 1 + +| v2 file | Purpose | +|---|---| +| `commands/register.ts` | Account creation via browser handoff | +| `commands/login.ts` | Device-code OAuth | +| `commands/logout.ts` | Revoke session | +| `commands/whoami.ts` | Identity check | +| `commands/invite.ts` | Generate invite (was dashboard-only in v1) | +| `commands/rename.ts` | Rename a mesh (was dashboard-only in v1) | +| `services/auth/device-code.ts` | Device-code flow orchestration | +| `services/auth/token-store.ts` | `~/.claudemesh/auth.json` R/W | +| `services/auth/client.ts` | HTTP calls to new `/api/auth/cli/*` endpoints | +| `services/auth/facade.ts` | Public auth API | +| `services/mesh/create.ts` | `POST /api/my/meshes` | +| `services/mesh/facade.ts` | Public mesh API | +| `services/invite/generate.ts` | `POST /api/my/meshes/:slug/invites` | +| `services/invite/send-email.ts` | Email delivery through backend | +| `services/invite/facade.ts` | Public invite API | +| `services/api/facade.ts` | Public HTTP API | +| `services/config/facade.ts` | Public config API | +| `services/state/last-used.ts` | Last-used mesh/name/role cache | +| `services/state/facade.ts` | Public state API | +| `services/device/facade.ts` | Device info for device-code registration | +| `services/clipboard/facade.ts` | Clipboard read | +| `services/spawn/facade.ts` | Claude + browser spawning | 
+| `services/telemetry/facade.ts` | Usage event emission (opt-out) | +| `services/health/facade.ts` | Doctor checks | +| `services/update/facade.ts` | npm registry version poll | +| `services/i18n/facade.ts` | String templates (no ICU in Pass 1) | +| `services/lifecycle/facade.ts` | Service lifecycle manager | +| `services/logger/facade.ts` | Structured logger | +| `cli/argv.ts` | argv parsing | +| `cli/print.ts` | stdout helpers | +| `cli/structured-io.ts` | JSON output | +| `cli/exit.ts` | Exit code + cleanup hooks | +| `cli/update-notice.ts` | Update banner | +| `cli/handlers/signal.ts` | SIGINT/SIGTERM | +| `cli/handlers/error.ts` | Top-level error handler | +| `cli/output/list.ts` | Plain-text list renderer | +| `cli/output/peers.ts` | Plain-text peers renderer | +| `cli/output/whoami.ts` | Plain-text whoami renderer | +| `cli/output/version.ts` | Version output | +| `tools/eslint-plugin-claudemesh/` | Custom ESLint plugin (3 rules) | +| `tests/helpers/v1-runner.ts` | Spawns v1 CLI for parity comparisons | +| `tests/helpers/v2-runner.ts` | Spawns v2 CLI for parity comparisons | +| `tests/helpers/wire-capture.ts` | Records WS envelopes | +| `tests/helpers/mock-broker.ts` | In-memory broker | +| `tests/helpers/real-broker.ts` | Testcontainers broker harness | + +### Files that are DELETED (no v2 replacement) + +| v1 file | Why deleted | +|---|---| +| `cli/src/commands/disconnect-telegram.ts` | Bridge teardown is handled inside Telegram (`/revoke` in the bot) or by leaving the mesh; a CLI wrapper is purely cosmetic | + +### Files that are NEW in the backend (apps/web/) + +| File | Purpose | +|---|---| +| `apps/web/src/app/api/auth/cli/device-code/new/route.ts` | Issue device code | +| `apps/web/src/app/api/auth/cli/device-code/[code]/route.ts` | Poll device code status | +| `apps/web/src/app/api/auth/cli/device-code/[code]/approve/route.ts` | Approve from browser | +| `apps/web/src/app/[locale]/cli-auth/page.tsx` | Browser-side approval UI | +| 
`apps/web/src/app/api/my/meshes/route.ts` | POST = create mesh (may already exist) | +| `apps/web/src/app/api/my/meshes/[slug]/invites/route.ts` | POST = generate invite (may already exist) | +| `apps/web/src/app/api/my/profile/route.ts` | GET = whoami (may already exist) | +| `apps/web/src/app/api/my/sessions/current/route.ts` | DELETE = logout | + +These are small Next.js API routes (~50 lines each) that wrap Better Auth's session management. The exact set depends on what already exists in the web app — an audit is part of Phase 5. + +--- + +## 10. Decisions locked in by this spec + +If you want to change any of these, update this spec and run the parity tests again. + +1. **CLI command surface is flat.** No `advanced` prefix. No `launch` subcommand. No `connect telegram` / `disconnect telegram`. The main `--help` groups commands visually but the namespace is flat. + +2. **Pass 1 preserves every v1 feature in the inventory.** No feature cuts, no "while we're at it, let's also change X." The only removals are CLI subcommands that don't add value (launch, connect-telegram, disconnect-telegram). + +3. **The broker is unchanged in Pass 1.** No files added, modified, or removed. All broker improvements are deferred to Pass 2. + +4. **Facade pattern is mandatory.** Every service has a `facade.ts` as the single public entry point. ESLint boundaries enforce this. + +5. **No Pass 2 features sneak in.** No local SQLite, no Lamport, no sync daemon, no hardened Postgres, no MCP catalog tiering, no ICU, no accessibility matrix, no session_kind enum, no six-color palette redesign, no facade bypass path closures beyond what exists in the Pass 2 facade spec. + +6. **No backwards compatibility hedges.** There are no users to migrate. v2 picks the best names, shapes, and conventions without alias support or deprecation windows. + +7. **The parity test suite is the acceptance criteria.** Not vibes, not "looks good to me," not code review alone. A red test means not-done. + +8. 
**The broker's WS protocol is the only compatibility contract that matters.** Everything above that layer (command names, config shape, JSON output, env vars, exit codes, stdout text) can change freely. + +9. **Atomic swap, clean break.** When Pass 1 is ready, a single commit replaces `apps/cli` with `apps/cli-v2`. No legacy preservation. + +10. **Pass 1 is done when the acceptance criteria in §8 are all green.** Nothing more, nothing less. + +--- + +**End of Pass 1 spec.** diff --git a/.claude/scheduled_tasks.lock b/.claude/scheduled_tasks.lock new file mode 100644 index 0000000..ecf3078 --- /dev/null +++ b/.claude/scheduled_tasks.lock @@ -0,0 +1 @@ +{"sessionId":"ae5dbe38-9c56-4d07-9fb6-a38cb8a250a6","pid":4612,"acquiredAt":1776217467441} \ No newline at end of file diff --git a/.claude/settings.local.json b/.claude/settings.local.json new file mode 100644 index 0000000..38e0ac6 --- /dev/null +++ b/.claude/settings.local.json @@ -0,0 +1,22 @@ +{ + "permissions": { + "allow": [ + "Bash(/Users/agutierrez/.claude/hooks/play-tts.sh Connected to mesh, setting up:*)", + "Bash(/Users/agutierrez/.claude/hooks/play-tts.sh Connected to mesh, setting up session:*)", + "Bash(npx tsx:*)", + "Bash(grep -r \"defineCommand\\\\|export const run\" /Users/agutierrez/Desktop/claudemesh/apps/cli/src/commands/*.ts)", + "Bash(pnpm build:*)", + "Bash(/Users/agutierrez/.claude/hooks/play-tts.sh Ready to help:*)", + "Bash(pnpm publish:*)", + "Bash(grep -E \"\\\\.\\(tsx?|jsx?\\)$\")", + "Bash(/Users/agutierrez/.claude/hooks/play-tts.sh Investigating dropped keystrokes in claudemesh launch:*)", + "Read(//Users/agutierrez/.claude/**)", + "Read(//private/tmp/**)", + "Bash(timeout 3 node dist/index.js mcp)", + "Bash(/Users/agutierrez/.claude/hooks/play-tts.sh Fixed ZodError in MCP notification handler:*)", + "Bash(npm i:*)", + "Bash(claudemesh --version)", + "Bash(/Users/agutierrez/.claude/hooks/play-tts.sh:*)" + ] + } +} diff --git a/.claude/skills/integration-nextjs-app-router/SKILL.md 
b/.claude/skills/integration-nextjs-app-router/SKILL.md new file mode 100644 index 0000000..18cac13 --- /dev/null +++ b/.claude/skills/integration-nextjs-app-router/SKILL.md @@ -0,0 +1,58 @@ +--- +name: integration-nextjs-app-router +description: PostHog integration for Next.js App Router applications +metadata: + author: PostHog + version: 1.9.5 +--- + +# PostHog integration for Next.js App Router + +This skill helps you add PostHog analytics to Next.js App Router applications. + +## Workflow + +Follow these steps in order to complete the integration: + +1. `basic-integration-1.0-begin.md` - PostHog Setup - Begin ← **Start here** +2. `basic-integration-1.1-edit.md` - PostHog Setup - Edit +3. `basic-integration-1.2-revise.md` - PostHog Setup - Revise +4. `basic-integration-1.3-conclude.md` - PostHog Setup - Conclusion + +## Reference files + +- `references/EXAMPLE.md` - Next.js App Router example project code +- `references/next-js.md` - Next.js - docs +- `references/identify-users.md` - Identify users - docs +- `references/basic-integration-1.0-begin.md` - PostHog setup - begin +- `references/basic-integration-1.1-edit.md` - PostHog setup - edit +- `references/basic-integration-1.2-revise.md` - PostHog setup - revise +- `references/basic-integration-1.3-conclude.md` - PostHog setup - conclusion + +The example project shows the target implementation pattern. Consult the documentation for API details. + +## Key principles + +- **Environment variables**: Always use environment variables for PostHog keys. Never hardcode them. +- **Minimal changes**: Add PostHog code alongside existing integrations. Don't replace or restructure existing code. +- **Match the example**: Your implementation should follow the example project's patterns as closely as possible. 
+ +## Framework guidelines + +- For Next.js 15.3+, initialize PostHog in instrumentation-client.ts for the simplest setup +- For feature flags, use useFeatureFlagEnabled() or useFeatureFlagPayload() hooks - they handle loading states and external sync automatically +- Add analytics capture in event handlers where user actions occur, NOT in useEffect reacting to state changes +- Do NOT use useEffect for data transformation - calculate derived values during render instead +- Do NOT use useEffect to respond to user events - put that logic in the event handler itself +- Do NOT use useEffect to chain state updates - calculate all related updates together in the event handler +- Do NOT use useEffect to notify parent components - call the parent callback alongside setState in the event handler +- To reset component state when a prop changes, pass the prop as the component's key instead of using useEffect +- useEffect is ONLY for synchronizing with external systems (non-React widgets, browser APIs, network subscriptions) + +## Identifying users + +Identify users during login and signup events. Refer to the example code and documentation for the correct identify pattern for this framework. If both frontend and backend code exist, pass the client-side session and distinct ID using `X-POSTHOG-DISTINCT-ID` and `X-POSTHOG-SESSION-ID` headers to maintain correlation. + +## Error tracking + +Add PostHog error tracking to relevant files, particularly around critical user flows and API boundaries. 
diff --git a/.claude/skills/integration-nextjs-app-router/references/EXAMPLE.md b/.claude/skills/integration-nextjs-app-router/references/EXAMPLE.md new file mode 100644 index 0000000..4dcacd0 --- /dev/null +++ b/.claude/skills/integration-nextjs-app-router/references/EXAMPLE.md @@ -0,0 +1,706 @@ +# PostHog Next.js App Router Example Project + +Repository: https://github.com/PostHog/context-mill +Path: basics/next-app-router + +--- + +## README.md + +# PostHog Next.js app router example + +This is a [Next.js](https://nextjs.org) App Router example demonstrating PostHog integration with product analytics, session replay, feature flags, and error tracking. + +## Features + +- **Product analytics**: Track user events and behaviors +- **Session replay**: Record and replay user sessions +- **Error tracking**: Capture and track errors +- **User authentication**: Demo login system with PostHog user identification +- **Server-side & Client-side tracking**: Examples of both tracking methods +- **Reverse proxy**: PostHog ingestion through Next.js rewrites + +## Getting started + +### 1. Install dependencies + +```bash +npm install +# or +pnpm install +``` + +### 2. Configure environment variables + +Create a `.env.local` file in the root directory: + +```bash +NEXT_PUBLIC_POSTHOG_PROJECT_TOKEN=your_posthog_project_token +NEXT_PUBLIC_POSTHOG_HOST=https://us.i.posthog.com +``` + +Get your PostHog project token from your [PostHog project settings](https://app.posthog.com/project/settings). + +### 3. Run the development server + +```bash +npm run dev +# or +pnpm dev +``` + +Open [http://localhost:3000](http://localhost:3000) with your browser to see the app. 
+ +## Project structure + +``` +src/ +├── app/ +│ ├── api/ +│ │ └── auth/ +│ │ └── login/ +│ │ └── route.ts # Login API with server-side tracking +│ ├── burrito/ +│ │ └── page.tsx # Demo feature page with event tracking +│ ├── profile/ +│ │ └── page.tsx # User profile with error tracking demo +│ ├── layout.tsx # Root layout with providers +│ ├── page.tsx # Home/Login page +│ └── globals.css # Global styles +├── components/ +│ └── Header.tsx # Navigation header with auth state +├── contexts/ +│ └── AuthContext.tsx # Authentication context with PostHog integration +└── lib/ + └── posthog-server.ts # Server-side PostHog client + +instrumentation-client.ts # Client-side PostHog initialization +``` + +## Key integration points + +### Client-side initialization (instrumentation-client.ts) + +```typescript +import posthog from "posthog-js" + +posthog.init(process.env.NEXT_PUBLIC_POSTHOG_PROJECT_TOKEN!, { + api_host: "/ingest", + ui_host: "https://us.posthog.com", + defaults: '2026-01-30', + capture_exceptions: true, + debug: process.env.NODE_ENV === "development", +}); +``` + +### User identification (AuthContext.tsx) + +```typescript +posthog.identify(username, { + username: username, +}); +``` + +### Event tracking (burrito/page.tsx) + +```typescript +posthog.capture('burrito_considered', { + total_considerations: count, + username: username, +}); +``` + +### Error tracking (profile/page.tsx) + +```typescript +posthog.captureException(error); +``` + +### Server-side tracking (app/api/auth/login/route.ts) + +```typescript +const posthog = getPostHogClient(); +posthog.capture({ + distinctId: username, + event: 'server_login', + properties: { ... } +}); +``` + +## App router differences from pages router + +This example uses Next.js App Router instead of Pages Router. Key differences: + +1. **File-based routing**: Pages in `src/app/` instead of `src/pages/` +2. **layout.tsx**: Root layout component wraps all pages +3. 
**API Routes**: Located in `src/app/api/` with `route.ts` files +4. **'use client'**: Client components need explicit directive +5. **useRouter**: From `next/navigation` instead of `next/router` +6. **Metadata**: Exported from layout/page instead of Head component +7. **Server Components**: Components are server-side by default + +## Learn more + +- [PostHog Documentation](https://posthog.com/docs) +- [Next.js App Router Documentation](https://nextjs.org/docs/app) +- [PostHog Next.js Integration Guide](https://posthog.com/docs/libraries/next-js) + +## Deploy on Vercel + +The easiest way to deploy your Next.js app is to use the [Vercel Platform](https://vercel.com/new). + +Check out the [Next.js deployment documentation](https://nextjs.org/docs/app/building-your-application/deploying) for more details. + +--- + +## .env.example + +```example +# PostHog Configuration +# Get your PostHog project token from: https://app.posthog.com/project/settings +NEXT_PUBLIC_POSTHOG_PROJECT_TOKEN=your_posthog_project_token_here +# NEXT_PUBLIC_POSTHOG_HOST=https://eu.i.posthog.com +NEXT_PUBLIC_POSTHOG_HOST=https://us.i.posthog.com +``` + +--- + +## instrumentation-client.ts + +```ts +import posthog from "posthog-js" + +posthog.init(process.env.NEXT_PUBLIC_POSTHOG_PROJECT_TOKEN!, { + api_host: "/ingest", + ui_host: "https://us.posthog.com", + // Include the defaults option as required by PostHog + defaults: '2026-01-30', + // Enables capturing unhandled exceptions via Error Tracking + capture_exceptions: true, + // Turn on debug in development mode + debug: process.env.NODE_ENV === "development", +}); + +//IMPORTANT: Never combine this approach with other client-side PostHog initialization approaches, especially components like a PostHogProvider. instrumentation-client.ts is the correct solution for initializing client-side PostHog in Next.js 15.3+ apps. 
+``` + +--- + +## next.config.ts + +```ts +import type { NextConfig } from "next"; + +const nextConfig: NextConfig = { + /* config options here */ + async rewrites() { + return [ + { + source: "/ingest/static/:path*", + destination: "https://us-assets.i.posthog.com/static/:path*", + }, + { + source: "/ingest/:path*", + destination: "https://us.i.posthog.com/:path*", + }, + ]; + }, + // This is required to support PostHog trailing slash API requests + skipTrailingSlashRedirect: true, +}; + +export default nextConfig; + +``` + +--- + +## src/app/api/auth/login/route.ts + +```ts +import { NextResponse } from 'next/server'; +import { getPostHogClient } from '@/lib/posthog-server'; + +const users = new Map(); + +export async function POST(request: Request) { + const { username, password } = await request.json(); + + if (!username || !password) { + return NextResponse.json({ error: 'Username and password required' }, { status: 400 }); + } + + let user = users.get(username); + const isNewUser = !user; + + if (!user) { + user = { username, burritoConsiderations: 0 }; + users.set(username, user); + } + + // Capture server-side login event + const posthog = getPostHogClient(); + posthog.capture({ + distinctId: username, + event: 'server_login', + properties: { + username: username, + isNewUser: isNewUser, + source: 'api' + } + }); + + // Identify user on server side + posthog.identify({ + distinctId: username, + properties: { + username: username, + createdAt: isNewUser ? 
new Date().toISOString() : undefined + } + }); + + return NextResponse.json({ success: true, user }); +} +``` + +--- + +## src/app/burrito/page.tsx + +```tsx +'use client'; + +import { useState } from 'react'; +import { useAuth } from '@/contexts/AuthContext'; +import { useRouter } from 'next/navigation'; +import posthog from 'posthog-js'; + +export default function BurritoPage() { + const { user, incrementBurritoConsiderations } = useAuth(); + const router = useRouter(); + const [hasConsidered, setHasConsidered] = useState(false); + + // Redirect to home if not logged in + if (!user) { + router.push('/'); + return null; + } + + const handleConsideration = () => { + incrementBurritoConsiderations(); + setHasConsidered(true); + setTimeout(() => setHasConsidered(false), 2000); + + // Capture burrito consideration event + posthog.capture('burrito_considered', { + total_considerations: user.burritoConsiderations + 1, + username: user.username, + }); + }; + + return ( +
+

Burrito consideration zone

+

Take a moment to truly consider the potential of burritos.

+ +
+ + + {hasConsidered && ( +

+ Thank you for your consideration! Count: {user.burritoConsiderations} +

+ )} +
+ +
+

Consideration stats

+

Total considerations: {user.burritoConsiderations}

+
+
+ ); +} +``` + +--- + +## src/app/layout.tsx + +```tsx +import type { Metadata } from "next"; +import "./globals.css"; +import { AuthProvider } from "@/contexts/AuthContext"; +import Header from "@/components/Header"; + +export const metadata: Metadata = { + title: "Burrito Consideration App", + description: "Consider the potential of burritos", +}; + +export default function RootLayout({ + children, +}: Readonly<{ + children: React.ReactNode; +}>) { + return ( + + + +
+
{children}
+ + + + ); +} + +``` + +--- + +## src/app/page.tsx + +```tsx +'use client'; + +import { useState } from 'react'; +import { useAuth } from '@/contexts/AuthContext'; + +export default function Home() { + const { user, login } = useAuth(); + const [username, setUsername] = useState(''); + const [password, setPassword] = useState(''); + const [error, setError] = useState(''); + + const handleSubmit = async (e: React.FormEvent) => { + e.preventDefault(); + setError(''); + + try { + const success = await login(username, password); + if (success) { + setUsername(''); + setPassword(''); + } else { + setError('Please provide both username and password'); + } + } catch (err) { + console.error('Login failed:', err); + setError('An error occurred during login'); + } + }; + + if (user) { + return ( +
+

Welcome back, {user.username}!

+

You are now logged in. Feel free to explore:

+
    +
  • Consider the potential of burritos
  • +
  • View your profile and statistics
  • +
+
+ ); + } + + return ( +
+

Welcome to Burrito Consideration App

+

Please sign in to begin your burrito journey

+ +
+
+ + setUsername(e.target.value)} + placeholder="Enter any username" + /> +
+ +
+ + setPassword(e.target.value)} + placeholder="Enter any password" + /> +
+ + {error &&

{error}

} + + +
+ +

+ Note: This is a demo app. Use any username and password to sign in. +

+
+ ); +} +``` + +--- + +## src/app/profile/page.tsx + +```tsx +'use client'; + +import { useAuth } from '@/contexts/AuthContext'; +import { useRouter } from 'next/navigation'; +import posthog from 'posthog-js'; + +export default function ProfilePage() { + const { user } = useAuth(); + const router = useRouter(); + + // Redirect to home if not logged in + if (!user) { + router.push('/'); + return null; + } + + const triggerTestError = () => { + try { + throw new Error('Test error for PostHog error tracking'); + } catch (err) { + posthog.captureException(err); + console.error('Captured error:', err); + alert('Error captured and sent to PostHog!'); + } + }; + + return ( +
+

User Profile

+ +
+

Your Information

+

Username: {user.username}

+

Burrito Considerations: {user.burritoConsiderations}

+
+ +
+ +
+ +
+

Your Burrito Journey

+ {user.burritoConsiderations === 0 ? ( +

You haven't considered any burritos yet. Visit the Burrito Consideration page to start!

+ ) : user.burritoConsiderations === 1 ? ( +

You've considered the burrito potential once. Keep going!

+ ) : user.burritoConsiderations < 5 ? ( +

You're getting the hang of burrito consideration!

+ ) : user.burritoConsiderations < 10 ? ( +

You're becoming a burrito consideration expert!

+ ) : ( +

You are a true burrito consideration master! 🌯

+ )} +
+
+ ); +} +``` + +--- + +## src/components/Header.tsx + +```tsx +'use client'; + +import Link from 'next/link'; +import { useAuth } from '@/contexts/AuthContext'; + +export default function Header() { + const { user, logout } = useAuth(); + + return ( +
+
+ +
+ {user ? ( + <> + Welcome, {user.username}! + + + ) : ( + Not logged in + )} +
+
+
+ ); +} +``` + +--- + +## src/contexts/AuthContext.tsx + +```tsx +'use client'; + +import { createContext, useContext, useState, ReactNode } from 'react'; +import posthog from 'posthog-js'; + +interface User { + username: string; + burritoConsiderations: number; +} + +interface AuthContextType { + user: User | null; + login: (username: string, password: string) => Promise; + logout: () => void; + incrementBurritoConsiderations: () => void; +} + +const AuthContext = createContext(undefined); + +const users: Map = new Map(); + +export function AuthProvider({ children }: { children: ReactNode }) { + // Use lazy initializer to read from localStorage only once on mount + const [user, setUser] = useState(() => { + if (typeof window === 'undefined') return null; + + const storedUsername = localStorage.getItem('currentUser'); + if (storedUsername) { + const existingUser = users.get(storedUsername); + if (existingUser) { + return existingUser; + } + } + return null; + }); + + const login = async (username: string, password: string): Promise => { + try { + const response = await fetch('/api/auth/login', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ username, password }), + }); + + if (response.ok) { + const { user: userData } = await response.json(); + + let localUser = users.get(username); + if (!localUser) { + localUser = userData as User; + users.set(username, localUser); + } + + setUser(localUser); + localStorage.setItem('currentUser', username); + + // Identify user in PostHog using username as distinct ID + posthog.identify(username, { + username: username, + }); + + // Capture login event + posthog.capture('user_logged_in', { + username: username, + }); + + return true; + } + return false; + } catch (error) { + console.error('Login error:', error); + return false; + } + }; + + const logout = () => { + // Capture logout event before resetting + posthog.capture('user_logged_out'); + posthog.reset(); + + setUser(null); + 
localStorage.removeItem('currentUser'); + }; + + const incrementBurritoConsiderations = () => { + if (user) { + user.burritoConsiderations++; + users.set(user.username, user); + setUser({ ...user }); + } + }; + + return ( + + {children} + + ); +} + +export function useAuth() { + const context = useContext(AuthContext); + if (context === undefined) { + throw new Error('useAuth must be used within an AuthProvider'); + } + return context; +} +``` + +--- + +## src/lib/posthog-server.ts + +```ts +import { PostHog } from 'posthog-node'; + +let posthogClient: PostHog | null = null; + +export function getPostHogClient() { + if (!posthogClient) { + posthogClient = new PostHog( + process.env.NEXT_PUBLIC_POSTHOG_PROJECT_TOKEN!, + { + host: process.env.NEXT_PUBLIC_POSTHOG_HOST, + flushAt: 1, + flushInterval: 0 + } + ); + posthogClient.debug(true); + } + return posthogClient; +} + +export async function shutdownPostHog() { + if (posthogClient) { + await posthogClient.shutdown(); + } +} +``` + +--- + diff --git a/.claude/skills/integration-nextjs-app-router/references/basic-integration-1.0-begin.md b/.claude/skills/integration-nextjs-app-router/references/basic-integration-1.0-begin.md new file mode 100644 index 0000000..a97bbe7 --- /dev/null +++ b/.claude/skills/integration-nextjs-app-router/references/basic-integration-1.0-begin.md @@ -0,0 +1,43 @@ +--- +title: PostHog Setup - Begin +description: Start the event tracking setup process by analyzing the project and creating an event tracking plan +--- + +We're making an event tracking plan for this project. + +Before proceeding, find any existing `posthog.capture()` code. Make note of event name formatting. + +From the project's file list, select between 10 and 15 files that might have interesting business value for event tracking, especially conversion and churn events. Also look for additional files related to login that could be used for identifying users, along with error handling. Read the files. 
If a file is already well-covered by PostHog events, replace it with another option. Do not spawn subagents. + +Look for opportunities to track client-side events. + +**IMPORTANT: Server-side events are REQUIRED** if the project includes any instrumentable server-side code. If the project has API routes (e.g., `app/api/**/route.ts`) or Server Actions, you MUST include server-side events for critical business operations like: + + - Payment/checkout completion + - Webhook handlers + - Authentication endpoints + +Do not skip server-side events - they capture actions that cannot be tracked client-side. + +Create a new file with a JSON array at the root of the project: .posthog-events.json. It should include one object for each event we want to add: event name, event description, and the file path we want to place the event in. If events already exist, don't duplicate them; supplement them. + +Track actions only, not pageviews. These can be captured automatically. Exceptions can be made for "viewed"-type events that correspond to the top of a conversion funnel. + +As you review files, make an internal note of opportunities to identify users and catch errors. We'll need them for the next step. + +## Status + +Before beginning a phase of the setup, you will send a status message with the exact prefix '[STATUS]', as in: + +[STATUS] Checking project structure. 
+ +Status to report in this phase: + +- Checking project structure +- Verifying PostHog dependencies +- Generating events based on project + + +--- + +**Upon completion, continue with:** [basic-integration-1.1-edit.md](basic-integration-1.1-edit.md) \ No newline at end of file diff --git a/.claude/skills/integration-nextjs-app-router/references/basic-integration-1.1-edit.md b/.claude/skills/integration-nextjs-app-router/references/basic-integration-1.1-edit.md new file mode 100644 index 0000000..ca9d70e --- /dev/null +++ b/.claude/skills/integration-nextjs-app-router/references/basic-integration-1.1-edit.md @@ -0,0 +1,37 @@ +--- +title: PostHog Setup - Edit +description: Implement PostHog event tracking in the identified files, following best practices and the example project +--- + +For each of the files and events noted in .posthog-events.json, make edits to capture events using PostHog. Make sure to set up any helper files needed. Carefully examine the included example project code: your implementation should match it as closely as possible. Do not spawn subagents. + +Use environment variables for PostHog keys. Do not hardcode PostHog keys. + +If a file already has existing integration code for other tools or services, don't overwrite or remove that code. Place PostHog code below it. + +For each event, add useful properties, and use your access to the PostHog source code to ensure correctness. You also have access to documentation about creating new events with PostHog. Consider this documentation carefully and follow it closely before adding events. Your integration should be based on documented best practices. Carefully consider how the user project's framework version may impact the correct PostHog integration approach. + +Remember that you can find the source code for any dependency in the node_modules directory. This may be necessary to properly populate property names. 
There are also example project code files available via the PostHog MCP; use these for reference. + +Where possible, add calls for PostHog's identify() function on the client side upon events like logins and signups. Use the contents of login and signup forms to identify users on submit. If there is server-side code, pass the client-side session and distinct ID to the server-side code to identify the user. On the server side, make sure events have a matching distinct ID where relevant. + +It's essential to do this in both client code and server code, so that user behavior from both domains is easy to correlate. + +You should also add PostHog exception capture error tracking to these files where relevant. + +Remember: Do not alter the fundamental architecture of existing files. Make your additions minimal and targeted. + +Remember the documentation and example project resources you were provided at the beginning. Read them now. + +## Status + +Status to report in this phase: + +- Inserting PostHog capture code +- A status message for each file whose edits you are planning, including a high level summary of changes +- A status message for each file you have edited + + +--- + +**Upon completion, continue with:** [basic-integration-1.2-revise.md](basic-integration-1.2-revise.md) \ No newline at end of file diff --git a/.claude/skills/integration-nextjs-app-router/references/basic-integration-1.2-revise.md b/.claude/skills/integration-nextjs-app-router/references/basic-integration-1.2-revise.md new file mode 100644 index 0000000..5ac72f0 --- /dev/null +++ b/.claude/skills/integration-nextjs-app-router/references/basic-integration-1.2-revise.md @@ -0,0 +1,22 @@ +--- +title: PostHog Setup - Revise +description: Review and fix any errors in the PostHog integration implementation +--- + +Check the project for errors. Read the package.json file for any type checking or build scripts that may provide input about what to fix. 
Remember that you can find the source code for any dependency in the node_modules directory. Do not spawn subagents. + +Ensure that any components created were actually used. + +Once all other tasks are complete, run any linter or prettier-like scripts found in the package.json, but ONLY on the files you have edited or created during this session. Do not run formatting or linting across the entire project's codebase. + +## Status + +Status to report in this phase: + +- Finding and correcting errors +- Report details of any errors you fix +- Linting, building and prettying + +--- + +**Upon completion, continue with:** [basic-integration-1.3-conclude.md](basic-integration-1.3-conclude.md) \ No newline at end of file diff --git a/.claude/skills/integration-nextjs-app-router/references/basic-integration-1.3-conclude.md b/.claude/skills/integration-nextjs-app-router/references/basic-integration-1.3-conclude.md new file mode 100644 index 0000000..b48af6a --- /dev/null +++ b/.claude/skills/integration-nextjs-app-router/references/basic-integration-1.3-conclude.md @@ -0,0 +1,38 @@ +--- +title: PostHog Setup - Conclusion +description: Review and fix any errors in the PostHog integration implementation +--- + +Use the PostHog MCP to create a new dashboard named "Analytics basics" based on the events created here. Make sure to use the exact same event names as implemented in the code. Populate it with up to five insights, with special emphasis on things like conversion funnels, churn events, and other business critical insights. + +Search for a file called `.posthog-events.json` and read it for available events. Do not spawn subagents. + +Create the file posthog-setup-report.md. It should include a summary of the integration edits, a table with the event names, event descriptions, and files where events were added, along with a list of links for the dashboard and insights created. 
Follow this format: + + +# PostHog post-wizard report + +The wizard has completed a deep integration of your project. [Detailed summary of changes] + +[table of events/descriptions/files] + +## Next steps + +We've built some insights and a dashboard for you to keep an eye on user behavior, based on the events we just instrumented: + +[links] + +### Agent skill + +We've left an agent skill folder in your project. You can use this context for further agent development when using Claude Code. This will help ensure the model provides the most up-to-date approaches for integrating PostHog. + + + +Upon completion, remove .posthog-events.json. + +## Status + +Status to report in this phase: + +- Configured dashboard: [insert PostHog dashboard URL] +- Created setup report: [insert full local file path] \ No newline at end of file diff --git a/.claude/skills/integration-nextjs-app-router/references/identify-users.md b/.claude/skills/integration-nextjs-app-router/references/identify-users.md new file mode 100644 index 0000000..c27226e --- /dev/null +++ b/.claude/skills/integration-nextjs-app-router/references/identify-users.md @@ -0,0 +1,202 @@ +# Identify users - Docs + +Linking events to specific users enables you to build a full picture of how they're using your product across different sessions, devices, and platforms. + +This is straightforward to do when [capturing backend events](/docs/product-analytics/capture-events?tab=Node.js.md), as you associate events to a specific user using a `distinct_id`, which is a required argument. + +However, in the frontend of a [web](/docs/libraries/js/features.md#capturing-events) or [mobile app](/docs/libraries/ios.md#capturing-events), a `distinct_id` is not a required argument — PostHog's SDKs will generate an anonymous `distinct_id` for you automatically and you can capture events anonymously, provided you use the appropriate [configuration](/docs/libraries/js/features.md#capturing-anonymous-events). 
+ +To link events to specific users, call `identify`: + +PostHog AI + +### Web + +```javascript +posthog.identify( + 'distinct_id', // Replace 'distinct_id' with your user's unique identifier + { email: 'max@hedgehogmail.com', name: 'Max Hedgehog' } // optional: set additional person properties +); +``` + +### Android + +```kotlin +PostHog.identify( + distinctId = distinctID, // Replace 'distinctID' with your user's unique identifier + // optional: set additional person properties + userProperties = mapOf( + "name" to "Max Hedgehog", + "email" to "max@hedgehogmail.com" + ) +) +``` + +### iOS + +```swift +PostHogSDK.shared.identify("distinct_id", // Replace "distinct_id" with your user's unique identifier + userProperties: ["name": "Max Hedgehog", "email": "max@hedgehogmail.com"]) // optional: set additional person properties +``` + +### React Native + +```jsx +posthog.identify('distinct_id', { // Replace "distinct_id" with your user's unique identifier + email: 'max@hedgehogmail.com', // optional: set additional person properties + name: 'Max Hedgehog' +}) +``` + +### Dart + +```dart +await Posthog().identify( + userId: 'distinct_id', // Replace "distinct_id" with your user's unique identifier + userProperties: { + email: "max@hedgehogmail.com", // optional: set additional person properties + name: "Max Hedgehog" +}); +``` + +Events captured after calling `identify` are identified events and this creates a person profile if one doesn't exist already. + +Due to the cost of processing them, anonymous events can be up to 4x cheaper than identified events, so it's recommended you only capture identified events when needed. + +## How identify works + +When a user starts browsing your website or app, PostHog automatically assigns them an **anonymous ID**, which is stored locally. + +Provided you've [configured persistence](/docs/libraries/js/persistence.md) to use cookies or `localStorage`, this enables us to track anonymous users – even across different sessions. 
+ +By calling `identify` with a `distinct_id` of your choice (usually the user's ID in your database, or their email), you link the anonymous ID and distinct ID together. + +Thus, all past and future events made with that anonymous ID are now associated with the distinct ID. + +This enables you to do things like associate events with a user from before they log in for the first time, or associate their events across different devices or platforms. + +Using identify in the backend + +Although you can call `identify` using our backend SDKs, it is used most in frontends. This is because there is no concept of anonymous sessions in the backend SDKs, so calling `identify` only updates person profiles. + +## Best practices when using `identify` + +### 1\. Call `identify` as soon as you're able to + +In your frontend, you should call `identify` as soon as you're able to. + +Typically, this is every time your **app loads** for the first time, and directly after your **users log in**. + +This ensures that events sent during your users' sessions are correctly associated with them. + +You only need to call `identify` once per session, and you should avoid calling it multiple times unnecessarily. + +If you call `identify` multiple times with the same data without reloading the page in between, PostHog will ignore the subsequent calls. + +### 2\. Use unique strings for distinct IDs + +If two users have the same distinct ID, their data is merged and they are considered one user in PostHog. Two common ways this can happen are: + +- Your logic for generating IDs does not generate sufficiently strong IDs and you can end up with a clash where 2 users have the same ID. +- There's a bug, typo, or mistake in your code leading to most or all users being identified with generic IDs like `null`, `true`, or `distinctId`. + +PostHog also has built-in protections to stop the most common distinct ID mistakes. + +### 3\. 
Reset after logout + +If a user logs out on your frontend, you should call `reset()` to unlink any future events made on that device with that user. + +This is important if your users are sharing a computer, as otherwise all of those users are grouped together into a single user due to shared cookies between sessions. + +**We strongly recommend you call `reset` on logout even if you don't expect users to share a computer.** + +You can do that like so: + +PostHog AI + +### Web + +```javascript +posthog.reset() +``` + +### iOS + +```swift +PostHogSDK.shared.reset() +``` + +### Android + +```kotlin +PostHog.reset() +``` + +### React Native + +```jsx +posthog.reset() +``` + +### Dart + +```dart +Posthog().reset() +``` + +If you *also* want to reset the `device_id` so that the device will be considered a new device in future events, you can pass `true` as an argument: + +Web + +PostHog AI + +```javascript +posthog.reset(true) +``` + +### 4\. Person profiles and properties + +You'll notice that one of the parameters in the `identify` method is a `properties` object. + +This enables you to set [person properties](/docs/product-analytics/person-properties.md). + +Whenever possible, we recommend passing in all person properties you have available each time you call identify, as this ensures their person profile on PostHog is up to date. + +Person properties can also be set by adding a `$set` property to an event `capture` call. + +See our [person properties docs](/docs/product-analytics/person-properties.md) for more details on how to work with them and best practices. + +### 5\. Use deep links between platforms + +We recommend you call `identify` [as soon as you're able](#1-call-identify-as-soon-as-youre-able), typically when a user signs up or logs in. + +This doesn't work if one or both platforms are unauthenticated. Some examples of such cases are: + +- Onboarding and signup flows before authentication. 
+- Unauthenticated web pages redirecting to authenticated mobile apps. +- Authenticated web apps prompting an app download. + +In these cases, you can use a [deep link](https://developer.android.com/training/app-links/deep-linking) on Android and [universal links](https://developer.apple.com/documentation/xcode/supporting-universal-links-in-your-app) on iOS to identify users. + +1. Use `posthog.get_distinct_id()` to get the current distinct ID. Even if you cannot call identify because the user is unauthenticated, this will return an anonymous distinct ID generated by PostHog. +2. Add the distinct ID to the deep link as query parameters, along with other properties like UTM parameters. +3. When the user is redirected to the app, parse the deep link and handle the following cases: + +- The user is already authenticated on the mobile app. In this case, call [`posthog.alias()`](/docs/libraries/js/features.md#alias) with the distinct ID from the web. This associates the two distinct IDs as a single person. +- The user is unauthenticated. In this case, call [`posthog.identify()`](/docs/libraries/js/features.md#identifying-users) with the distinct ID from the web. Events will be associated with this distinct ID. + +As long as you associate the distinct IDs with `posthog.identify()` or `posthog.alias()`, you can track events generated across platforms. + +## Further reading + +- [Identifying users docs](/docs/product-analytics/identify.md) +- [How person processing works](/docs/how-posthog-works/ingestion-pipeline.md#2-person-processing) +- [An introductory guide to identifying users in PostHog](/tutorials/identifying-users-guide.md) + +### Community questions + +Ask a question + +### Was this page useful? 
+ +HelpfulCould be better \ No newline at end of file diff --git a/.claude/skills/integration-nextjs-app-router/references/next-js.md b/.claude/skills/integration-nextjs-app-router/references/next-js.md new file mode 100644 index 0000000..4e178d1 --- /dev/null +++ b/.claude/skills/integration-nextjs-app-router/references/next-js.md @@ -0,0 +1,385 @@ +# Next.js - Docs + +PostHog makes it easy to get data about traffic and usage of your [Next.js](https://nextjs.org/) app. Integrating PostHog into your site enables analytics about user behavior, custom events capture, session recordings, feature flags, and more. + +This guide walks you through integrating PostHog into your Next.js app using the [React](/docs/libraries/react.md) and the [Node.js](/docs/libraries/node.md) SDKs. + +> You can see a working example of this integration in our [Next.js demo app](https://github.com/PostHog/posthog-js/tree/main/playground/nextjs). + +Next.js has both client and server-side rendering, as well as pages and app routers. We'll cover all of these options in this guide. + +> **Try `@posthog/next` (pre-release):** A simplified Next.js integration with synchronized client/server identity, server-side flag bootstrapping, and a built-in API proxy. [Read the setup guide →](/docs/libraries/next-js/posthog-next.md) + +## Prerequisites + +To follow this guide along, you need: + +1. A PostHog instance (either [Cloud](https://app.posthog.com/signup) or [self-hosted](/docs/self-host.md)) +2. A Next.js application + +## Beta: integration via LLM + +Install PostHog for Next.js in seconds with our wizard by running this prompt with [LLM coding agents](/blog/envoy-wizard-llm-agent.md) like Cursor and Bolt, or by running it in your terminal. + +`npx @posthog/wizard@latest` + +[Learn more](/wizard.md) + +Or, to integrate manually, continue with the rest of this guide. 
+ +## Client-side setup + +Install `posthog-js` using your package manager: + +PostHog AI + +### npm + +```bash +npm install --save posthog-js +``` + +### Yarn + +```bash +yarn add posthog-js +``` + +### pnpm + +```bash +pnpm add posthog-js +``` + +### Bun + +```bash +bun add posthog-js +``` + +Add your environment variables to your `.env.local` file and to your hosting provider (e.g. Vercel, Netlify, AWS). You can find your project token in your [project settings](https://app.posthog.com/project/settings). + +.env.local + +PostHog AI + +```shell +NEXT_PUBLIC_POSTHOG_TOKEN= +NEXT_PUBLIC_POSTHOG_HOST=https://us.i.posthog.com +``` + +These values need to start with `NEXT_PUBLIC_` to be accessible on the client-side. + +## Integration + +Next.js provides the [`instrumentation-client.ts|js`](https://nextjs.org/docs/app/api-reference/file-conventions/instrumentation-client) file for client-side setup. Add it to the root of your Next.js app (for both app and pages router) and initialize PostHog in it like this: + +PostHog AI + +### instrumentation-client.js + +```javascript +import posthog from 'posthog-js' +posthog.init(process.env.NEXT_PUBLIC_POSTHOG_TOKEN, { + api_host: process.env.NEXT_PUBLIC_POSTHOG_HOST, + defaults: '2026-01-30' +}); +``` + +### instrumentation-client.ts + +```typescript +import posthog from 'posthog-js' +posthog.init(process.env.NEXT_PUBLIC_POSTHOG_TOKEN!, { + api_host: process.env.NEXT_PUBLIC_POSTHOG_HOST, + defaults: '2026-01-30' +}); +``` + +Bootstrapping with `instrumentation-client` + +When using `instrumentation-client`, the values you pass to `posthog.init` remain fixed for the entire session. This means bootstrapping only works if you evaluate flags **before your app renders** (for example, on the server). + +If you need flag values after the app has rendered, you’ll want to: + +- Evaluate the flag on the server and pass the value into your app, or +- Evaluate the flag in an earlier page/state, then store and re-use it when needed. 
+ +Both approaches avoid flicker and give you the same outcome as bootstrapping, as long as you use the same `distinct_id` across client and server. + +See the [bootstrapping guide](/docs/feature-flags/bootstrapping.md) for more information. + +## Identifying users + +> **Identifying users is required.** Call `posthog.identify('your-user-id')` after login to link events to a known user. This is what connects frontend event captures, [session replays](/docs/session-replay.md), [LLM traces](/docs/ai-engineering.md), and [error tracking](/docs/error-tracking.md) to the same person — and lets backend events link back too. +> +> See our guide on [identifying users](/docs/getting-started/identify-users.md) for how to set this up. + +Set up a reverse proxy (recommended) + +We recommend [setting up a reverse proxy](/docs/advanced/proxy.md), so that events are less likely to be intercepted by tracking blockers. + +We have our [own managed reverse proxy service](/docs/advanced/proxy/managed-reverse-proxy.md), which is free for all PostHog Cloud users, routes through our infrastructure, and makes setting up your proxy easy. + +If you don't want to use our managed service then there are several other options for creating a reverse proxy, including using [Cloudflare](/docs/advanced/proxy/cloudflare.md), [AWS Cloudfront](/docs/advanced/proxy/cloudfront.md), and [Vercel](/docs/advanced/proxy/vercel.md). + +Grouping products in one project (recommended) + +If you have multiple customer-facing products (e.g. a marketing website + mobile app + web app), it's best to install PostHog on them all and [group them in one project](/docs/settings/projects.md). + +This makes it possible to track users across their entire journey (e.g. from visiting your marketing website to signing up for your product), or how they use your product across multiple platforms. 
+ +Add IPs to Firewall/WAF allowlists (recommended) + +For certain features like [heatmaps](/docs/toolbar/heatmaps.md), your Web Application Firewall (WAF) may be blocking PostHog’s requests to your site. Add these IP addresses to your WAF allowlist or rules to let PostHog access your site. + +**EU**: `3.75.65.221`, `18.197.246.42`, `3.120.223.253` + +**US**: `44.205.89.55`, `52.4.194.122`, `44.208.188.173` + +These are public, stable IPs used by PostHog services (e.g., Celery tasks for snapshots). + +## Accessing PostHog + +Once initialized in `instrumentation-client.js|ts`, import `posthog` from `posthog-js` anywhere and call the methods you need on the `posthog` object. + +JavaScript + +PostHog AI + +```javascript +'use client' +import posthog from 'posthog-js' +export default function Home() { + return ( +
+ +
+ ); +} +``` + +### Using React hooks + +The [React feature flag hooks](/docs/libraries/react.md#feature-flags) work automatically when PostHog is initialized via `instrumentation-client.ts`. The hooks use the initialized posthog-js singleton: + +JavaScript + +PostHog AI + +```javascript +'use client' +import { useFeatureFlagEnabled } from 'posthog-js/react' +export default function FeatureComponent() { + const showNewFeature = useFeatureFlagEnabled('new-feature') + return showNewFeature ? : +} +``` + +### Usage + +See the [React SDK docs](/docs/libraries/react.md) for examples of how to use: + +- [`posthog-js` functions like custom event capture, user identification, and more.](/docs/libraries/react.md#using-posthog-js-functions) +- [Feature flags including variants and payloads.](/docs/libraries/react.md#feature-flags) + +You can also read [the full `posthog-js` documentation](/docs/libraries/js/features.md) for all the usable functions. + +## Server-side analytics + +Next.js enables you to both server-side render pages and add server-side functionality. To integrate PostHog into your Next.js app on the server-side, you can use the [Node SDK](/docs/libraries/node.md). + +First, install the `posthog-node` library: + +PostHog AI + +### npm + +```bash +npm install posthog-node --save +``` + +### Yarn + +```bash +yarn add posthog-node +``` + +### pnpm + +```bash +pnpm add posthog-node +``` + +### Bun + +```bash +bun add posthog-node +``` + +### Router-specific instructions + +## App router + +For the app router, we can initialize the `posthog-node` SDK once with a `PostHogClient` function, and import it into files. + +This enables us to send events and fetch data from PostHog on the server – without making client-side requests. 
+ +JavaScript + +PostHog AI + +```javascript +// app/posthog.js +import { PostHog } from 'posthog-node' +export default function PostHogClient() { + const posthogClient = new PostHog(process.env.NEXT_PUBLIC_POSTHOG_TOKEN, { + host: process.env.NEXT_PUBLIC_POSTHOG_HOST, + flushAt: 1, + flushInterval: 0 + }) + return posthogClient +} +``` + +> **Note:** Because server-side functions in Next.js can be short-lived, we set `flushAt` to `1` and `flushInterval` to `0`. +> +> - `flushAt` sets how many capture calls we should flush the queue (in one batch). +> - `flushInterval` sets how many milliseconds we should wait before flushing the queue. Setting them to the lowest number ensures events are sent immediately and not batched. We also need to call `await posthog.shutdown()` once done. + +To use this client, we import it into our pages and call it with the `PostHogClient` function: + +JavaScript + +PostHog AI + +```javascript +import Link from 'next/link' +import PostHogClient from '../posthog' +export default async function About() { + const posthog = PostHogClient() + const flags = await posthog.getAllFlags( + 'user_distinct_id' // replace with a user's distinct ID + ); + await posthog.shutdown() + return ( +
+

About

+ Go home + { flags['main-cta'] && + Go to PostHog + } +
+ ) +} +``` + +## Pages router + +For the pages router, we can use the `getServerSideProps` function to access PostHog on the server-side, send events, evaluate feature flags, and more. + +This looks like this: + +JavaScript + +PostHog AI + +```javascript +// pages/posts/[id].js +import { useContext, useEffect, useState } from 'react' +import { getServerSession } from "next-auth/next" +import { PostHog } from 'posthog-node' +export default function Post({ post, flags }) { + const [ctaState, setCtaState] = useState() + useEffect(() => { + if (flags) { + setCtaState(flags['blog-cta']) + } + }) + return ( +
+

{post.title}

+

By: {post.author}

+

{post.content}

+ {ctaState && +

Go to PostHog

+ } + +
+ ) +} +export async function getServerSideProps(ctx) { + const session = await getServerSession(ctx.req, ctx.res) + let flags = null + if (session) { + const client = new PostHog( + process.env.NEXT_PUBLIC_POSTHOG_TOKEN, + { + host: process.env.NEXT_PUBLIC_POSTHOG_HOST, + } + ) + flags = await client.getAllFlags(session.user.email); + client.capture({ + distinctId: session.user.email, + event: 'loaded blog article', + properties: { + $current_url: ctx.req.url, + }, + }); + await client.shutdown() + } + const { posts } = await import('../../blog.json') + const post = posts.find((post) => post.id.toString() === ctx.params.id) + return { + props: { + post, + flags + }, + } +} +``` + +> **Note**: Make sure to *always* call `await client.shutdown()` after sending events from the server-side. PostHog queues events into larger batches, and this call forces all batched events to be flushed immediately. + +### Server-side configuration + +Next.js overrides the default `fetch` behavior on the server to introduce their own cache. PostHog ignores that cache by default, as this is Next.js's default behavior for any fetch call. + +You can override that configuration when initializing PostHog, but make sure you understand the pros/cons of using Next.js's cache and that you might get cached results rather than the actual result our server would return. This is important for feature flags, for example. + +TSX + +PostHog AI + +```jsx +posthog.init(process.env.NEXT_PUBLIC_POSTHOG_TOKEN, { + // ... your configuration + fetch_options: { + cache: 'force-cache', // Use Next.js cache + next_options: { // Passed to the `next` option for `fetch` + revalidate: 60, // Cache for 60 seconds + tags: ['posthog'], // Can be used with Next.js `revalidateTag` function + }, + } +}) +``` + +## Configuring a reverse proxy to PostHog + +To improve the reliability of client-side tracking and make requests less likely to be intercepted by tracking blockers, you can setup a reverse proxy in Next.js. 
Read more about deploying a reverse proxy using [Next.js rewrites](/docs/advanced/proxy/nextjs.md), [Next.js middleware](/docs/advanced/proxy/nextjs-middleware.md), and [Vercel rewrites](/docs/advanced/proxy/vercel.md). + +## Further reading + +- [How to set up Next.js analytics, feature flags, and more](/tutorials/nextjs-analytics.md) +- [How to set up Next.js pages router analytics, feature flags, and more](/tutorials/nextjs-pages-analytics.md) +- [How to set up Next.js A/B tests](/tutorials/nextjs-ab-tests.md) + +### Community questions + +Ask a question + +### Was this page useful? + +HelpfulCould be better \ No newline at end of file diff --git a/.github/workflows/release-cli.yml b/.github/workflows/release-cli.yml index 187a9d8..bccb3ec 100644 --- a/.github/workflows/release-cli.yml +++ b/.github/workflows/release-cli.yml @@ -5,7 +5,7 @@ name: Release CLI binaries # (x64 + arm64) and attaches them to a GitHub Release. The `install.sh` # fallback path curls these when Node isn't available. # -# Publishing to npm is still a manual step (pnpm publish from apps/cli-v2) — +# Publishing to npm is still a manual step (pnpm publish from apps/cli) — # this workflow only handles binary distribution. on: @@ -48,7 +48,7 @@ jobs: run: pnpm install --frozen-lockfile --ignore-scripts - name: Compile binary - working-directory: apps/cli-v2 + working-directory: apps/cli shell: bash run: | mkdir -p dist/bin @@ -64,7 +64,7 @@ jobs: # on the build host, so skip them. 
- name: Smoke test (native only) if: matrix.target == 'darwin-arm64' || matrix.target == 'linux-x64' - working-directory: apps/cli-v2 + working-directory: apps/cli run: | ./dist/bin/claudemesh-${{ matrix.target }} --version ./dist/bin/claudemesh-${{ matrix.target }} --help | head -5 @@ -73,7 +73,7 @@ jobs: uses: actions/upload-artifact@v4 with: name: claudemesh-${{ matrix.target }} - path: apps/cli-v2/dist/bin/claudemesh-${{ matrix.target }}${{ matrix.ext }} + path: apps/cli/dist/bin/claudemesh-${{ matrix.target }}${{ matrix.ext }} release: needs: build diff --git a/.nano-banana-config.json b/.nano-banana-config.json index f0e196a..042b3bc 100644 --- a/.nano-banana-config.json +++ b/.nano-banana-config.json @@ -1,3 +1,3 @@ { - "geminiApiKey": "AIzaSyBblLRkmypvabqI-xJ_b2KPVA9Pswtav0M" + "geminiApiKey": "AIzaSyDJEyW5Q_OT1X4iGO_5jdVnq1BNANR7s2k" } \ No newline at end of file diff --git a/apps/broker/Dockerfile b/apps/broker/Dockerfile index 4e4798c..99c4cdd 100644 --- a/apps/broker/Dockerfile +++ b/apps/broker/Dockerfile @@ -35,6 +35,10 @@ ENV BROKER_PORT=7900 COPY --from=deps --chown=bun:bun /deploy /app +# Copy migrations folder alongside the broker so runtime auto-migrate +# has files to apply. Workspace deploy subset drops them otherwise. 
+COPY --from=deps --chown=bun:bun /app/packages/db/migrations /app/migrations + EXPOSE 7900 HEALTHCHECK --interval=10s --timeout=5s --start-period=30s --retries=5 \ diff --git a/apps/broker/package.json b/apps/broker/package.json index a4c1c25..e66fa76 100644 --- a/apps/broker/package.json +++ b/apps/broker/package.json @@ -26,6 +26,7 @@ "libsodium-wrappers": "0.7.15", "minio": "8.0.7", "neo4j-driver": "6.0.1", + "postgres": "3.4.5", "react": "19.2.0", "react-dom": "19.2.0", "ws": "8.20.0", diff --git a/apps/broker/src/index.ts b/apps/broker/src/index.ts index 28cbfe7..a687dba 100644 --- a/apps/broker/src/index.ts +++ b/apps/broker/src/index.ts @@ -696,6 +696,12 @@ function handleHttpRequest(req: IncomingMessage, res: ServerResponse): void { return; } + if (req.method === "POST" && req.url?.startsWith("/cli/mesh/") && req.url?.endsWith("/grants")) { + const slug = req.url.slice("/cli/mesh/".length).replace("/grants", ""); + handleCliMeshGrants(req, slug, res, started); + return; + } + if (req.method === "DELETE" && req.url?.startsWith("/cli/mesh/")) { const slug = req.url.slice("/cli/mesh/".length); handleMeshDelete(req, slug, res, started); @@ -1836,6 +1842,28 @@ async function handleSend( ...(subtype ? { subtype } : {}), }; + // Per-peer grant enforcement — load recipient grant maps once per send. + // See .artifacts/specs/2026-04-15-per-peer-capabilities.md. + const DEFAULT_CAPS = ["read", "dm", "broadcast", "state-read"] as const; + const capNeeded: "dm" | "broadcast" = isMulticast ? "broadcast" : "dm"; + const senderPubkey = conn.memberPubkey; // stable member key (survives session rotation) + // Fetch grant maps for all connected peers in this mesh in one query. + // Small (bounded by concurrent connections per mesh); acceptable per send. 
+ const grantRows = await db + .select({ id: meshMember.id, peerGrants: meshMember.peerGrants }) + .from(meshMember) + .where(eq(meshMember.meshId, conn.meshId)); + const grantsByMemberId = new Map>( + grantRows.map((r) => [r.id, (r.peerGrants as Record) ?? {}]), + ); + function allowed(recipientMemberId: string): boolean { + const grants = grantsByMemberId.get(recipientMemberId); + if (!grants) return DEFAULT_CAPS.includes(capNeeded); + const entry = grants[senderPubkey]; + if (entry === undefined) return DEFAULT_CAPS.includes(capNeeded); + return entry.includes(capNeeded); + } + for (const [pid, peer] of connections) { if (pid === senderPresenceId) continue; if (peer.meshId !== conn.meshId) continue; @@ -1854,6 +1882,14 @@ async function handleSend( continue; } + // Per-peer capability check — silent drop if recipient hasn't granted + // `capNeeded` to this sender (Signal block semantics: sender sees + // delivered, recipient sees nothing). + if (!allowed(peer.memberId)) { + metrics.messagesDroppedByGrantTotal?.inc?.({ cap: capNeeded }); + continue; + } + if (isMulticast) { // Multicast: push directly to each connected peer. The queue // row has one delivered_at — can only be claimed once. Direct @@ -4319,7 +4355,12 @@ async function recoverScheduledMessages(): Promise { } } -function main(): void { +async function main(): Promise { + // Run pending migrations before the first connection is accepted. + // Exits non-zero on failure so Coolify sees a broken container. + const { runMigrationsOnStartup } = await import("./migrate"); + await runMigrationsOnStartup(); + const wss = new WebSocketServer({ noServer: true, maxPayload: env.MAX_MESSAGE_BYTES, @@ -5036,6 +5077,52 @@ import { checkPermission, getPermissions, setPermissions } from "./permissions"; import { meshPermission } from "@turbostarter/db/schema/mesh"; /** POST /cli/mesh/create — create a new mesh via CLI. */ +/** POST /cli/mesh/:slug/grants — set per-peer grants for the caller's membership. 
+ * + * Body: { user_id: string, grants: Record } + * Merges the map into the caller's mesh_member.peer_grants. Empty array + * for a specific peer = blocked. Explicit null = reset to defaults. + */ +async function handleCliMeshGrants(req: IncomingMessage, slug: string, res: ServerResponse, started: number): Promise { + let body: { user_id: string; grants: Record }; + try { + const chunks: Buffer[] = []; + for await (const chunk of req) chunks.push(chunk as Buffer); + body = JSON.parse(Buffer.concat(chunks).toString()) as typeof body; + } catch { + writeJson(res, 400, { error: "Invalid body" }); + return; + } + if (!body.user_id || !body.grants) { + writeJson(res, 400, { error: "user_id and grants required" }); + return; + } + try { + const [m] = await db.select().from(mesh).where(eq(mesh.slug, slug)).limit(1); + if (!m) { writeJson(res, 404, { error: "Mesh not found" }); return; } + // Find the caller's member row. + const [member] = await db.select().from(meshMember) + .where(and(eq(meshMember.meshId, m.id), eq(meshMember.userId, body.user_id), isNull(meshMember.revokedAt))) + .limit(1); + if (!member) { + writeJson(res, 403, { error: "Not a member of this mesh" }); + return; + } + const current = (member.peerGrants as Record) ?? {}; + const merged = { ...current }; + for (const [pk, caps] of Object.entries(body.grants)) { + if (caps === null) delete merged[pk]; + else merged[pk] = caps; + } + await db.update(meshMember).set({ peerGrants: merged }).where(eq(meshMember.id, member.id)); + writeJson(res, 200, { ok: true, grants: merged }); + log.info("mesh-grants", { route: "POST /cli/mesh/:slug/grants", slug, member_id: member.id, latency_ms: Date.now() - started }); + } catch (e) { + log.error("mesh-grants", { error: e instanceof Error ? e.message : String(e) }); + writeJson(res, 500, { error: "Failed to update grants" }); + } +} + /** POST /cli/mesh/:slug/invite — generate an invite for a mesh. 
*/ async function handleCliMeshInvite(req: IncomingMessage, slug: string, res: ServerResponse, started: number): Promise { let body: { user_id: string; email?: string; expires_in?: string; role?: string }; @@ -5363,5 +5450,8 @@ async function handlePermissionsSet(req: IncomingMessage, slug: string, res: Ser // Skip starting the HTTP/WS server when running under vitest — tests import // claimInviteV2Core() directly and must not bind ports on module load. if (!process.env.VITEST) { - main(); + main().catch((e) => { + console.error("fatal:", e instanceof Error ? e.stack : e); + process.exit(1); + }); } diff --git a/apps/broker/src/metrics.ts b/apps/broker/src/metrics.ts index 9a564fb..67d0246 100644 --- a/apps/broker/src/metrics.ts +++ b/apps/broker/src/metrics.ts @@ -90,6 +90,10 @@ export const metrics = { "broker_messages_rejected_total", "Messages rejected (size, auth, malformed)", ), + messagesDroppedByGrantTotal: new Counter( + "broker_messages_dropped_by_grant_total", + "Messages silently dropped because recipient didn't grant sender the required capability", + ), queueDepth: new Gauge( "broker_queue_depth", "Undelivered messages currently in the queue", diff --git a/apps/broker/src/migrate.ts b/apps/broker/src/migrate.ts new file mode 100644 index 0000000..57eded5 --- /dev/null +++ b/apps/broker/src/migrate.ts @@ -0,0 +1,59 @@ +/** + * Runtime migrations on broker startup. + * + * Runs pending drizzle migrations against DATABASE_URL before the broker + * listens. Uses pg_advisory_lock so a multi-instance deploy doesn't race. + * If migrations fail, the process exits non-zero so the orchestrator (Coolify + * healthcheck) sees the container as broken and doesn't route traffic. 
+ */ + +import { drizzle } from "drizzle-orm/postgres-js"; +import { migrate } from "drizzle-orm/postgres-js/migrator"; +import postgres from "postgres"; +import { dirname, join } from "node:path"; +import { existsSync, readdirSync } from "node:fs"; + +const LOCK_ID = 74737_73831; // "cmsh" ascii — stable magic constant + +export async function runMigrationsOnStartup(): Promise { + const url = process.env.DATABASE_URL; + if (!url) { + console.error("[migrate] DATABASE_URL not set — skipping auto-migrate"); + return; + } + + // Resolve the migrations folder — it's shipped inside @turbostarter/db's + // deploy subset in the runtime image. Dev path also works. + const candidates = [ + "/app/migrations", + "/app/node_modules/@turbostarter/db/migrations", + join(process.cwd(), "..", "..", "packages", "db", "migrations"), + join(process.cwd(), "packages", "db", "migrations"), + ]; + const migrationsFolder = candidates.find((p) => existsSync(p)); + if (!migrationsFolder) { + console.error("[migrate] migrations folder not found — skipping. Searched:", candidates); + return; + } + const count = readdirSync(migrationsFolder).filter((f) => f.endsWith(".sql")).length; + console.log(`[migrate] ${count} migration files at ${migrationsFolder}`); + + const sql = postgres(url, { max: 1, onnotice: () => { /* quiet */ } }); + try { + // Advisory lock so parallel instances serialise. + await sql`SELECT pg_advisory_lock(${LOCK_ID})`; + try { + const db = drizzle(sql); + const start = Date.now(); + await migrate(db, { migrationsFolder }); + console.log(`[migrate] ok (${Date.now() - start}ms)`); + } finally { + await sql`SELECT pg_advisory_unlock(${LOCK_ID})`; + } + } catch (e) { + console.error("[migrate] FAILED:", e instanceof Error ? 
e.message : e); + process.exit(1); + } finally { + await sql.end({ timeout: 5 }); + } +} diff --git a/apps/cli-v2/README.md b/apps/cli-v2/README.md deleted file mode 100644 index ae02f4d..0000000 --- a/apps/cli-v2/README.md +++ /dev/null @@ -1,90 +0,0 @@ -# claudemesh-cli - -Peer mesh for Claude Code sessions. Connect multiple Claude Code instances into a shared mesh with real-time messaging, shared state, memory, file sharing, and 79 MCP tools. - -## Install - -```bash -npm i -g claudemesh-cli -``` - -## Quick start - -```bash -claudemesh register # create account -claudemesh new "my-team" # create a mesh -claudemesh invite # generate invite link -claudemesh # start a session -``` - -## Commands - -``` -USAGE - claudemesh start a session (creates one if needed) - claudemesh join a mesh from an invite link - claudemesh new create a new mesh - claudemesh invite [email] generate an invite - claudemesh list see your meshes - claudemesh rename rename the current mesh - claudemesh leave [mesh] leave a mesh - claudemesh peers see who's online - - claudemesh send send a message - claudemesh inbox drain pending messages - claudemesh state ... get, set, or list shared state - claudemesh remember store a memory - claudemesh recall search memories - claudemesh remind ... 
schedule a reminder - claudemesh profile view or edit your profile - - claudemesh doctor diagnose issues - claudemesh whoami show current identity - claudemesh status check broker connectivity - - claudemesh register create account - claudemesh login sign in via browser - claudemesh logout sign out - - claudemesh install register MCP server + hooks - claudemesh uninstall remove MCP server + hooks -``` - -## Architecture - -``` -src/ -├── entrypoints/ CLI + MCP stdio entry points -├── cli/ argv parsing, output formatters, signal handling -├── commands/ one verb per file (29 commands) -├── services/ 17 feature-folders with facade pattern -│ ├── auth/ device-code OAuth, token storage -│ ├── broker/ WebSocket client (2200 lines), reconnect, crypto -│ ├── crypto/ Ed25519, NaCl crypto_box, AES-GCM file encryption -│ ├── config/ ~/.claudemesh/config.json with atomic writes -│ ├── mesh/ CRUD, join, resolve target -│ ├── invite/ generate, parse, claim (v1 + v2 formats) -│ ├── api/ typed HTTP client for claudemesh.com -│ ├── health/ 6 diagnostic checks -│ └── ... 
device, clipboard, spawn, telemetry, i18n, logger -├── mcp/ MCP server with 79 tools across 21 families -├── ui/ TUI: styles, spinner, welcome wizard, launch flow -├── constants/ exit codes, paths, URLs, timings -├── types/ API, mesh, peer interfaces -├── utils/ levenshtein, slug, URL, format, semver, retry -├── locales/ English strings (i18n ready) -└── templates/ 5 mesh templates -``` - -## Development - -```bash -pnpm install -bun run dev # hot-reload -bun run build # production build -bun run typecheck # tsc --noEmit -``` - -## License - -MIT diff --git a/apps/cli-v2/package.json b/apps/cli-v2/package.json deleted file mode 100644 index 0bd0d4e..0000000 --- a/apps/cli-v2/package.json +++ /dev/null @@ -1,69 +0,0 @@ -{ - "name": "claudemesh-cli-v2", - "version": "1.0.0-alpha.31", - "description": "Peer mesh for Claude Code sessions — CLI + MCP server.", - "keywords": [ - "claude-code", - "mcp", - "model-context-protocol", - "claudemesh", - "peer-messaging", - "multi-agent" - ], - "author": "Alejandro Gutiérrez", - "license": "MIT", - "homepage": "https://claudemesh.com", - "repository": { - "type": "git", - "url": "https://github.com/alezmad/claudemesh.git", - "directory": "apps/cli-v2" - }, - "type": "module", - "bin": { - "claudemesh": "./dist/entrypoints/cli.js" - }, - "files": [ - "dist", - "README.md", - "LICENSE" - ], - "publishConfig": { - "access": "public" - }, - "scripts": { - "build": "bun build.ts", - "clean": "git clean -xdf .cache .turbo dist node_modules", - "dev": "bun --hot src/entrypoints/cli.ts", - "start": "bun src/entrypoints/cli.ts", - "format": "prettier --check . 
--ignore-path ../../.gitignore", - "lint": "eslint", - "prepublishOnly": "bun run build", - "test": "vitest run", - "typecheck": "tsc --noEmit" - }, - "prettier": "@turbostarter/prettier-config", - "engines": { - "node": ">=20" - }, - "dependencies": { - "@modelcontextprotocol/sdk": "1.27.1", - "citty": "0.2.2", - "libsodium-wrappers": "0.7.15", - "qrcode-terminal": "0.12.0", - "ws": "8.20.0", - "zod": "4.1.13" - }, - "devDependencies": { - "@turbostarter/eslint-config": "workspace:*", - "@turbostarter/prettier-config": "workspace:*", - "@turbostarter/tsconfig": "workspace:*", - "@turbostarter/vitest-config": "workspace:*", - "@types/libsodium-wrappers": "0.7.14", - "@types/qrcode-terminal": "0.12.2", - "@types/ws": "8.5.13", - "eslint": "catalog:", - "prettier": "catalog:", - "typescript": "catalog:", - "vitest": "catalog:" - } -} diff --git a/apps/cli-v2/src/commands/connect-telegram.ts b/apps/cli-v2/src/commands/connect-telegram.ts deleted file mode 100644 index 3224e27..0000000 --- a/apps/cli-v2/src/commands/connect-telegram.ts +++ /dev/null @@ -1,65 +0,0 @@ -import { readConfig } from "~/services/config/facade.js"; - -export async function connectTelegram(args: string[]): Promise { - const config = readConfig(); - if (config.meshes.length === 0) { - console.error("No meshes joined. 
Run 'claudemesh join' first."); - process.exit(1); - } - - const mesh = config.meshes[0]!; - const linkOnly = args.includes("--link"); - - // Convert WS broker URL to HTTP - const brokerHttp = mesh.brokerUrl - .replace("wss://", "https://") - .replace("ws://", "http://") - .replace("/ws", ""); - - console.log("Requesting Telegram connect token..."); - - const res = await fetch(`${brokerHttp}/tg/token`, { - method: "POST", - headers: { "Content-Type": "application/json" }, - body: JSON.stringify({ - meshId: mesh.meshId, - memberId: mesh.memberId, - pubkey: mesh.pubkey, - secretKey: mesh.secretKey, - }), - signal: AbortSignal.timeout(10_000), - }); - - if (!res.ok) { - const err = await res.json().catch(() => ({})); - console.error(`Failed: ${(err as any).error ?? res.statusText}`); - process.exit(1); - } - - const { token, deepLink } = (await res.json()) as { - token: string; - deepLink: string; - }; - - if (linkOnly) { - console.log(deepLink); - return; - } - - // Print QR code using simple block characters - console.log("\n Connect Telegram to your mesh:\n"); - console.log(` ${deepLink}\n`); - console.log(" Open this link on your phone, or scan the QR code"); - console.log(" with your Telegram camera.\n"); - - // Try to generate QR with qrcode-terminal if available - try { - const QRCode = require("qrcode-terminal"); - QRCode.generate(deepLink, { small: true }, (code: string) => { - console.log(code); - }); - } catch { - // qrcode-terminal not available, link is enough - console.log(" (Install qrcode-terminal for QR code display)"); - } -} diff --git a/apps/cli-v2/src/commands/connect.ts b/apps/cli-v2/src/commands/connect.ts deleted file mode 100644 index 6a6f7cd..0000000 --- a/apps/cli-v2/src/commands/connect.ts +++ /dev/null @@ -1,81 +0,0 @@ -/** - * Short-lived WS connection helper for CLI commands (peers, send, inbox, state). - * - * Opens a connection to one mesh, runs a callback, then closes cleanly. - * The caller never deals with connect/close lifecycle. 
- */ - -import { hostname } from "node:os"; -import { createInterface } from "node:readline"; -import { BrokerClient } from "~/services/broker/facade.js"; -import { readConfig } from "~/services/config/facade.js"; -import type { JoinedMesh } from "~/services/config/facade.js"; - -export interface ConnectOpts { - /** Mesh slug to connect to. Auto-selects if only one mesh joined. */ - meshSlug?: string | null; - /** Display name for this session. Defaults to hostname-pid. */ - displayName?: string; - /** Connect to all meshes and run fn for each. */ - all?: boolean; -} - -async function pickMesh(meshes: JoinedMesh[]): Promise { - console.log("\n Select mesh:"); - meshes.forEach((m, i) => { - console.log(` ${i + 1}) ${m.slug}`); - }); - console.log(""); - - const rl = createInterface({ input: process.stdin, output: process.stdout }); - return new Promise((resolve) => { - rl.question(" Choice [1]: ", (answer) => { - rl.close(); - const idx = parseInt(answer || "1", 10) - 1; - if (idx >= 0 && idx < meshes.length) { - resolve(meshes[idx]!); - } else { - console.error(" Invalid choice, using first mesh."); - resolve(meshes[0]!); - } - }); - }); -} - -export async function withMesh( - opts: ConnectOpts, - fn: (client: BrokerClient, mesh: JoinedMesh) => Promise, -): Promise { - const config = readConfig(); - if (config.meshes.length === 0) { - console.error("No meshes joined. Run `claudemesh join ` first."); - process.exit(1); - } - - let mesh: JoinedMesh; - if (opts.meshSlug) { - const found = config.meshes.find((m) => m.slug === opts.meshSlug); - if (!found) { - console.error( - `Mesh "${opts.meshSlug}" not found. Joined: ${config.meshes.map((m) => m.slug).join(", ")}`, - ); - process.exit(1); - } - mesh = found; - } else if (config.meshes.length === 1) { - mesh = config.meshes[0]!; - } else { - mesh = await pickMesh(config.meshes); - } - - const displayName = opts.displayName ?? config.displayName ?? 
`${hostname()}-${process.pid}`; - const client = new BrokerClient(mesh, { displayName }); - - try { - await client.connect(); - const result = await fn(client, mesh); - return result; - } finally { - client.close(); - } -} diff --git a/apps/cli-v2/src/commands/doctor.ts b/apps/cli-v2/src/commands/doctor.ts deleted file mode 100644 index c14edfa..0000000 --- a/apps/cli-v2/src/commands/doctor.ts +++ /dev/null @@ -1,281 +0,0 @@ -/** - * `claudemesh doctor` — diagnostic checks. - * - * Walks through the install + runtime preconditions and prints each - * as pass/fail with a fix hint on failure. Exit 0 if everything - * passes, 1 otherwise. - */ - -import { existsSync, readFileSync, statSync } from "node:fs"; -import { homedir, platform } from "node:os"; -import { join } from "node:path"; -import { spawnSync } from "node:child_process"; -import { readConfig, getConfigPath } from "~/services/config/facade.js"; -import { VERSION, URLS } from "~/constants/urls.js"; - -interface Check { - name: string; - pass: boolean; - detail?: string; - fix?: string; -} - -function checkNode(): Check { - const major = Number(process.versions.node.split(".")[0]); - return { - name: "Node.js >= 20", - pass: major >= 20, - detail: `v${process.versions.node}`, - fix: "Install Node 20 or newer (https://nodejs.org)", - }; -} - -function checkClaudeOnPath(): Check { - const res = - platform() === "win32" - ? spawnSync("where", ["claude"]) - : spawnSync("sh", ["-c", "command -v claude"]); - const onPath = res.status === 0; - const location = onPath ? 
res.stdout.toString().trim().split("\n")[0] : undefined; - return { - name: "claude binary on PATH", - pass: onPath, - detail: location, - fix: "Install Claude Code (https://claude.com/claude-code)", - }; -} - -function checkMcpRegistered(): Check { - const claudeConfig = join(homedir(), ".claude.json"); - if (!existsSync(claudeConfig)) { - return { - name: "claudemesh MCP registered in ~/.claude.json", - pass: false, - fix: "Run `claudemesh install`", - }; - } - try { - const cfg = JSON.parse(readFileSync(claudeConfig, "utf-8")) as { - mcpServers?: Record; - }; - const registered = Boolean(cfg.mcpServers?.["claudemesh"]); - return { - name: "claudemesh MCP registered in ~/.claude.json", - pass: registered, - fix: registered ? undefined : "Run `claudemesh install`", - }; - } catch (e) { - return { - name: "claudemesh MCP registered in ~/.claude.json", - pass: false, - detail: e instanceof Error ? e.message : String(e), - fix: "Check ~/.claude.json for JSON parse errors", - }; - } -} - -function checkHooksRegistered(): Check { - const settings = join(homedir(), ".claude", "settings.json"); - if (!existsSync(settings)) { - return { - name: "Status hooks registered in ~/.claude/settings.json", - pass: false, - fix: "Run `claudemesh install` (remove --no-hooks)", - }; - } - try { - const raw = readFileSync(settings, "utf-8"); - const has = raw.includes("claudemesh hook "); - return { - name: "Status hooks registered in ~/.claude/settings.json", - pass: has, - fix: has ? undefined : "Run `claudemesh install` (remove --no-hooks)", - }; - } catch (e) { - return { - name: "Status hooks registered in ~/.claude/settings.json", - pass: false, - detail: e instanceof Error ? 
e.message : String(e), - }; - } -} - -function checkConfigFile(): Check { - const path = getConfigPath(); - if (!existsSync(path)) { - return { - name: "~/.claudemesh/config.json exists and parses", - pass: true, - detail: "not created yet (fine — no meshes joined)", - }; - } - try { - readConfig(); - const st = statSync(path); - const mode = (st.mode & 0o777).toString(8); - const secure = platform() === "win32" || mode === "600"; - return { - name: "~/.claudemesh/config.json parses + chmod 0600", - pass: secure, - detail: platform() === "win32" ? "chmod skipped on Windows" : `0${mode}`, - fix: secure ? undefined : `chmod 600 ${path}`, - }; - } catch (e) { - return { - name: "~/.claudemesh/config.json exists and parses", - pass: false, - detail: e instanceof Error ? e.message : String(e), - fix: "Inspect or delete ~/.claudemesh/config.json and re-join", - }; - } -} - -function checkKeypairs(): Check { - try { - const cfg = readConfig(); - if (cfg.meshes.length === 0) { - return { - name: "Mesh keypairs valid", - pass: true, - detail: "no meshes joined", - }; - } - for (const m of cfg.meshes) { - if (m.pubkey.length !== 64 || !/^[0-9a-f]+$/.test(m.pubkey)) { - return { - name: "Mesh keypairs valid", - pass: false, - detail: `${m.slug}: pubkey malformed`, - fix: `Leave + re-join the mesh: claudemesh leave ${m.slug}`, - }; - } - if (m.secretKey.length !== 128 || !/^[0-9a-f]+$/.test(m.secretKey)) { - return { - name: "Mesh keypairs valid", - pass: false, - detail: `${m.slug}: secret key malformed`, - fix: `Leave + re-join the mesh: claudemesh leave ${m.slug}`, - }; - } - } - return { - name: "Mesh keypairs valid", - pass: true, - detail: `${cfg.meshes.length} mesh(es)`, - }; - } catch (e) { - return { - name: "Mesh keypairs valid", - pass: false, - detail: e instanceof Error ? 
e.message : String(e), - }; - } -} - -async function checkBrokerWs(): Promise { - const wsUrl = URLS.BROKER; - const start = Date.now(); - try { - const WebSocket = (await import("ws")).default; - const ws = new WebSocket(wsUrl); - const result = await new Promise((resolve) => { - const timer = setTimeout(() => { - try { ws.close(); } catch { /* noop */ } - resolve({ - name: "Broker WebSocket reachable", - pass: false, - detail: `timeout after 5s (${wsUrl})`, - fix: "Check firewall/proxy. Broker at ic.claudemesh.com:443 over WSS.", - }); - }, 5000); - ws.once("open", () => { - clearTimeout(timer); - const latency = Date.now() - start; - try { ws.close(); } catch { /* noop */ } - resolve({ - name: "Broker WebSocket reachable", - pass: true, - detail: `${latency}ms to ${wsUrl}`, - }); - }); - ws.once("error", (e) => { - clearTimeout(timer); - resolve({ - name: "Broker WebSocket reachable", - pass: false, - detail: e.message, - fix: "Check network. Broker URL can be overridden via CLAUDEMESH_BROKER_URL.", - }); - }); - }); - return result; - } catch (e) { - return { - name: "Broker WebSocket reachable", - pass: false, - detail: e instanceof Error ? e.message : String(e), - }; - } -} - -async function checkNpmLatest(): Promise { - try { - const res = await fetch(URLS.NPM_REGISTRY, { signal: AbortSignal.timeout(5000) }); - if (!res.ok) { - return { name: "CLI up-to-date", pass: true, detail: `npm unreachable (${res.status}) — skipped` }; - } - const body = (await res.json()) as { "dist-tags"?: { alpha?: string; latest?: string } }; - const latest = body["dist-tags"]?.alpha ?? body["dist-tags"]?.latest; - if (!latest) return { name: "CLI up-to-date", pass: true, detail: "no dist-tag — skipped" }; - const up = latest === VERSION; - return { - name: "CLI up-to-date", - pass: up, - detail: up ? `latest ${latest}` : `installed ${VERSION} → latest ${latest}`, - fix: up ? 
undefined : "npm i -g claudemesh-cli@alpha", - }; - } catch { - return { name: "CLI up-to-date", pass: true, detail: "npm check skipped" }; - } -} - -export async function runDoctor(): Promise { - const useColor = - !process.env.NO_COLOR && process.env.TERM !== "dumb" && process.stdout.isTTY; - const dim = (s: string): string => (useColor ? `\x1b[2m${s}\x1b[22m` : s); - const green = (s: string): string => (useColor ? `\x1b[32m${s}\x1b[39m` : s); - const red = (s: string): string => (useColor ? `\x1b[31m${s}\x1b[39m` : s); - - console.log(`claudemesh doctor (v${VERSION})`); - console.log("─".repeat(60)); - - const checks: Check[] = [ - checkNode(), - checkClaudeOnPath(), - checkMcpRegistered(), - checkHooksRegistered(), - checkConfigFile(), - checkKeypairs(), - await checkBrokerWs(), - await checkNpmLatest(), - ]; - - for (const c of checks) { - const mark = c.pass ? green("✓") : red("✗"); - const detail = c.detail ? dim(` (${c.detail})`) : ""; - console.log(`${mark} ${c.name}${detail}`); - if (!c.pass && c.fix) { - console.log(dim(` → ${c.fix}`)); - } - } - - const failing = checks.filter((c) => !c.pass); - console.log(""); - if (failing.length === 0) { - console.log(green("All checks passed.")); - process.exit(0); - } else { - console.log(red(`${failing.length} check(s) failed.`)); - process.exit(1); - } -} diff --git a/apps/cli-v2/src/commands/hook.ts b/apps/cli-v2/src/commands/hook.ts deleted file mode 100644 index a3aa932..0000000 --- a/apps/cli-v2/src/commands/hook.ts +++ /dev/null @@ -1,123 +0,0 @@ -/** - * `claudemesh hook ` — Claude Code hook handler. - * - * Registered as a Stop + UserPromptSubmit hook by `claudemesh install`. - * On each turn boundary, Claude Code invokes: - * - * Stop → `claudemesh hook idle` - * UserPromptSubmit → `claudemesh hook working` - * - * We read the Claude Code hook JSON payload from stdin (contains cwd + - * session_id), then POST `/hook/set-status` to EVERY joined mesh's - * broker with {cwd, pid, status, session_id}. 
Each broker looks up - * its local presence row by (pid, cwd) and updates status. - * - * Fire-and-forget, silent. Hooks must NEVER block Claude Code or - * surface errors to the user. Debug logging available via - * CLAUDEMESH_HOOK_DEBUG=1. - * - * Why send to every broker? A user joined to multiple meshes has - * one presence row per mesh, each on its own broker. A turn boundary - * updates the status on every broker where this session is active. - * Brokers that don't have a matching presence just queue the signal - * in pending_status (harmless, TTL-swept). - */ - -import { readConfig } from "~/services/config/facade.js"; - -const DEBUG = process.env.CLAUDEMESH_HOOK_DEBUG === "1"; - -function debug(msg: string): void { - if (DEBUG) console.error(`[claudemesh-hook] ${msg}`); -} - -/** WS URL → HTTP URL (same host, swap scheme). */ -function wsToHttp(wsUrl: string): string { - try { - const u = new URL(wsUrl); - const httpScheme = u.protocol === "wss:" ? "https:" : "http:"; - return `${httpScheme}//${u.host}`; - } catch { - return wsUrl; - } -} - -async function readStdinJson(): Promise> { - if (process.stdin.isTTY) return {}; - const chunks: Uint8Array[] = []; - const reader = process.stdin; - try { - for await (const chunk of reader) { - chunks.push(chunk as Uint8Array); - if (chunks.reduce((n, c) => n + c.length, 0) > 256 * 1024) break; - } - const raw = Buffer.concat(chunks).toString("utf-8").trim(); - if (!raw) return {}; - return JSON.parse(raw) as Record; - } catch { - return {}; - } -} - -async function postHook( - brokerWsUrl: string, - body: Record, -): Promise { - const base = wsToHttp(brokerWsUrl); - try { - const controller = new AbortController(); - const t = setTimeout(() => controller.abort(), 1000); - await fetch(`${base}/hook/set-status`, { - method: "POST", - headers: { "Content-Type": "application/json" }, - body: JSON.stringify(body), - signal: controller.signal, - }).finally(() => clearTimeout(t)); - } catch (e) { - debug(`post failed 
${base}: ${e instanceof Error ? e.message : e}`); - } -} - -export async function runHook(args: string[]): Promise { - const status = args[0]; - if (!status || !["idle", "working", "dnd"].includes(status)) { - // Silent no-op — we never want a hook to surface an error. - process.exit(0); - } - - // Read Claude Code's stdin payload for cwd + session_id. - const stdinTimeout = new Promise>((r) => - setTimeout(() => r({}), 500), - ); - const payload = await Promise.race([readStdinJson(), stdinTimeout]); - const cwd = - (typeof payload.cwd === "string" && payload.cwd) || - process.env.CLAUDE_PROJECT_DIR || - process.cwd(); - const sessionId = - (typeof payload.session_id === "string" && payload.session_id) || ""; - - // Fan out to EVERY joined mesh's broker in parallel. - let config; - try { - config = readConfig(); - } catch (e) { - debug(`config load failed: ${e instanceof Error ? e.message : e}`); - process.exit(0); - } - if (config.meshes.length === 0) { - debug("no joined meshes, nothing to do"); - process.exit(0); - } - - const body = { cwd, pid: process.ppid, status, session_id: sessionId }; - debug( - `status=${status} cwd=${cwd} meshes=${config.meshes.length} session=${sessionId.slice(0, 8)}`, - ); - - // Dedupe by brokerUrl — if multiple meshes share a broker, one POST - // covers them (broker resolves presence by cwd+pid regardless). - const brokerUrls = [...new Set(config.meshes.map((m) => m.brokerUrl))]; - await Promise.all(brokerUrls.map((url) => postHook(url, body))); - process.exit(0); -} diff --git a/apps/cli-v2/src/commands/inbox.ts b/apps/cli-v2/src/commands/inbox.ts deleted file mode 100644 index 406e310..0000000 --- a/apps/cli-v2/src/commands/inbox.ts +++ /dev/null @@ -1,60 +0,0 @@ -/** - * `claudemesh inbox` — read pending peer messages. - * - * Connects, waits briefly for push delivery, drains the buffer, prints. - * Works best when message-mode is "inbox" or "off" (messages held at broker). 
- */ - -import { withMesh } from "./connect.js"; -import type { InboundPush } from "~/services/broker/facade.js"; - -export interface InboxFlags { - mesh?: string; - json?: boolean; - wait?: number; -} - -function formatMessage(msg: InboundPush, useColor: boolean): string { - const dim = (s: string) => (useColor ? `\x1b[2m${s}\x1b[22m` : s); - const bold = (s: string) => (useColor ? `\x1b[1m${s}\x1b[22m` : s); - - const text = msg.plaintext ?? `[encrypted: ${msg.ciphertext.slice(0, 32)}…]`; - const from = msg.senderPubkey.slice(0, 8); - const time = new Date(msg.createdAt).toLocaleTimeString(); - const kindTag = msg.kind === "direct" ? "→ direct" : msg.kind; - - return ` ${bold(from)} ${dim(`[${kindTag}] ${time}`)}\n ${text}`; -} - -export async function runInbox(flags: InboxFlags): Promise { - const useColor = - !process.env.NO_COLOR && process.env.TERM !== "dumb" && process.stdout.isTTY; - const dim = (s: string) => (useColor ? `\x1b[2m${s}\x1b[22m` : s); - const bold = (s: string) => (useColor ? `\x1b[1m${s}\x1b[22m` : s); - - const waitMs = (flags.wait ?? 1) * 1000; - - await withMesh({ meshSlug: flags.mesh ?? null }, async (client, mesh) => { - // Wait briefly for broker to push any held messages. - await new Promise((resolve) => setTimeout(resolve, waitMs)); - - const messages = client.drainPushBuffer(); - - if (flags.json) { - console.log(JSON.stringify(messages, null, 2)); - return; - } - - if (messages.length === 0) { - console.log(dim(`No messages on mesh "${mesh.slug}".`)); - return; - } - - console.log(bold(`Inbox — ${mesh.slug}`) + dim(` (${messages.length} message${messages.length === 1 ? 
"" : "s"})`)); - console.log(""); - for (const msg of messages) { - console.log(formatMessage(msg, useColor)); - console.log(""); - } - }); -} diff --git a/apps/cli-v2/src/commands/info.ts b/apps/cli-v2/src/commands/info.ts deleted file mode 100644 index 801497b..0000000 --- a/apps/cli-v2/src/commands/info.ts +++ /dev/null @@ -1,58 +0,0 @@ -/** - * `claudemesh info` — show mesh overview: slug, broker URL, peer count, state count. - * - * Useful for AI agents to orient themselves in a mesh via bash. - */ - -import { withMesh } from "./connect.js"; -import { readConfig } from "~/services/config/facade.js"; - -export interface InfoFlags { - mesh?: string; - json?: boolean; -} - -export async function runInfo(flags: InfoFlags): Promise { - const useColor = - !process.env.NO_COLOR && process.env.TERM !== "dumb" && process.stdout.isTTY; - const dim = (s: string) => (useColor ? `\x1b[2m${s}\x1b[22m` : s); - const bold = (s: string) => (useColor ? `\x1b[1m${s}\x1b[22m` : s); - - const config = readConfig(); - - await withMesh({ meshSlug: flags.mesh ?? null }, async (client, mesh) => { - const [brokerInfo, peers, state] = await Promise.all([ - client.meshInfo(), - client.listPeers(), - client.listState(), - ]); - - const output = { - slug: mesh.slug, - meshId: mesh.meshId, - memberId: mesh.memberId, - brokerUrl: mesh.brokerUrl, - displayName: config.displayName ?? null, - peerCount: peers.length, - stateCount: state.length, - ...(brokerInfo ?? 
{}), - }; - - if (flags.json) { - console.log(JSON.stringify(output, null, 2)); - return; - } - - console.log(bold(mesh.slug) + dim(` · ${mesh.brokerUrl}`)); - console.log(dim(` mesh: ${mesh.meshId}`)); - console.log(dim(` member: ${mesh.memberId}`)); - console.log(` peers: ${peers.length} connected`); - console.log(` state: ${state.length} keys`); - if (brokerInfo && typeof brokerInfo === "object") { - for (const [k, v] of Object.entries(brokerInfo)) { - if (["slug", "meshId", "brokerUrl"].includes(k)) continue; - console.log(dim(` ${k}: ${JSON.stringify(v)}`)); - } - } - }); -} diff --git a/apps/cli-v2/src/commands/install.ts b/apps/cli-v2/src/commands/install.ts deleted file mode 100644 index 80ffaad..0000000 --- a/apps/cli-v2/src/commands/install.ts +++ /dev/null @@ -1,564 +0,0 @@ -/** - * `claudemesh install` / `uninstall` — manage Claude Code MCP registration. - * - * install: - * 1. Preflight: bun is on PATH, this package's MCP entry is on disk. - * 2. Read ~/.claude.json (or empty object if absent). - * 3. Add/update `mcpServers.claudemesh` with the resolved entry path. - * 4. Write back with 0600 perms. - * 5. Verify via read-back, print success. - * - * uninstall: - * 1. Read ~/.claude.json (bail if missing). - * 2. Delete `mcpServers.claudemesh` if present. - * 3. Write back. - * - * Both are idempotent — re-running install is a no-op if the entry is - * already correct, and uninstall is a no-op if no entry exists. 
- */ - -import { - chmodSync, - copyFileSync, - existsSync, - mkdirSync, - readFileSync, - writeFileSync, -} from "node:fs"; -import { homedir, platform } from "node:os"; -import { dirname, join, resolve } from "node:path"; -import { fileURLToPath } from "node:url"; -import { spawnSync } from "node:child_process"; -import { readConfig } from "~/services/config/facade.js"; - -const MCP_NAME = "claudemesh"; -const CLAUDE_CONFIG = join(homedir(), ".claude.json"); -const CLAUDE_SETTINGS = join(homedir(), ".claude", "settings.json"); -const HOOK_COMMAND_STOP = "claudemesh hook idle"; -const HOOK_COMMAND_USER_PROMPT = "claudemesh hook working"; -const HOOK_MARKER = "claudemesh hook "; - -type McpEntry = { - command: string; - args?: string[]; - env?: Record; -}; - -interface HookCommand { - type: "command"; - command: string; -} -interface HookMatcher { - matcher?: string; - hooks: HookCommand[]; -} -type HooksConfig = Record; - -function readClaudeConfig(): Record { - if (!existsSync(CLAUDE_CONFIG)) return {}; - const text = readFileSync(CLAUDE_CONFIG, "utf-8").trim(); - if (!text) return {}; - try { - return JSON.parse(text) as Record; - } catch (e) { - throw new Error( - `failed to parse ${CLAUDE_CONFIG}: ${e instanceof Error ? e.message : String(e)}`, - ); - } -} - -/** - * Create a timestamped backup of ~/.claude.json before any write. - */ -function backupClaudeConfig(): void { - if (!existsSync(CLAUDE_CONFIG)) return; - const backupDir = join(dirname(CLAUDE_CONFIG), ".claude", "backups"); - mkdirSync(backupDir, { recursive: true }); - const ts = Date.now(); - const dest = join(backupDir, `.claude.json.pre-claudemesh.${ts}`); - copyFileSync(CLAUDE_CONFIG, dest); -} - -/** - * Atomic read-merge-write: re-reads ~/.claude.json at write time and - * patches ONLY the `claudemesh` MCP entry. Never touches other keys. - * Returns the action taken ("added" | "updated" | "unchanged"). 
- */ -function patchMcpServer(entry: McpEntry): "added" | "updated" | "unchanged" { - backupClaudeConfig(); - const cfg = readClaudeConfig(); - const servers = - ((cfg.mcpServers as Record) ?? {}); - if (!cfg.mcpServers) cfg.mcpServers = servers; - - const existing = servers[MCP_NAME]; - let action: "added" | "updated" | "unchanged"; - if (!existing) { - servers[MCP_NAME] = entry; - action = "added"; - } else if (entriesEqual(existing, entry)) { - return "unchanged"; - } else { - servers[MCP_NAME] = entry; - action = "updated"; - } - - flushClaudeConfig(cfg); - return action; -} - -/** - * Atomic read-merge-write: re-reads ~/.claude.json at write time and - * removes ONLY the `claudemesh` MCP entry. Never touches other keys. - * Returns true if an entry was removed. - */ -function removeMcpServer(): boolean { - if (!existsSync(CLAUDE_CONFIG)) return false; - backupClaudeConfig(); - const cfg = readClaudeConfig(); - const servers = cfg.mcpServers as Record | undefined; - if (!servers || !(MCP_NAME in servers)) return false; - delete servers[MCP_NAME]; - cfg.mcpServers = servers; - flushClaudeConfig(cfg); - return true; -} - -/** Low-level write — callers must backup + merge first. */ -function flushClaudeConfig(obj: Record): void { - mkdirSync(dirname(CLAUDE_CONFIG), { recursive: true }); - writeFileSync( - CLAUDE_CONFIG, - JSON.stringify(obj, null, 2) + "\n", - "utf-8", - ); - try { - chmodSync(CLAUDE_CONFIG, 0o600); - } catch { - /* windows has no chmod */ - } -} - - -/** Check `bun` is on PATH — OS-agnostic, node:child_process. */ -function bunAvailable(): boolean { - const res = - platform() === "win32" - ? spawnSync("where", ["bun"]) - : spawnSync("sh", ["-c", "command -v bun"]); - return res.status === 0; -} - -/** Absolute path to this CLI's entry file. */ -function resolveEntry(): string { - const here = fileURLToPath(import.meta.url); - // When bundled (dist/index.js), this file IS the entry → return self. 
- // When running from source (src/index.ts via bun), walk up to the - // dir + resolve index.ts. - if (here.endsWith("/dist/index.js") || here.endsWith("\\dist\\index.js")) { - return here; - } - return resolve(dirname(here), "..", "index.ts"); -} - -/** - * Build the MCP server entry for Claude Code's config. - * - * Two modes: - * - Installed globally (npm i -g claudemesh-cli): use `claudemesh` - * as the command, relies on it being on PATH. - * - Local dev (bun apps/cli/src/index.ts): use `bun `. - */ -function buildMcpEntry(entryPath: string): McpEntry { - const isBundled = entryPath.endsWith("/dist/index.js") || - entryPath.endsWith("\\dist\\index.js"); - if (isBundled) { - return { - command: "claudemesh", - args: ["mcp"], - }; - } - return { - command: "bun", - args: [entryPath, "mcp"], - }; -} - -function entriesEqual(a: McpEntry, b: McpEntry): boolean { - return ( - a.command === b.command && - JSON.stringify(a.args ?? []) === JSON.stringify(b.args ?? []) - ); -} - -function readClaudeSettings(): Record { - if (!existsSync(CLAUDE_SETTINGS)) return {}; - const text = readFileSync(CLAUDE_SETTINGS, "utf-8").trim(); - if (!text) return {}; - try { - return JSON.parse(text) as Record; - } catch (e) { - throw new Error( - `failed to parse ${CLAUDE_SETTINGS}: ${e instanceof Error ? e.message : String(e)}`, - ); - } -} - -function writeClaudeSettings(obj: Record): void { - mkdirSync(dirname(CLAUDE_SETTINGS), { recursive: true }); - writeFileSync( - CLAUDE_SETTINGS, - JSON.stringify(obj, null, 2) + "\n", - "utf-8", - ); -} - -/** - * All claudemesh MCP tool names, prefixed for allowedTools. - * These let Claude Code use claudemesh tools without --dangerously-skip-permissions. 
- */ -const CLAUDEMESH_TOOLS = [ - "mcp__claudemesh__cancel_scheduled", - "mcp__claudemesh__check_messages", - "mcp__claudemesh__claim_task", - "mcp__claudemesh__complete_task", - "mcp__claudemesh__create_stream", - "mcp__claudemesh__create_task", - "mcp__claudemesh__delete_file", - "mcp__claudemesh__file_status", - "mcp__claudemesh__forget", - "mcp__claudemesh__get_context", - "mcp__claudemesh__get_file", - "mcp__claudemesh__get_state", - "mcp__claudemesh__grant_file_access", - "mcp__claudemesh__graph_execute", - "mcp__claudemesh__graph_query", - "mcp__claudemesh__join_group", - "mcp__claudemesh__leave_group", - "mcp__claudemesh__list_collections", - "mcp__claudemesh__list_contexts", - "mcp__claudemesh__list_files", - "mcp__claudemesh__list_peers", - "mcp__claudemesh__list_scheduled", - "mcp__claudemesh__list_state", - "mcp__claudemesh__list_streams", - "mcp__claudemesh__list_tasks", - "mcp__claudemesh__mesh_execute", - "mcp__claudemesh__mesh_info", - "mcp__claudemesh__mesh_query", - "mcp__claudemesh__mesh_schema", - "mcp__claudemesh__message_status", - "mcp__claudemesh__ping_mesh", - "mcp__claudemesh__publish", - "mcp__claudemesh__recall", - "mcp__claudemesh__remember", - "mcp__claudemesh__schedule_reminder", - "mcp__claudemesh__send_message", - "mcp__claudemesh__set_state", - "mcp__claudemesh__set_status", - "mcp__claudemesh__set_summary", - "mcp__claudemesh__share_context", - "mcp__claudemesh__share_file", - "mcp__claudemesh__subscribe", - "mcp__claudemesh__vector_delete", - "mcp__claudemesh__vector_search", - "mcp__claudemesh__vector_store", -]; - -/** - * Pre-approve all claudemesh MCP tools in allowedTools. - * Merges into any existing list — never overwrites other entries. - * Returns which tools were added vs already present. - */ -function installAllowedTools(): { added: string[]; unchanged: number } { - const settings = readClaudeSettings(); - const existing = new Set((settings.allowedTools as string[] | undefined) ?? 
[]); - const toAdd = CLAUDEMESH_TOOLS.filter((t) => !existing.has(t)); - if (toAdd.length > 0) { - settings.allowedTools = [...Array.from(existing), ...toAdd]; - writeClaudeSettings(settings); - } - return { added: toAdd, unchanged: CLAUDEMESH_TOOLS.length - toAdd.length }; -} - -/** - * Remove claudemesh tools from allowedTools. - * Leaves all other entries intact. Returns count removed. - */ -function uninstallAllowedTools(): number { - if (!existsSync(CLAUDE_SETTINGS)) return 0; - const settings = readClaudeSettings(); - const existing = (settings.allowedTools as string[] | undefined) ?? []; - const toolSet = new Set(CLAUDEMESH_TOOLS); - const kept = existing.filter((t) => !toolSet.has(t)); - const removed = existing.length - kept.length; - if (removed > 0) { - settings.allowedTools = kept; - writeClaudeSettings(settings); - } - return removed; -} - -/** - * Add a Stop + UserPromptSubmit hook entry to ~/.claude/settings.json, - * idempotent on the command string. Returns counts for reporting. - */ -function installHooks(): { added: number; unchanged: number } { - const settings = readClaudeSettings(); - const hooks = ((settings.hooks ??= {}) as HooksConfig) ?? {}; - let added = 0; - let unchanged = 0; - - const ensure = (event: string, command: string): void => { - const list = (hooks[event] ??= []); - const alreadyPresent = list.some((entry) => - (entry.hooks ?? []).some((h) => h.command === command), - ); - if (alreadyPresent) { - unchanged += 1; - return; - } - list.push({ hooks: [{ type: "command", command }] }); - added += 1; - }; - ensure("Stop", HOOK_COMMAND_STOP); - ensure("UserPromptSubmit", HOOK_COMMAND_USER_PROMPT); - - settings.hooks = hooks; - writeClaudeSettings(settings); - return { added, unchanged }; -} - -/** - * Remove every hook entry whose command contains "claudemesh hook " - * from ~/.claude/settings.json. Idempotent. Returns removed count. 
- */ -function uninstallHooks(): number { - if (!existsSync(CLAUDE_SETTINGS)) return 0; - const settings = readClaudeSettings(); - const hooks = settings.hooks as HooksConfig | undefined; - if (!hooks) return 0; - let removed = 0; - for (const event of Object.keys(hooks)) { - const kept: HookMatcher[] = []; - for (const entry of hooks[event] ?? []) { - const filtered = (entry.hooks ?? []).filter( - (h) => !(h.command ?? "").includes(HOOK_MARKER), - ); - removed += (entry.hooks ?? []).length - filtered.length; - if (filtered.length > 0) kept.push({ ...entry, hooks: filtered }); - } - if (kept.length === 0) delete hooks[event]; - else hooks[event] = kept; - } - settings.hooks = hooks; - writeClaudeSettings(settings); - return removed; -} - -function installStatusLine(): { installed: boolean } { - const settings = readClaudeSettings(); - const cmd = `claudemesh status-line`; - const current = (settings as { statusLine?: { command?: string } }).statusLine; - // If the user has their own statusLine command, don't clobber it. - if (current?.command && !current.command.includes("claudemesh status-line")) { - return { installed: false }; - } - (settings as { statusLine?: { type: string; command: string } }).statusLine = { - type: "command", - command: cmd, - }; - writeClaudeSettings(settings); - return { installed: true }; -} - -export function runInstall(args: string[] = []): void { - const skipHooks = args.includes("--no-hooks"); - const wantStatusLine = args.includes("--status-line"); - console.log("claudemesh install"); - console.log("------------------"); - - const entry = resolveEntry(); - const isBundled = entry.endsWith("/dist/index.js") || - entry.endsWith("\\dist\\index.js"); - - // Dev mode (running from src/) requires bun on PATH; bundled mode - // (npm install -g) just uses node + the claudemesh bin shim. - if (!isBundled && !bunAvailable()) { - console.error( - "✗ `bun` is not on PATH. 
Install Bun first: https://bun.com", - ); - process.exit(1); - } - if (!existsSync(entry)) { - console.error(`✗ MCP entry not found at ${entry}`); - process.exit(1); - } - - const desired = buildMcpEntry(entry); - const action = patchMcpServer(desired); - - // Read-back verification. - const verify = readClaudeConfig(); - const verifyServers = (verify.mcpServers ?? {}) as Record; - const stored = verifyServers[MCP_NAME]; - if (!stored || !entriesEqual(stored, desired)) { - console.error( - `✗ post-write verification failed — ${CLAUDE_CONFIG} may be corrupt`, - ); - process.exit(1); - } - - // ANSI color helpers — stick to 8-color set so terminals without - // truecolor still render. Fall back to plain if NO_COLOR or dumb TERM. - const useColor = - !process.env.NO_COLOR && process.env.TERM !== "dumb" && process.stdout.isTTY; - const bold = (s: string) => (useColor ? `\x1b[1m${s}\x1b[22m` : s); - const yellow = (s: string) => (useColor ? `\x1b[33m${s}\x1b[39m` : s); - const dim = (s: string) => (useColor ? `\x1b[2m${s}\x1b[22m` : s); - - console.log(`✓ MCP server "${MCP_NAME}" ${action}`); - console.log(dim(` config: ${CLAUDE_CONFIG}`)); - console.log( - dim( - ` command: ${desired.command}${desired.args?.length ? " " + desired.args.join(" ") : ""}`, - ), - ); - - // allowedTools — pre-approve claudemesh MCP tools so peers don't need - // --dangerously-skip-permissions just to call mesh tools. - try { - const { added, unchanged } = installAllowedTools(); - if (added.length > 0) { - console.log( - `✓ allowedTools: ${added.length} claudemesh tools pre-approved${unchanged > 0 ? 
`, ${unchanged} already present` : ""}`, - ); - console.log(dim(` This lets claudemesh tools run without --dangerously-skip-permissions.`)); - console.log(dim(` Your existing allowedTools entries were preserved.`)); - } else { - console.log(`✓ allowedTools: all ${unchanged} claudemesh tools already pre-approved`); - } - console.log(dim(` config: ${CLAUDE_SETTINGS}`)); - } catch (e) { - console.error( - `⚠ allowedTools update failed: ${e instanceof Error ? e.message : String(e)}`, - ); - } - - // Hooks — status accuracy (Stop/UserPromptSubmit → POST /hook/set-status). - if (!skipHooks) { - try { - const { added, unchanged } = installHooks(); - if (added > 0) { - console.log( - `✓ Hooks registered (Stop + UserPromptSubmit) → ${added} added, ${unchanged} already present`, - ); - } else { - console.log(`✓ Hooks already registered (${unchanged} present)`); - } - console.log(dim(` config: ${CLAUDE_SETTINGS}`)); - } catch (e) { - console.error( - `⚠ hook registration failed: ${e instanceof Error ? e.message : String(e)}`, - ); - console.error( - " (MCP is still installed — hooks just skip. Retry with --no-hooks to suppress.)", - ); - } - } else { - console.log(dim("· Hooks skipped (--no-hooks)")); - } - - // Opt-in status line (shows mesh + peer count in Claude Code). - if (wantStatusLine) { - try { - const { installed } = installStatusLine(); - if (installed) { - console.log(`✓ Claude Code statusLine → \`claudemesh status-line\``); - console.log(dim(` Shows: ◇ · / online · `)); - } else { - console.log(dim("· statusLine already set to a custom command — left alone")); - } - } catch (e) { - console.error(`⚠ statusLine install failed: ${e instanceof Error ? e.message : String(e)}`); - } - } - - // Check if user has any meshes joined — nudge them if not. - let hasMeshes = false; - try { - const meshConfig = readConfig(); - hasMeshes = meshConfig.meshes.length > 0; - } catch { - // Config missing or corrupt — treat as no meshes. 
- } - - console.log(""); - console.log(yellow(bold("⚠ RESTART CLAUDE CODE")) + yellow(" for MCP tools to appear.")); - - if (!hasMeshes) { - console.log(""); - console.log(yellow("No meshes joined.") + " To connect with peers:"); - console.log( - ` ${bold("claudemesh ")}` + - dim(" — joins + launches in one step"), - ); - console.log( - ` ${dim("Create one at")} ${bold("https://claudemesh.com/dashboard")}`, - ); - } else { - console.log(""); - console.log( - `Next: ${bold("claudemesh")}` + dim(" — launch with your joined mesh"), - ); - } - - console.log(""); - console.log(dim("Optional:")); - console.log(dim(` claudemesh url-handler install # click-to-launch from email`)); - console.log(dim(` claudemesh install --status-line # live peer count in Claude Code`)); - console.log(dim(` claudemesh completions zsh # shell completions`)); -} - -export function runUninstall(): void { - console.log("claudemesh uninstall"); - console.log("--------------------"); - - // MCP entry — only removes claudemesh, never touches other servers. - if (removeMcpServer()) { - console.log(`✓ MCP server "${MCP_NAME}" removed`); - } else { - console.log(`· MCP server "${MCP_NAME}" not present`); - } - - // allowedTools - try { - const removed = uninstallAllowedTools(); - if (removed > 0) { - console.log(`✓ allowedTools: ${removed} claudemesh tools removed`); - } else { - console.log("· No claudemesh allowedTools to remove"); - } - } catch (e) { - console.error( - `⚠ allowedTools removal failed: ${e instanceof Error ? e.message : String(e)}`, - ); - } - - // Hooks - try { - const removed = uninstallHooks(); - if (removed > 0) { - console.log(`✓ Hooks removed (${removed} entries)`); - } else { - console.log("· No claudemesh hooks to remove"); - } - } catch (e) { - console.error( - `⚠ hook removal failed: ${e instanceof Error ? 
e.message : String(e)}`, - ); - } - - console.log(""); - console.log("Restart Claude Code to drop the MCP connection + hooks."); -} diff --git a/apps/cli-v2/src/commands/join.ts b/apps/cli-v2/src/commands/join.ts deleted file mode 100644 index 36edd65..0000000 --- a/apps/cli-v2/src/commands/join.ts +++ /dev/null @@ -1,193 +0,0 @@ -/** - * `claudemesh join ` — full join flow. - * - * Accepts either: - * - v2 short invite: `claudemesh.com/i/` or bare `` - * → POSTs to /api/public/invites/:code/claim, unseals root_key, - * persists mesh + fresh ed25519 identity. - * - v1 legacy invite: `ic://join/` or `https://.../join/` - * → parses signed payload, calls broker /join, persists. - * - * v1 continues to work throughout v0.1.x. v1 endpoints 410 Gone at v0.2.0. - */ - -import { parseInviteLink } from "~/services/invite/facade.js"; -import { enrollWithBroker } from "~/services/invite/facade.js"; -import { generateKeypair } from "~/services/crypto/facade.js"; -import { readConfig, writeConfig, getConfigPath } from "~/services/config/facade.js"; -import { claimInviteV2, parseV2InviteInput } from "~/services/invite/facade.js"; -import sodium from "libsodium-wrappers"; -import { writeFileSync, mkdirSync } from "node:fs"; -import { join, dirname } from "node:path"; -import { homedir, hostname } from "node:os"; -import { env } from "~/constants/urls.js"; - -/** Derive the web app base URL from the broker URL, unless explicitly overridden. */ -function deriveAppBaseUrl(): string { - const override = process.env.CLAUDEMESH_APP_URL; - if (override) return override.replace(/\/$/, ""); - // Broker is `wss://ic.claudemesh.com/ws` → app is `https://claudemesh.com`. - // For self-hosted: honour the broker host's parent domain as best-effort. - try { - const u = new URL(env.CLAUDEMESH_BROKER_URL); - const host = u.host.replace(/^ic\./, ""); - const scheme = u.protocol === "wss:" ? 
"https:" : "http:"; - return `${scheme}//${host}`; - } catch { - return "https://claudemesh.com"; - } -} - -async function runJoinV2(code: string): Promise { - const appBaseUrl = deriveAppBaseUrl(); - console.log(`Claiming invite ${code} via ${appBaseUrl}…`); - - let claim; - try { - claim = await claimInviteV2({ appBaseUrl, code }); - } catch (e) { - console.error( - `claudemesh: ${e instanceof Error ? e.message : String(e)}`, - ); - process.exit(1); - } - - // Generate a fresh ed25519 identity for this peer. The v2 claim - // endpoint creates the member row keyed on the x25519 pubkey we sent; - // the ed25519 keypair is what the `hello` handshake and future - // envelope signing will use. Stored locally only. - const keypair = await generateKeypair(); - const displayName = `${hostname()}-${process.pid}`; - - // Encode the unsealed 32-byte root key as URL-safe base64url (no pad) - // to match the format used everywhere else (broker stores it the - // same way in mesh.rootKey). - await sodium.ready; - const rootKeyB64 = sodium.to_base64( - claim.rootKey, - sodium.base64_variants.URLSAFE_NO_PADDING, - ); - - // Persist. We don't have a mesh_slug in the v2 response — the server - // derives slug from name and slug is no longer globally unique. Use a - // stable short derivative of the mesh id so `list` / `launch --mesh` - // still have something to match on. 
- const fallbackSlug = `mesh-${claim.meshId.slice(0, 8)}`; - const config = readConfig(); - config.meshes = config.meshes.filter((m) => m.meshId !== claim.meshId); - config.meshes.push({ - meshId: claim.meshId, - memberId: claim.memberId, - slug: fallbackSlug, - name: fallbackSlug, - pubkey: keypair.publicKey, - secretKey: keypair.secretKey, - brokerUrl: env.CLAUDEMESH_BROKER_URL, - joinedAt: new Date().toISOString(), - rootKey: rootKeyB64, - inviteVersion: 2, - }); - writeConfig(config); - - console.log(""); - console.log(`✓ Joined mesh ${claim.meshId} via v2 invite`); - console.log(` member id: ${claim.memberId}`); - console.log(` pubkey: ${keypair.publicKey.slice(0, 16)}…`); - console.log(` broker: ${env.CLAUDEMESH_BROKER_URL}`); - console.log(` config: ${getConfigPath()}`); - console.log(""); - console.log("Restart Claude Code to pick up the new mesh."); -} - -export async function runJoin(args: string[]): Promise { - const link = args[0]; - if (!link) { - console.error("Usage: claudemesh join "); - console.error(""); - console.error("Examples:"); - console.error(" claudemesh join https://claudemesh.com/i/abc12345"); - console.error(" claudemesh join abc12345"); - console.error(" claudemesh join ic://join/eyJ2IjoxLC4uLn0 (v1 legacy)"); - process.exit(1); - } - - // Try v2 first — short code / `/i/` URL. - const v2Code = parseV2InviteInput(link); - if (v2Code) { - await runJoinV2(v2Code); - return; - } - - // 1. Parse + verify signature client-side. - let invite; - try { - invite = await parseInviteLink(link); - } catch (e) { - console.error( - `claudemesh: ${e instanceof Error ? e.message : String(e)}`, - ); - process.exit(1); - } - const { payload, token } = invite; - console.log(`Joining mesh "${payload.mesh_slug}" (${payload.mesh_id})…`); - - // 2. Generate keypair. - const keypair = await generateKeypair(); - - // 3. Enroll with broker. 
- const displayName = `${hostname()}-${process.pid}`; - let enroll; - try { - enroll = await enrollWithBroker({ - brokerWsUrl: payload.broker_url, - inviteToken: token, - invitePayload: payload, - peerPubkey: keypair.publicKey, - displayName, - }); - } catch (e) { - console.error( - `claudemesh: broker enrollment failed: ${e instanceof Error ? e.message : String(e)}`, - ); - process.exit(1); - } - - // 4. Persist. - const config = readConfig(); - config.meshes = config.meshes.filter( - (m) => m.slug !== payload.mesh_slug, - ); - config.meshes.push({ - meshId: payload.mesh_id, - memberId: enroll.memberId, - slug: payload.mesh_slug, - name: payload.mesh_slug, - pubkey: keypair.publicKey, - secretKey: keypair.secretKey, - brokerUrl: payload.broker_url, - joinedAt: new Date().toISOString(), - }); - writeConfig(config); - - // 4b. Store invite token for per-session re-enrollment (launch --name). - const configDir = env.CLAUDEMESH_CONFIG_DIR ?? join(homedir(), ".claudemesh"); - const inviteFile = join(configDir, `invite-${payload.mesh_slug}.txt`); - try { - mkdirSync(dirname(inviteFile), { recursive: true }); - writeFileSync(inviteFile, link, "utf-8"); - } catch { - // Non-fatal — launch will fall back to shared identity. - } - - // 5. Report. - console.log(""); - console.log( - `✓ Joined "${payload.mesh_slug}" as ${displayName}${enroll.alreadyMember ? 
" (already a member — re-enrolled with same pubkey)" : ""}`, - ); - console.log(` member id: ${enroll.memberId}`); - console.log(` pubkey: ${keypair.publicKey.slice(0, 16)}…`); - console.log(` broker: ${payload.broker_url}`); - console.log(` config: ${getConfigPath()}`); - console.log(""); - console.log("Restart Claude Code to pick up the new mesh."); -} diff --git a/apps/cli-v2/src/commands/launch.ts b/apps/cli-v2/src/commands/launch.ts deleted file mode 100644 index d6c899c..0000000 --- a/apps/cli-v2/src/commands/launch.ts +++ /dev/null @@ -1,823 +0,0 @@ -// @ts-nocheck — v1 port, runtime-tested -/** - * `claudemesh launch` — spawn `claude` with peer mesh identity. - * - * Flags are defined in index.ts (citty command) — that is the source of - * truth. This file receives already-parsed flags and rawArgs. - * - * Flow: - * 1. Receive parsed flags from citty + rawArgs for -- passthrough - * 2. If --join: run join flow first - * 3. Load config → pick mesh (auto if 1, interactive picker if >1) - * 4. Write per-session config to tmpdir (isolates mesh selection) - * 5. Spawn claude with CLAUDEMESH_CONFIG_DIR + CLAUDEMESH_DISPLAY_NAME - * 6. On exit: cleanup tmpdir - */ - -import { spawnSync } from "node:child_process"; -import { randomUUID } from "node:crypto"; -import { mkdtempSync, writeFileSync, rmSync, readdirSync, statSync, existsSync, readFileSync } from "node:fs"; -import { tmpdir, hostname, homedir } from "node:os"; -import { join } from "node:path"; -import { createInterface } from "node:readline"; -import { readConfig, getConfigPath } from "~/services/config/facade.js"; -import type { Config, JoinedMesh, GroupEntry } from "~/services/config/facade.js"; -import { startCallbackListener, generatePairingCode } from "~/services/auth/facade.js"; -import { openBrowser } from "~/services/spawn/facade.js"; -import { BrokerClient } from "~/services/broker/facade.js"; - -// Flags as parsed by citty (index.ts is the source of truth for definitions). 
-export interface LaunchFlags { - name?: string; - role?: string; - groups?: string; - join?: string; - mesh?: string; - "message-mode"?: string; - "system-prompt"?: string; - resume?: string; - continue?: boolean; - yes?: boolean; - quiet?: boolean; -} - -// --- Interactive mesh picker --- - -async function pickMesh(meshes: JoinedMesh[]): Promise { - if (meshes.length === 1) return meshes[0]!; - - console.log("\n Select mesh:"); - meshes.forEach((m, i) => { - console.log(` ${i + 1}) ${m.slug}`); - }); - console.log(""); - - const rl = createInterface({ input: process.stdin, output: process.stdout }); - return new Promise((resolve) => { - rl.question(" Choice [1]: ", (answer) => { - rl.close(); - const idx = parseInt(answer || "1", 10) - 1; - if (idx >= 0 && idx < meshes.length) { - resolve(meshes[idx]!); - } else { - console.error(" Invalid choice, using first mesh."); - resolve(meshes[0]!); - } - }); - }); -} - -// --- Group string parser --- - -/** Parse "frontend:lead,reviewers:member,all" → GroupEntry[] */ -function parseGroupsString(raw: string): GroupEntry[] { - return raw - .split(",") - .map((s) => s.trim()) - .filter(Boolean) - .map((token) => { - const idx = token.indexOf(":"); - if (idx === -1) return { name: token }; - return { name: token.slice(0, idx), role: token.slice(idx + 1) }; - }); -} - -// --- Interactive role/groups prompts --- - -function askLine(prompt: string): Promise { - const rl = createInterface({ input: process.stdin, output: process.stdout }); - return new Promise((resolve) => { - rl.question(prompt, (answer) => { - rl.close(); - resolve(answer.trim()); - }); - }); -} - -// --- Permission confirmation --- - -async function confirmPermissions(): Promise { - const useColor = - !process.env.NO_COLOR && process.env.TERM !== "dumb" && process.stdout.isTTY; - const bold = (s: string): string => (useColor ? `\x1b[1m${s}\x1b[22m` : s); - const dim = (s: string): string => (useColor ? 
`\x1b[2m${s}\x1b[22m` : s); - const yellow = (s: string): string => (useColor ? `\x1b[33m${s}\x1b[39m` : s); - - console.log(yellow(bold(" Autonomous mode"))); - console.log(""); - console.log(" Claude will run with --dangerously-skip-permissions, bypassing"); - console.log(" ALL permission prompts — not just claudemesh tools."); - console.log(" Peers exchange text only — no file access, no tool calls."); - console.log(""); - console.log(dim(" Without -y: only claudemesh tools are pre-approved (via allowedTools).")); - console.log(dim(" Use -y for autonomous agents. Omit it for shared/multi-person meshes.")); - console.log(""); - - const rl = createInterface({ input: process.stdin, output: process.stdout }); - return new Promise((resolve, reject) => { - rl.question(` ${bold("Continue?")} [Y/n] `, (answer) => { - rl.close(); - const a = answer.trim().toLowerCase(); - if (a === "" || a === "y" || a === "yes") { - resolve(); - } else { - console.log("\n Aborted. Run without autonomous mode:"); - console.log(" claude --dangerously-load-development-channels server:claudemesh\n"); - process.exit(0); - } - }); - }); -} - -// --- Banner --- - -import { - bold as tBold, dim as tDim, green as tGreen, orange as tOrange, - boldOrange, HIDE_CURSOR, SHOW_CURSOR, -} from "~/ui/styles.js"; -import { - enterFullScreen, exitFullScreen, writeCentered, termSize, - drawTopBar, drawBottomBar, menuSelect, textInput, confirmPrompt, -} from "~/ui/screen.js"; -import { createSpinner, FRAME_HEIGHT } from "~/ui/spinner.js"; - -interface LaunchWizardResult { - mesh: JoinedMesh; - role: string | null; - groups: GroupEntry[]; - messageMode: "push" | "inbox" | "off"; - skipPermissions: boolean; -} - -/** - * Full-screen launch wizard — spinning logo + interactive config. - * Mesh selection, role, groups, message mode, permissions — all in one TUI. - * Falls back to plain text on non-TTY. 
- */ -async function runLaunchWizard(opts: { - displayName: string; - meshes: JoinedMesh[]; - selectedMesh: JoinedMesh | null; - existingRole: string | null; - existingGroups: GroupEntry[]; - existingMessageMode: "push" | "inbox" | "off" | null; - skipPermConfirm: boolean; -}): Promise { - if (!process.stdout.isTTY) { - return { - mesh: opts.selectedMesh ?? opts.meshes[0]!, - role: opts.existingRole, - groups: opts.existingGroups, - messageMode: opts.existingMessageMode ?? "push", - skipPermissions: opts.skipPermConfirm, - }; - } - - const { rows } = termSize(); - enterFullScreen(); - drawTopBar(); - - // Spinning logo centered in upper portion - const logoTop = Math.floor((rows - FRAME_HEIGHT - 16) / 2); - const brandRow = logoTop + FRAME_HEIGHT + 1; - const subtitleRow = brandRow + 1; - const formRow = subtitleRow + 2; - - writeCentered(brandRow, boldOrange("claudemesh")); - writeCentered(subtitleRow, tDim("peer mesh for Claude Code")); - - const spinner = createSpinner({ - render(lines) { - for (let i = 0; i < lines.length; i++) { - writeCentered(logoTop + i, lines[i]!); - } - }, - interval: 70, - }); - spinner.start(); - - // Show detected info - let row = formRow; - writeCentered(row, `Directory ${tGreen("✓")} ${process.cwd()}`); - row++; - writeCentered(row, `Name ${tGreen("✓")} ${opts.displayName}`); - row += 2; - - // Mesh selection - let mesh: JoinedMesh; - if (opts.selectedMesh) { - mesh = opts.selectedMesh; - writeCentered(row, `Mesh ${tGreen("✓")} ${mesh.slug}`); - row++; - } else if (opts.meshes.length === 1) { - mesh = opts.meshes[0]!; - writeCentered(row, `Mesh ${tGreen("✓")} ${mesh.slug}`); - row++; - } else { - spinner.stop(); - const choice = await menuSelect({ - title: "Select mesh", - items: opts.meshes.map(m => m.slug), - row, - }); - mesh = opts.meshes[choice]!; - // Redraw as confirmed - for (let i = 0; i < opts.meshes.length + 1; i++) { - writeCentered(row + i, " "); - } - writeCentered(row, `Mesh ${tGreen("✓")} ${mesh.slug}`); - 
spinner.start(); - row++; - } - - row++; - - // Interactive fields - let role = opts.existingRole; - let groups = opts.existingGroups; - let messageMode = opts.existingMessageMode ?? "push" as "push" | "inbox" | "off"; - - // Role input - if (role === null) { - spinner.stop(); - const answer = await textInput({ label: "Role", row, placeholder: "optional — press Enter to skip" }); - if (answer) role = answer; - spinner.start(); - row++; - } else { - writeCentered(row, `Role ${tGreen("✓")} ${role}`); - row++; - } - - // Groups input - if (groups.length === 0) { - spinner.stop(); - const answer = await textInput({ label: "Groups", row, placeholder: "comma-separated, optional" }); - if (answer) groups = parseGroupsString(answer); - spinner.start(); - row++; - } else { - const tags = groups.map(g => `@${g.name}${g.role ? `:${g.role}` : ""}`).join(", "); - writeCentered(row, `Groups ${tGreen("✓")} ${tags}`); - row++; - } - - // Message mode selection - if (opts.existingMessageMode === null) { - row++; - spinner.stop(); - const choice = await menuSelect({ - title: "Message mode", - items: [ - "Push (real-time, peers can interrupt)", - "Inbox (held until you check)", - "Off (tools only, no messages)", - ], - row, - }); - messageMode = (["push", "inbox", "off"] as const)[choice]; - spinner.start(); - row += 5; - } else { - writeCentered(row, `Messages ${tGreen("✓")} ${messageMode}`); - row++; - } - - // Permissions confirmation - let skipPermissions = opts.skipPermConfirm; - if (!skipPermissions) { - row++; - spinner.stop(); - writeCentered(row, tDim("Claude will run with --dangerously-skip-permissions,")); - writeCentered(row + 1, tDim("bypassing ALL permission prompts — not just claudemesh.")); - row += 3; - const confirmed = await confirmPrompt({ - message: boldOrange("Autonomous mode?"), - row, - defaultYes: true, - }); - if (!confirmed) { - exitFullScreen(); - console.log(" Run without autonomous mode:"); - console.log(" claude --dangerously-load-development-channels 
server:claudemesh\n"); - process.exit(0); - } - skipPermissions = true; - spinner.start(); - } - - // Final animation - row += 2; - writeCentered(row, tDim("Launching Claude Code...")); - - await new Promise(r => setTimeout(r, 800)); - spinner.stop(); - exitFullScreen(); - - return { mesh, role, groups, messageMode, skipPermissions }; -} - -function printBanner(name: string, meshSlug: string, role: string | null, groups: GroupEntry[], messageMode: "push" | "inbox" | "off"): void { - const useColor = - !process.env.NO_COLOR && process.env.TERM !== "dumb" && process.stdout.isTTY; - const dim = (s: string): string => (useColor ? `\x1b[2m${s}\x1b[22m` : s); - const bold = (s: string): string => (useColor ? `\x1b[1m${s}\x1b[22m` : s); - - const roleSuffix = role ? ` (${role})` : ""; - const groupTags = groups.length - ? " [" + groups.map((g) => `@${g.name}${g.role ? `:${g.role}` : ""}`).join(", ") + "]" - : ""; - - const rule = "─".repeat(60); - console.log(bold(`claudemesh launch`) + dim(` — as ${name}${roleSuffix} on ${meshSlug}${groupTags} [${messageMode}]`)); - console.log(rule); - if (messageMode === "push") { - console.log("Peer messages arrive as reminders in real-time."); - } else if (messageMode === "inbox") { - console.log("Peer messages held in inbox. Use check_messages to read."); - } else { - console.log("Messages off. Use check_messages to poll manually."); - } - console.log("Peers send text only — they cannot call tools or read files."); - console.log(dim(`Config: ${getConfigPath()}`)); - console.log(rule); - console.log(""); -} - -// --- Main --- - -export async function runLaunch(flags: LaunchFlags, rawArgs: string[]): Promise { - // Extract args that follow "--" — passed straight through to claude. - const dashIdx = rawArgs.indexOf("--"); - const claudePassthrough = dashIdx >= 0 ? rawArgs.slice(dashIdx + 1) : []; - - // Normalise flags into the internal shape used below. - const args = { - name: flags.name ?? null, - role: flags.role ?? 
null, - groups: flags.groups ?? null, - joinLink: flags.join ?? null, - meshSlug: flags.mesh ?? null, - messageMode: (["push", "inbox", "off"].includes(flags["message-mode"] ?? "") - ? flags["message-mode"] as "push" | "inbox" | "off" - : null), - systemPrompt: flags["system-prompt"] ?? null, - resume: flags.resume ?? null, - continueSession: flags.continue ?? false, - quiet: flags.quiet ?? false, - skipPermConfirm: flags.yes ?? false, - claudeArgs: claudePassthrough, - }; - - // 1. If --join, run join flow first. - if (args.joinLink) { - console.log("Joining mesh..."); - const invite = await parseInviteLink(args.joinLink); - const keypair = await generateKeypair(); - const displayName = (args.name ?? process.env.USER ?? process.env.USERNAME ?? hostname()); - const enroll = await enrollWithBroker({ - brokerWsUrl: invite.payload.broker_url, - inviteToken: invite.token, - invitePayload: invite.payload, - peerPubkey: keypair.publicKey, - displayName, - }); - const config = readConfig(); - config.meshes = config.meshes.filter( - (m) => m.slug !== invite.payload.mesh_slug, - ); - config.meshes.push({ - meshId: invite.payload.mesh_id, - memberId: enroll.memberId, - slug: invite.payload.mesh_slug, - name: invite.payload.mesh_slug, - pubkey: keypair.publicKey, - secretKey: keypair.secretKey, - brokerUrl: invite.payload.broker_url, - joinedAt: new Date().toISOString(), - }); - const { writeConfig } = await import("~/services/config/facade.js"); - writeConfig(config); - console.log( - `✓ Joined "${invite.payload.mesh_slug}"${enroll.alreadyMember ? " (already member)" : ""}`, - ); - } - - // 2. Load config, pick mesh. - const config = readConfig(); - let justSynced = false; - - if (config.meshes.length === 0 && !args.joinLink) { - const useColor = !process.env.NO_COLOR && process.env.TERM !== "dumb" && process.stdout.isTTY; - const bold = (s: string): string => (useColor ? `\x1b[1m${s}\x1b[22m` : s); - const dim = (s: string): string => (useColor ? 
`\x1b[2m${s}\x1b[22m` : s); - const green = (s: string): string => (useColor ? `\x1b[32m${s}\x1b[39m` : s); - - const code = generatePairingCode(); - const listener = await startCallbackListener(); - const url = `https://claudemesh.com/cli-auth?port=${listener.port}&code=${code}&action=sync`; - - console.log(`\n ${bold("Welcome to claudemesh!")} No meshes found.`); - console.log(` Opening browser to sign in...\n`); - - const opened = await openBrowser(url); - if (!opened) { - console.log(` Couldn't open browser automatically.`); - } - console.log(` ${dim(`Visit: ${url}`)}`); - console.log(` ${dim(`Or join with invite: claudemesh launch --join `)}\n`); - - // Race: localhost callback vs manual paste vs timeout - const manualPromise = new Promise((resolve) => { - const rl = createInterface({ input: process.stdin, output: process.stdout }); - rl.question(" Paste sync token (or wait for browser): ", (answer) => { - rl.close(); - if (answer.trim()) resolve(answer.trim()); - }); - }); - - const timeoutPromise = new Promise((resolve) => { - setTimeout(() => resolve(null), 15 * 60_000); - }); - - const syncToken = await Promise.race([ - listener.token, - manualPromise, - timeoutPromise, - ]); - - listener.close(); - - if (!syncToken) { - console.error("\n Timed out waiting for sign-in."); - process.exit(1); - } - - // Generate keypair and sync with broker - const { generateKeypair } = await import("~/services/crypto/facade.js"); - const keypair = await generateKeypair(); - const displayNameForSync = (args.name ?? process.env.USER ?? process.env.USERNAME ?? 
hostname()); - - const { syncWithBroker } = await import("~/services/auth/facade.js"); - const result = await syncWithBroker(syncToken, keypair.publicKey, displayNameForSync); - - // Write all meshes to config - const { writeConfig } = await import("~/services/config/facade.js"); - for (const m of result.meshes) { - config.meshes.push({ - meshId: m.mesh_id, - memberId: m.member_id, - slug: m.slug, - name: m.slug, - pubkey: keypair.publicKey, - secretKey: keypair.secretKey, - brokerUrl: m.broker_url, - joinedAt: new Date().toISOString(), - }); - } - config.accountId = result.account_id; - writeConfig(config); - justSynced = true; - - console.log(`\n ${green("✓")} Synced ${result.meshes.length} mesh(es): ${result.meshes.map(m => m.slug).join(", ")}\n`); - } - - if (config.meshes.length === 0) { - console.error("No meshes joined. Run `claudemesh join ` or use --join ."); - process.exit(1); - } - - // Resolve mesh — by flag, auto (if 1), or defer to wizard (if >1) - let mesh: JoinedMesh; - if (args.meshSlug) { - const found = config.meshes.find((m) => m.slug === args.meshSlug); - if (!found) { - console.error( - `Mesh "${args.meshSlug}" not found. Joined: ${config.meshes.map((m) => m.slug).join(", ")}`, - ); - process.exit(1); - } - mesh = found; - } else if (config.meshes.length === 1) { - mesh = config.meshes[0]!; - } else { - // Multiple meshes — wizard will handle selection - mesh = null as unknown as JoinedMesh; // set by wizard below - } - - // 3. Session identity + role/groups via TUI wizard. - const displayName = (args.name ?? process.env.USER ?? process.env.USERNAME ?? hostname()); - - let role: string | null = args.role; - let parsedGroups: GroupEntry[] = args.groups ? parseGroupsString(args.groups) : []; - let messageMode: "push" | "inbox" | "off" = args.messageMode ?? "push"; - - // `-y` (skipPermConfirm) implies fully non-interactive — skip the wizard - // entirely and use sensible defaults (role=member, no groups, push mode). 
- // Same applies to `--quiet` and the post-sync path where we already picked. - const nonInteractive = args.quiet || justSynced || args.skipPermConfirm; - if (!nonInteractive) { - const wizardResult = await runLaunchWizard({ - displayName, - meshes: config.meshes, - selectedMesh: mesh ?? null, - existingRole: args.role, - existingGroups: parsedGroups, - existingMessageMode: args.messageMode ?? null, - skipPermConfirm: args.skipPermConfirm, - }); - mesh = wizardResult.mesh; - role = wizardResult.role; - parsedGroups = wizardResult.groups; - messageMode = wizardResult.messageMode; - args.skipPermConfirm = wizardResult.skipPermissions; - } else if (!mesh) { - // No mesh picked yet + non-interactive — pick the first one deterministically. - mesh = config.meshes[0]!; - } - - // Clean up orphaned tmpdirs from crashed sessions (older than 1 hour) - const tmpBase = tmpdir(); - try { - for (const entry of readdirSync(tmpBase)) { - if (!entry.startsWith("claudemesh-")) continue; - const full = join(tmpBase, entry); - const age = Date.now() - statSync(full).mtimeMs; - if (age > 3600_000) rmSync(full, { recursive: true, force: true }); - } - } catch { /* best effort */ } - - // Clean up stale mesh MCP entries from crashed sessions - try { - const claudeConfigPath = join(homedir(), ".claude.json"); - if (existsSync(claudeConfigPath)) { - const claudeConfig = JSON.parse(readFileSync(claudeConfigPath, "utf-8")); - const mcpServers = claudeConfig.mcpServers ?? 
{}; - let cleaned = 0; - for (const key of Object.keys(mcpServers)) { - if (!key.startsWith("mesh:")) continue; - const meta = mcpServers[key]?._meshSession; - if (!meta?.pid) continue; - // Check if the PID is still alive - try { - process.kill(meta.pid, 0); // signal 0 = check existence - } catch { - // PID is dead — remove stale entry - delete mcpServers[key]; - cleaned++; - } - } - if (cleaned > 0) { - claudeConfig.mcpServers = mcpServers; - writeFileSync(claudeConfigPath, JSON.stringify(claudeConfig, null, 2) + "\n", "utf-8"); - } - } - } catch { /* best effort */ } - - // --- Fetch deployed services for native MCP entries --- - let serviceCatalog: Array<{ - name: string; - description: string; - status: string; - tools: Array<{ name: string; description: string; inputSchema: object }>; - deployed_by: string; - }> = []; - - try { - const tmpClient = new BrokerClient(mesh, { displayName }); - await tmpClient.connect(); - // Wait briefly for hello_ack with service catalog - await new Promise(r => setTimeout(r, 2000)); - serviceCatalog = tmpClient.serviceCatalog; - tmpClient.close(); - } catch { - // Non-fatal — launch without native service entries - if (!args.quiet) { - console.log(" (Could not fetch service catalog — mesh services won't be natively available)"); - } - } - - // 4. Write session config to tmpdir (isolates mesh selection). - const tmpDir = mkdtempSync(join(tmpdir(), "claudemesh-")); - const sessionConfig: Config = { - version: 1, - meshes: [mesh], - displayName, - ...(role ? { role } : {}), - ...(parsedGroups.length > 0 ? { groups: parsedGroups } : {}), - messageMode, - }; - writeFileSync( - join(tmpDir, "config.json"), - JSON.stringify(sessionConfig, null, 2) + "\n", - "utf-8", - ); - - // 5. Print summary banner (wizard already handled all interactive config). 
- if (!args.quiet) { - printBanner(displayName, mesh.slug, role, parsedGroups, messageMode); - } - - // --- Install native MCP entries for deployed mesh services --- - const meshMcpEntries: Array<{ key: string; entry: unknown }> = []; - - if (serviceCatalog.length > 0) { - const claudeConfigPath = join(homedir(), ".claude.json"); - - // Read-modify-write: only touch mesh:* entries in mcpServers - let claudeConfig: Record = {}; - try { - claudeConfig = JSON.parse(readFileSync(claudeConfigPath, "utf-8")); - } catch { - claudeConfig = {}; - } - - const mcpServers = (claudeConfig.mcpServers ?? {}) as Record; - - // Session-scoped key: mesh:: - const sessionTag = `${process.pid}`; - - for (const svc of serviceCatalog) { - if (svc.status !== "running") continue; - const entryKey = `mesh:${svc.name}:${sessionTag}`; - const entry = { - command: "claudemesh", - args: ["mcp", "--service", svc.name], - env: { - CLAUDEMESH_CONFIG_DIR: tmpDir, - }, - _meshSession: { - pid: process.pid, - meshSlug: mesh.slug, - serviceName: svc.name, - createdAt: new Date().toISOString(), - }, - }; - mcpServers[entryKey] = entry; - meshMcpEntries.push({ key: entryKey, entry }); - } - - claudeConfig.mcpServers = mcpServers; - writeFileSync(claudeConfigPath, JSON.stringify(claudeConfig, null, 2) + "\n", "utf-8"); - - if (!args.quiet && meshMcpEntries.length > 0) { - console.log(` ${meshMcpEntries.length} mesh service(s) registered as native MCPs:`); - for (const { key } of meshMcpEntries) { - const svcName = key.split(":")[1]; - const svc = serviceCatalog.find(s => s.name === svcName); - console.log(` ${svcName} (${svc?.tools.length ?? 0} tools)`); - } - console.log(""); - } - } - - // 6. Spawn claude with ephemeral config + dev channel + auto-permissions. - // Strip any user-supplied --dangerously flags to avoid duplicates. 
- const filtered: string[] = []; - for (let i = 0; i < args.claudeArgs.length; i++) { - if (args.claudeArgs[i] === "--dangerously-load-development-channels" - || args.claudeArgs[i] === "--dangerously-skip-permissions") { - if (args.claudeArgs[i] === "--dangerously-load-development-channels") i++; - continue; - } - filtered.push(args.claudeArgs[i]!); - } - // --dangerously-skip-permissions is only added when the user explicitly - // passes -y / --yes. Without it, claudemesh tools still work because - // `claudemesh install` pre-approves them via allowedTools in settings.json. - // This keeps permissions tight for multi-person meshes. - // Session identity: --resume reuses existing session, otherwise generate new. - // When resuming, Claude Code reuses the session ID so the mesh peer identity persists. - const isResume = args.resume !== null || args.continueSession; - const claudeSessionId = isResume ? undefined : randomUUID(); - - const claudeArgs = [ - "--dangerously-load-development-channels", - "server:claudemesh", - ...(claudeSessionId ? ["--session-id", claudeSessionId] : []), - ...(args.resume ? ["--resume", args.resume] : []), - ...(args.continueSession ? ["--continue"] : []), - ...(args.skipPermConfirm ? ["--dangerously-skip-permissions"] : []), - ...(args.systemPrompt ? ["--system-prompt", args.systemPrompt] : []), - ...filtered, - ]; - - // Resolve the full path to `claude` — when launched from a non-interactive - // shell (e.g. nvm node shebang), ~/.local/bin may not be in PATH. - const isWindows = process.platform === "win32"; - let claudeBin = "claude"; - if (!isWindows) { - const candidates = [ - join(homedir(), ".local", "bin", "claude"), - "/usr/local/bin/claude", - join(homedir(), ".claude", "bin", "claude"), - ]; - for (const c of candidates) { - if (existsSync(c)) { claudeBin = c; break; } - } - } - - // 7. Define cleanup — runs on every exit path via process.on('exit'). 
- // Synchronous-only (rmSync + writeFileSync) so it works inside the - // 'exit' event, which does not allow async work. - const cleanup = (): void => { - // Remove mesh MCP entries from ~/.claude.json - if (meshMcpEntries.length > 0) { - try { - const claudeConfigPath = join(homedir(), ".claude.json"); - const claudeConfig = JSON.parse(readFileSync(claudeConfigPath, "utf-8")); - const mcpServers = claudeConfig.mcpServers ?? {}; - for (const { key } of meshMcpEntries) { - delete mcpServers[key]; - } - claudeConfig.mcpServers = mcpServers; - writeFileSync(claudeConfigPath, JSON.stringify(claudeConfig, null, 2) + "\n", "utf-8"); - } catch { /* best effort */ } - } - // Ephemeral config dir - try { - rmSync(tmpDir, { recursive: true, force: true }); - } catch { /* best effort */ } - }; - - // Register cleanup on every exit path — including normal exit, uncaught - // throws, and fatal signals. process.on('exit') fires synchronously, which - // is what the rmSync + writeFileSync above need. - process.on("exit", cleanup); - - // 8. Hard-reset the TTY before handing control to claude. - // - // Every interactive element in the pre-launch flow — the full-screen - // wizard (tui/screen.ts), the permission confirmation, the callback- - // listener paste prompt, the mesh picker — attaches listeners to - // process.stdin, toggles raw mode, hides the cursor, and sometimes - // enters the alt-screen. Those helpers do best-effort cleanup in their - // own finally blocks, but any leak — an orphaned 'data' listener, a - // still-raw TTY, a pending render paint — means the parent node process - // keeps competing with claude's Ink TUI for the same keystrokes and - // stdout frames. Symptoms: dropped keystrokes at the claude prompt, or - // the wizard visibly repainting on top of claude after launch. - // - // Defensive reset here is cheap and guarantees a clean TTY regardless - // of what the wizard helpers did or didn't restore. 
- if (process.stdin.isTTY) { - try { process.stdin.setRawMode(false); } catch { /* not a TTY under some parents */ } - } - process.stdin.removeAllListeners("data"); - process.stdin.removeAllListeners("keypress"); - process.stdin.removeAllListeners("readable"); - process.stdin.pause(); - if (process.stdout.isTTY) { - process.stdout.write("\x1b[?25h"); // show cursor - process.stdout.write("\x1b[?1049l"); // exit alt-screen if any wizard step entered it - } - - // 9. Block-and-wait on claude with spawnSync. - // - // Why spawnSync instead of spawn + child.on('exit'): - // - spawn keeps the parent node event loop running alongside claude. - // Any stray listener, setImmediate, or async wizard tail-end can - // still fire during claude's lifetime, stealing input or painting - // over claude's TUI. - // - spawnSync blocks the parent event loop completely until claude - // exits. No listeners fire. Nothing paints. The parent is effectively - // suspended, and claude has exclusive ownership of the TTY. - // - // Signal forwarding: claude inherits the TTY process group via - // stdio: "inherit". When the user hits Ctrl-C, the terminal sends - // SIGINT to the whole group. Claude handles it (Ink unmounts, exits - // cleanly); spawnSync returns with result.signal='SIGINT'. We re-raise - // the same signal on the parent so it dies the same way. - const result = spawnSync(claudeBin, claudeArgs, { - stdio: "inherit", - shell: isWindows, - env: { - ...process.env, - CLAUDEMESH_CONFIG_DIR: tmpDir, - CLAUDEMESH_DISPLAY_NAME: displayName, - ...(claudeSessionId ? { CLAUDEMESH_SESSION_ID: claudeSessionId } : {}), - MCP_TIMEOUT: process.env.MCP_TIMEOUT ?? "30000", - MAX_MCP_OUTPUT_TOKENS: process.env.MAX_MCP_OUTPUT_TOKENS ?? "50000", - ...(role ? { CLAUDEMESH_ROLE: role } : {}), - }, - }); - - // 10. Handle the result. Cleanup runs automatically via process.on('exit'). 
- if (result.error) { - const err = result.error as NodeJS.ErrnoException; - if (err.code === "ENOENT") { - console.error("✗ `claude` not found on PATH. Install Claude Code first."); - } else { - console.error(`✗ failed to launch claude: ${err.message}`); - } - process.exit(1); - } - - if (result.signal) { - // Re-raise the same signal so the parent dies the same way the child did. - process.kill(process.pid, result.signal); - return; - } - - process.exit(result.status ?? 0); -} diff --git a/apps/cli-v2/src/commands/leave.ts b/apps/cli-v2/src/commands/leave.ts deleted file mode 100644 index 190e6be..0000000 --- a/apps/cli-v2/src/commands/leave.ts +++ /dev/null @@ -1,25 +0,0 @@ -/** - * `claudemesh leave ` — remove a mesh from local config. - * - * Does NOT (yet) notify the broker. In 15b+ this will send a - * best-effort revoke request before removing the entry. - */ - -import { readConfig, writeConfig } from "~/services/config/facade.js"; - -export function runLeave(args: string[]): void { - const slug = args[0]; - if (!slug) { - console.error("Usage: claudemesh leave "); - process.exit(1); - } - const config = readConfig(); - const before = config.meshes.length; - config.meshes = config.meshes.filter((m) => m.slug !== slug); - if (config.meshes.length === before) { - console.error(`claudemesh: no joined mesh with slug "${slug}"`); - process.exit(1); - } - writeConfig(config); - console.log(`Left mesh "${slug}". Remaining: ${config.meshes.length}`); -} diff --git a/apps/cli-v2/src/commands/list.ts b/apps/cli-v2/src/commands/list.ts deleted file mode 100644 index 14240ba..0000000 --- a/apps/cli-v2/src/commands/list.ts +++ /dev/null @@ -1,104 +0,0 @@ -/** - * `claudemesh mesh list` — merged view of server + local meshes. 
- */ - -import { readConfig, getConfigPath } from "~/services/config/facade.js"; -import { getStoredToken } from "~/services/auth/facade.js"; -import { request } from "~/services/api/facade.js"; -import { URLS } from "~/constants/urls.js"; -import { bold, dim, green, yellow, red } from "~/ui/styles.js"; - -const BROKER_HTTP = URLS.BROKER.replace("wss://", "https://").replace("ws://", "http://").replace("/ws", ""); - -interface ServerMesh { - id: string; - slug: string; - name: string; - role: string; - is_owner: boolean; - member_count: number; - active_peers: number; - joined_at: string; -} - -export async function runList(): Promise { - const config = readConfig(); - const auth = getStoredToken(); - - // Try to fetch from server - let serverMeshes: ServerMesh[] = []; - if (auth) { - try { - let userId = ""; - try { - const payload = JSON.parse(Buffer.from(auth.session_token.split(".")[1]!, "base64url").toString()) as { sub?: string }; - userId = payload.sub ?? ""; - } catch {} - - if (userId) { - const res = await request<{ meshes: ServerMesh[] }>({ - path: `/cli/meshes?user_id=${userId}`, - baseUrl: BROKER_HTTP, - }); - serverMeshes = res.meshes ?? []; - } - } catch {} - } - - // Merge: server meshes + local-only meshes - const localSlugs = new Set(config.meshes.map(m => m.slug)); - const serverSlugs = new Set(serverMeshes.map(m => m.slug)); - - const allSlugs = new Set([...localSlugs, ...serverSlugs]); - - if (allSlugs.size === 0) { - console.log("\n No meshes yet.\n"); - console.log(" Create one: claudemesh mesh create "); - console.log(" Join one: claudemesh mesh add \n"); - return; - } - - console.log("\n Your meshes:\n"); - - for (const slug of allSlugs) { - const local = config.meshes.find(m => m.slug === slug); - const server = serverMeshes.find(m => m.slug === slug); - - const name = server?.name ?? local?.name ?? slug; - const role = server?.role ?? "member"; - const isOwner = server?.is_owner ?? false; - const roleLabel = isOwner ? 
"owner" : role; - const memberCount = server?.member_count; - const activePeers = server?.active_peers ?? 0; - - // Status indicator - const inLocal = localSlugs.has(slug); - const inServer = serverSlugs.has(slug); - let status: string; - let icon: string; - - if (inLocal && inServer) { - icon = green("●"); - status = activePeers > 0 ? green(`${activePeers} online`) : dim("synced"); - } else if (inLocal && !inServer) { - icon = yellow("●"); - status = yellow("local only"); - } else { - icon = dim("○"); - status = dim("not added locally"); - } - - const memberInfo = memberCount ? dim(`${memberCount} member${memberCount !== 1 ? "s" : ""}`) : ""; - const parts = [roleLabel, memberInfo, status].filter(Boolean); - - console.log(` ${icon} ${bold(name)} ${dim(slug)}`); - console.log(` ${parts.join(" · ")}`); - } - - console.log(""); - if (serverMeshes.some(m => !localSlugs.has(m.slug))) { - console.log(dim(" ○ = server only — run `claudemesh mesh add` to use locally")); - } - console.log(dim(` Config: ${getConfigPath()}`)); - console.log(""); -} diff --git a/apps/cli-v2/src/commands/peers.ts b/apps/cli-v2/src/commands/peers.ts deleted file mode 100644 index 52baae8..0000000 --- a/apps/cli-v2/src/commands/peers.ts +++ /dev/null @@ -1,74 +0,0 @@ -/** - * `claudemesh peers` — list connected peers in the mesh. - * - * Shows all meshes by default, or filter with --mesh. - */ - -import { withMesh } from "./connect.js"; -import { readConfig } from "~/services/config/facade.js"; -import { render } from "~/ui/render.js"; -import { bold, dim, green, yellow } from "~/ui/styles.js"; - -export interface PeersFlags { - mesh?: string; - json?: boolean; -} - -export async function runPeers(flags: PeersFlags): Promise { - const config = readConfig(); - const slugs = flags.mesh ? 
[flags.mesh] : config.meshes.map((m) => m.slug); - - if (slugs.length === 0) { - render.err("No meshes joined."); - render.hint("claudemesh # join + launch"); - process.exit(1); - } - - const allJson: Array<{ mesh: string; peers: unknown[] }> = []; - - for (const slug of slugs) { - try { - await withMesh({ meshSlug: slug }, async (client, mesh) => { - const peers = await client.listPeers(); - - if (flags.json) { - allJson.push({ mesh: mesh.slug, peers }); - return; - } - - render.section(`peers on ${mesh.slug} (${peers.length})`); - - if (peers.length === 0) { - render.info(dim(" (no peers connected)")); - return; - } - - for (const p of peers) { - const groups = p.groups.length - ? " [" + - p.groups - .map((g: { name: string; role?: string }) => `@${g.name}${g.role ? `:${g.role}` : ""}`) - .join(", ") + - "]" - : ""; - const statusDot = p.status === "working" ? yellow("●") : green("●"); - const name = bold(p.displayName); - const meta: string[] = []; - if (p.peerType) meta.push(p.peerType); - if (p.channel) meta.push(p.channel); - if (p.model) meta.push(p.model); - const metaStr = meta.length ? dim(` (${meta.join(", ")})`) : ""; - const summary = p.summary ? dim(` — ${p.summary}`) : ""; - render.info(`${statusDot} ${name}${groups}${metaStr}${summary}`); - if (p.cwd) render.info(dim(` cwd: ${p.cwd}`)); - } - }); - } catch (e) { - render.err(`${slug}: ${e instanceof Error ? e.message : String(e)}`); - } - } - - if (flags.json) { - process.stdout.write(JSON.stringify(slugs.length === 1 ? allJson[0]?.peers : allJson, null, 2) + "\n"); - } -} diff --git a/apps/cli-v2/src/commands/profile.ts b/apps/cli-v2/src/commands/profile.ts deleted file mode 100644 index 15d9017..0000000 --- a/apps/cli-v2/src/commands/profile.ts +++ /dev/null @@ -1,114 +0,0 @@ -/** - * `claudemesh profile` — view or edit your member profile. - * - * Profile fields (roleTag, groups, messageMode, displayName) are persistent - * on the server. Changes are pushed to active sessions in real-time. 
- */ - -import { readConfig } from "~/services/config/facade.js"; -import { BrokerClient } from "~/services/broker/facade.js"; - -export interface ProfileFlags { - mesh?: string; - "role-tag"?: string; - groups?: string; - "message-mode"?: string; - name?: string; - member?: string; // admin only: edit another member - json?: boolean; -} - -export async function runProfile(flags: ProfileFlags): Promise { - const useColor = !process.env.NO_COLOR && process.env.TERM !== "dumb" && process.stdout.isTTY; - const dim = (s: string): string => (useColor ? `\x1b[2m${s}\x1b[22m` : s); - const green = (s: string): string => (useColor ? `\x1b[32m${s}\x1b[39m` : s); - - const config = readConfig(); - if (config.meshes.length === 0) { - console.error("No meshes joined. Run `claudemesh join ` first."); - process.exit(1); - } - - // Pick mesh - const mesh = flags.mesh - ? config.meshes.find(m => m.slug === flags.mesh) - : config.meshes[0]!; - - if (!mesh) { - console.error(`Mesh "${flags.mesh}" not found. Joined: ${config.meshes.map(m => m.slug).join(", ")}`); - process.exit(1); - } - - // Derive broker HTTP URL from WSS URL - const brokerUrl = mesh.brokerUrl.replace("wss://", "https://").replace("ws://", "http://").replace(/\/ws\/?$/, ""); - - const hasEdits = flags["role-tag"] !== undefined || flags.groups !== undefined || flags["message-mode"] !== undefined || flags.name !== undefined; - - if (hasEdits) { - // PATCH member profile - const targetMemberId = flags.member ?? mesh.memberId; // TODO: resolve --member by name - const body: Record = {}; - if (flags.name !== undefined) body.displayName = flags.name; - if (flags["role-tag"] !== undefined) body.roleTag = flags["role-tag"]; - if (flags.groups !== undefined) { - body.groups = flags.groups.split(",").map(s => { - const [name, role] = s.trim().split(":"); - return role ? { name: name!, role } : { name: name! 
}; - }); - } - if (flags["message-mode"] !== undefined) body.messageMode = flags["message-mode"]; - - const res = await fetch(`${brokerUrl}/mesh/${mesh.meshId}/member/${targetMemberId}`, { - method: "PATCH", - headers: { - "Content-Type": "application/json", - "X-Member-Id": mesh.memberId, - }, - body: JSON.stringify(body), - }); - - const result = await res.json() as Record; - if (flags.json) { - console.log(JSON.stringify(result, null, 2)); - } else if (result.ok) { - console.log(green("✓ Profile updated")); - const member = result.member as Record; - printProfile(member, dim); - } else { - console.error(`Error: ${result.error}`); - process.exit(1); - } - } else { - // GET members list, show current user's profile - const res = await fetch(`${brokerUrl}/mesh/${mesh.meshId}/members`); - const result = await res.json() as { ok: boolean; members?: Array>; error?: string }; - - if (!result.ok) { - console.error(`Error: ${result.error}`); - process.exit(1); - } - - const me = result.members?.find(m => m.id === mesh.memberId); - if (flags.json) { - console.log(JSON.stringify(me ?? {}, null, 2)); - } else if (me) { - printProfile(me, dim); - } else { - console.log("Member not found in mesh."); - } - } -} - -function printProfile(member: Record, dim: (s: string) => string): void { - const groups = member.groups as Array<{ name: string; role?: string }> | undefined; - const groupStr = groups?.length - ? groups.map(g => g.role ? `${g.name} (${g.role})` : g.name).join(", ") - : dim("(none)"); - - console.log(` Name: ${member.displayName ?? dim("(not set)")}`); - console.log(` Role: ${member.roleTag ?? dim("(not set)")}`); - console.log(` Groups: ${groupStr}`); - console.log(` Messages: ${member.messageMode ?? "push"}`); - console.log(` Access: ${member.permission ?? "member"}`); - console.log(` Mesh: ${dim(String(member.id ?? 
""))}`); -} diff --git a/apps/cli-v2/src/commands/remind.ts b/apps/cli-v2/src/commands/remind.ts deleted file mode 100644 index 275575e..0000000 --- a/apps/cli-v2/src/commands/remind.ts +++ /dev/null @@ -1,142 +0,0 @@ -/** - * `claudemesh remind --in | --at