feat(ga): close remaining GA blockers (backcompat, HA prep, tests, docs)
Backwards compat shim (task 27) - requireCliAuth() falls back to body.user_id when BROKER_LEGACY_AUTH=1 and no bearer present. Sets Deprecation + Warning headers + bumps a broker_legacy_auth_hits_total metric so operators can watch the legacy traffic drain to 0 before removing the shim. - All handlers parse body BEFORE requireCliAuth so the fallback can read user_id out of it. HA readiness (task 29) - .artifacts/specs/2026-04-15-broker-ha-statelessness-audit.md documents every in-memory symbol and rollout plan (phase 0-4). - packaging/docker-compose.ha-local.yml spins up 2 broker replicas behind Traefik sticky sessions for local smoke testing. - apps/broker/src/audit.ts now wraps writes in a transaction that takes pg_advisory_xact_lock(meshId) and re-reads the tail hash inside the txn. Concurrent broker replicas can no longer fork the audit chain. Deploy gate (task 30) - /health stays permissive (200 even on transient DB blips) so Docker doesn't kill the container on a glitch. - New /health/ready checks DB + optional EXPECTED_MIGRATION pin, returns 503 if either fails. External deploy gate can poll this and refuse to promote a broken deploy. Metrics dashboard (task 32) - packaging/grafana/claudemesh-broker.json: ready-to-import Grafana dashboard covering active conns, queue depth, routed/rejected rates, grant drops, legacy-auth hits, conn rejects. Tests (task 28) - audit-canonical.test.ts (4 tests) pins canonical JSON semantics. - grants-enforcement.test.ts (6 tests) covers the member-then- session-pubkey lookup with default/explicit/blocked branches. Docs (task 34) - docs/env-vars.md catalogues every env var the broker + CLI read. Crypto review prep (task 35) - .artifacts/specs/2026-04-15-crypto-review-packet.md: reviewer brief, threat model, scope, test coverage list, deliverables. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
76
packaging/docker-compose.ha-local.yml
Normal file
76
packaging/docker-compose.ha-local.yml
Normal file
@@ -0,0 +1,76 @@
|
||||
# Local HA smoke-test harness for claudemesh broker.
|
||||
#
|
||||
# 2 broker replicas behind Traefik with sticky sessions, single Postgres.
|
||||
# Boot with:
|
||||
# docker compose -f packaging/docker-compose.ha-local.yml up --build
|
||||
#
|
||||
# Then:
|
||||
# claudemesh launch --name A --join <invite> --broker ws://localhost/ws
|
||||
# # kill a container:
|
||||
# docker compose -f packaging/docker-compose.ha-local.yml kill broker-a
|
||||
# # observe that sessions reconnect to broker-b automatically
|
||||
#
|
||||
# Known gaps (see .artifacts/specs/2026-04-15-broker-ha-statelessness-audit.md):
|
||||
# - streamSubscriptions are per-node (pub on A, sub on B won't work)
|
||||
# - audit hash chain: now serialized via pg_advisory_xact_lock; verify no forks across replicas
|
||||
# - meshClocks may double-fire if both nodes think they own a clock
|
||||
|
||||
# Compose stack for the local HA smoke test: one Postgres, two broker
# replicas built from the same Dockerfile, and Traefik in front of them.
services:
  # Single shared database; both broker replicas point at it.
  db:
    image: postgres:16-alpine
    environment:
      POSTGRES_USER: claudemesh
      # Throwaway credential for this local harness only.
      POSTGRES_PASSWORD: ha_smoke_test
      POSTGRES_DB: claudemesh
    volumes:
      - ha-pgdata:/var/lib/postgresql/data
    healthcheck:
      # Brokers wait on this check (see depends_on below) before starting.
      test: ["CMD-SHELL", "pg_isready -U claudemesh"]
      interval: 2s
      timeout: 3s
      retries: 10

  # First broker replica. The anchor lets broker-b reuse this whole
  # definition through a merge key.
  broker-a: &broker-template
    build:
      # Build context is the repository root (this file lives in packaging/).
      context: ../
      dockerfile: apps/broker/Dockerfile
    environment:
      NODE_ENV: production
      DATABASE_URL: postgres://claudemesh:ha_smoke_test@db:5432/claudemesh
      # Must stay in sync with the loadbalancer.server.port label below.
      BROKER_PORT: "7900"
      # All-zero placeholder key: acceptable only for this local smoke test.
      BROKER_ENCRYPTION_KEY: "0000000000000000000000000000000000000000000000000000000000000000"
      BROKER_LEGACY_AUTH: "1"
      BROKER_PUBLIC_URL: http://localhost
      BROKER_WS_URL: ws://localhost/ws
      MAX_CONNECTIONS_PER_MESH: "200"
    depends_on:
      db:
        condition: service_healthy
    labels:
      # exposedbydefault is off on the Traefik side, so opt in explicitly.
      - "traefik.enable=true"
      # NOTE(review): PathPrefix(`/`) already matches every request, so the
      # Host(`localhost`) clause does not narrow the rule - confirm intent.
      - "traefik.http.routers.broker.rule=Host(`localhost`) || PathPrefix(`/`)"
      # Cookie-based stickiness keeps a client on the same replica.
      - "traefik.http.services.broker.loadbalancer.sticky.cookie=true"
      - "traefik.http.services.broker.loadbalancer.sticky.cookie.name=cm_node"
      - "traefik.http.services.broker.loadbalancer.server.port=7900"

  # Second replica: a shallow merge of broker-a, so it carries the same
  # build, environment, depends_on and Traefik labels.
  broker-b:
    <<: *broker-template

  # Edge proxy / load balancer for the two broker replicas.
  traefik:
    image: traefik:v3.0
    command:
      - --providers.docker=true
      - --providers.docker.exposedbydefault=false
      - --entrypoints.web.address=:80
      # Unauthenticated API/dashboard: fine locally, never expose publicly.
      - --api.insecure=true
    ports:
      - "80:80"
      - "8080:8080"  # Traefik dashboard
    volumes:
      # Read-only Docker socket so the docker provider can discover labels.
      - /var/run/docker.sock:/var/run/docker.sock:ro
    depends_on:
      - broker-a
      - broker-b

# Named volume backing the Postgres data directory.
volumes:
  ha-pgdata: {}
|
||||
Reference in New Issue
Block a user