Files
claudemesh/docker-compose.production.yml
Alejandro Gutiérrez 873f588057
Some checks failed
CI / Lint (push) Has been cancelled
CI / Typecheck (push) Has been cancelled
CI / Broker tests (Postgres) (push) Has been cancelled
CI / Docker build (linux/amd64) (push) Has been cancelled
feat: runner container + broker deploy pipeline
- apps/runner/: Dockerfile (node22 + python3 + uv + bun) + supervisor.mjs
  (HTTP API for load/call/unload/health)
- docker-compose: runner service with shared services-data volume
- Broker mcp_deploy: git clone or zip extract → runner /load → MCP spawn
- Broker mcp_call: routes managed services to runner via HTTP, falls back
  to live-proxy for peer-hosted servers
- RUNNER_URL env var for broker → runner communication

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-08 13:06:43 +01:00

197 lines
5.7 KiB
YAML

# claudemesh — production compose (for Coolify Service deployment)
#
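# Seven services:
# - migrate → one-shot drizzle-kit migrate, exits 0, gates web startup
# - minio, qdrant, neo4j → internal backing stores the broker is configured to use
# - runner → service runner (HTTP API on :7901), shares the services-data volume with broker
# - broker → ic.claudemesh.com (WSS /ws + HTTP /health + /hook/set-status)
# - web → claudemesh.com + dashboard.claudemesh.com (Next.js)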
#
# Postgres is NOT declared here — managed externally by Coolify or a managed DB.
# Pass DATABASE_URL + all secrets at runtime via Coolify env config.
#
# Why broker does NOT depend on migrate:
# Broker tolerates DB-down gracefully (per apps/broker/DEPLOY_SPEC.md §Healthcheck).
# It should keep serving even if a migration is in-flight or has failed, so WS
# peers stay connected + /health reports degraded instead of going 502.
#
# Why web DOES depend on migrate:
# Next.js routes assume the schema they were built against. Starting web before
# migrations land → 500s on every query touching new tables/columns.
# Explicit Compose project name.
name: claudemesh
# Services that make up this project.
services:
migrate:
  # One-shot schema migration job. It is never restarted; `web` waits for it
  # to exit successfully before starting.
  image: ${MIGRATE_IMAGE:-claudemesh-migrate:latest}
  networks: [claudemesh-internal]
  environment:
    - DATABASE_URL=${DATABASE_URL}
  # Quoted on purpose: a bare `no` would be read as a YAML 1.1 boolean
  # rather than the restart-policy string.
  restart: "no"
minio:
  # Object storage. Attached only to the internal network; broker is
  # configured to reach it at minio:9000.
  image: minio/minio
  restart: always
  # Serve /data and bind the web console to :9001 (the console port is not
  # listed under `expose`).
  command: ["server", "/data", "--console-address", ":9001"]
  environment:
    MINIO_ROOT_USER: claudemesh
    # Falls back to `changeme` when MINIO_SECRET_KEY is unset; broker reads
    # the same variable for its MINIO_SECRET_KEY.
    MINIO_ROOT_PASSWORD: ${MINIO_SECRET_KEY:-changeme}
  volumes:
    - minio-data:/data
  networks:
    - claudemesh-internal
  expose:
    - "9000"
  healthcheck:
    test:
      - CMD
      - mc
      - ready
      - local
    start_period: 10s
    interval: 15s
    timeout: 5s
    retries: 3
qdrant:
  # Qdrant instance. Attached only to the internal network; broker is
  # configured to reach it at http://qdrant:6333.
  image: qdrant/qdrant
  restart: always
  volumes:
    - qdrant-data:/qdrant/storage
  expose:
    - "6333"
  networks:
    - claudemesh-internal
  healthcheck:
    # The official qdrant/qdrant image ships without wget or curl, so a
    # wget-based probe can never succeed: the container would stay
    # "unhealthy" and broker (which waits on service_healthy) would never
    # start. Probe /readyz through bash's built-in /dev/tcp instead: send a
    # minimal HTTP/1.1 request and require a 200 status line.
    # NOTE(review): relies on bash and grep being present in the image
    # (true for the Debian-based official image) — confirm if the image changes.
    test:
      - CMD
      - bash
      - "-c"
      - >-
        exec 3<>/dev/tcp/127.0.0.1/6333 &&
        printf 'GET /readyz HTTP/1.1\r\nHost: localhost\r\nConnection: close\r\n\r\n' >&3 &&
        grep -q 'HTTP/1.1 200' <&3
    interval: 15s
    timeout: 5s
    retries: 3
neo4j:
  # Neo4j 5 instance. Attached only to the internal network; broker is
  # configured to reach it at bolt://neo4j:7687.
  image: neo4j:5
  restart: always
  volumes:
    - neo4j-data:/data
  networks:
    - claudemesh-internal
  expose:
    - "7687"
    - "7474"
  environment:
    # user/password pair; the password falls back to `changeme` when
    # NEO4J_PASSWORD is unset (broker reads the same variable).
    NEO4J_AUTH: neo4j/${NEO4J_PASSWORD:-changeme}
    # Quoted so YAML passes the literal text `[]` instead of an empty list.
    NEO4J_PLUGINS: "[]"
  healthcheck:
    # Healthy once an authenticated `RETURN 1` query succeeds.
    test:
      - CMD
      - cypher-shell
      - "-u"
      - neo4j
      - "-p"
      - "${NEO4J_PASSWORD:-changeme}"
      - RETURN 1
    start_period: 30s
    interval: 15s
    timeout: 5s
    retries: 3
runner:
  # Built from source (short form of `build: { context: ./apps/runner }`);
  # the other application services in this file use prebuilt images.
  build: ./apps/runner
  restart: always
  environment:
    - RUNNER_PORT=7901
  # Same named volume and mount path as broker uses.
  volumes:
    - services-data:/var/claudemesh/services
  networks:
    - claudemesh-internal
  expose:
    - "7901"
  healthcheck:
    # Healthy when GET /health on the runner port returns a 2xx response.
    test:
      - CMD
      - node
      - "-e"
      - >-
        fetch('http://localhost:7901/health').then(r=>{process.exit(r.ok?0:1)}).catch(()=>process.exit(1))
    start_period: 10s
    interval: 30s
    timeout: 5s
    retries: 3
broker:
  image: ${BROKER_IMAGE:-claudemesh-broker:latest}
  restart: always
  # Waits for every backing service to report healthy. Deliberately NOT gated
  # on `migrate` (see the header comment at the top of this file).
  depends_on:
    minio:
      condition: service_healthy
    qdrant:
      condition: service_healthy
    neo4j:
      condition: service_healthy
    runner:
      condition: service_healthy
  # Attached to Coolify's proxy network and to the internal backplane.
  networks:
    - coolify
    - claudemesh-internal
  expose:
    - "7900"
  # Same named volume and mount path as the runner service uses.
  volumes:
    - services-data:/var/claudemesh/services
  environment:
    # --- core ---
    NODE_ENV: production
    BROKER_PORT: "7900"
    DATABASE_URL: ${DATABASE_URL}
    # --- tunables (overridable at deploy time, defaults shown) ---
    STATUS_TTL_SECONDS: ${STATUS_TTL_SECONDS:-60}
    HOOK_FRESH_WINDOW_SECONDS: ${HOOK_FRESH_WINDOW_SECONDS:-30}
    MAX_CONNECTIONS_PER_MESH: ${MAX_CONNECTIONS_PER_MESH:-100}
    MAX_MESSAGE_BYTES: ${MAX_MESSAGE_BYTES:-65536}
    HOOK_RATE_LIMIT_PER_MIN: ${HOOK_RATE_LIMIT_PER_MIN:-30}
    # --- MinIO (credentials mirror the minio service's root user/password) ---
    MINIO_ENDPOINT: minio:9000
    MINIO_ACCESS_KEY: claudemesh
    MINIO_SECRET_KEY: ${MINIO_SECRET_KEY:-changeme}
    # Quoted so the value is passed as the string "false", not a YAML boolean.
    MINIO_USE_SSL: "false"
    # --- Qdrant ---
    QDRANT_URL: http://qdrant:6333
    # --- Neo4j (password mirrors the neo4j service's NEO4J_AUTH) ---
    NEO4J_URL: bolt://neo4j:7687
    NEO4J_USER: neo4j
    NEO4J_PASSWORD: ${NEO4J_PASSWORD:-changeme}
    # --- runner ---
    RUNNER_URL: http://runner:7901
    CLAUDEMESH_SERVICES_DIR: /var/claudemesh/services
    # Empty string when BROKER_ENCRYPTION_KEY is not provided.
    BROKER_ENCRYPTION_KEY: ${BROKER_ENCRYPTION_KEY:-}
  healthcheck:
    # Healthy when GET /health on the broker port returns a 2xx response.
    test:
      - CMD
      - bun
      - "-e"
      - >-
        fetch('http://localhost:7900/health').then(r=>{process.exit(r.ok?0:1)}).catch(()=>process.exit(1))
    start_period: 10s
    interval: 15s
    timeout: 5s
    retries: 3
web:
  image: ${WEB_IMAGE:-claudemesh-web:latest}
  restart: always
  environment:
    NODE_ENV: production
    # Quoted so the values are passed as strings, not a YAML integer or an
    # ambiguous plain scalar.
    PORT: "3000"
    # Bind on all IPv4 interfaces.
    HOSTNAME: "0.0.0.0"
    DATABASE_URL: ${DATABASE_URL}
    BETTER_AUTH_SECRET: ${BETTER_AUTH_SECRET}
    BETTER_AUTH_URL: ${BETTER_AUTH_URL:-https://claudemesh.com}
    BETTER_AUTH_TRUSTED_ORIGINS: ${BETTER_AUTH_TRUSTED_ORIGINS:-https://claudemesh.com,https://dashboard.claudemesh.com,https://ic.claudemesh.com}
    # OAuth credentials; each is an empty string when not provided.
    GITHUB_CLIENT_ID: ${GITHUB_CLIENT_ID:-}
    GITHUB_CLIENT_SECRET: ${GITHUB_CLIENT_SECRET:-}
    GOOGLE_CLIENT_ID: ${GOOGLE_CLIENT_ID:-}
    GOOGLE_CLIENT_SECRET: ${GOOGLE_CLIENT_SECRET:-}
    # Broker is addressed by service name over the internal network.
    BROKER_INTERNAL_URL: http://broker:7900
  expose:
    - "3000"
  networks:
    - coolify
    - claudemesh-internal
  depends_on:
    # Schema must be migrated before web starts.
    migrate:
      condition: service_completed_successfully
    broker:
      condition: service_healthy
  healthcheck:
    # Probe 127.0.0.1 rather than `localhost`: the server binds to 0.0.0.0
    # (IPv4 only, see HOSTNAME above), and `localhost` may resolve to ::1
    # first inside the container, making the probe fail with ECONNREFUSED
    # even though the app is up.
    test:
      - CMD
      - node
      - "-e"
      - >-
        fetch('http://127.0.0.1:3000').then(r=>{process.exit(r.ok?0:1)}).catch(()=>process.exit(1))
    interval: 15s
    timeout: 5s
    start_period: 20s
    retries: 3
volumes:
  # Named volumes with default settings (an explicit empty mapping instead
  # of a bare null value).
  # Mounted by minio at /data.
  minio-data: {}
  # Mounted by qdrant at /qdrant/storage.
  qdrant-data: {}
  # Mounted by neo4j at /data.
  neo4j-data: {}
  # Mounted by both runner and broker at /var/claudemesh/services.
  services-data: {}
networks:
  # Private bridge network joined by every service in this file.
  claudemesh-internal:
    driver: bridge
  # Coolify's shared Traefik network. Declared external, so Compose only
  # attaches to it and it must already exist on the host.
  coolify:
    external: true