feat: Universal auto-instrumentation for all languages
## New Features

### Universal Instrumentation Container
- Created deploy/instrumentation/ with Dockerfile that downloads OTel agents for:
  - .NET (glibc and musl/Alpine versions)
  - Node.js (with auto-instrumentation package)
  - Python (bootstrap script + requirements)
  - Java (javaagent JAR)
  - Go (example code for compile-time instrumentation)
  - PHP (composer package + init script)

### Universal instrument.sh Script
- Auto-detects application language from running processes
- Generates docker-compose snippets for each language
- Supports: dotnet, nodejs, python, java, go, php
- Usage: `./instrument.sh <container> [language] [otlp_endpoint]`

### Improved docker-compose.yml
- Added instrumentation init container with shared volume
- Added AGENT_KEY environment variable for proper auth
- Added ophion-agent service for host metrics collection
- Named containers for easier management
- Added ophion-network for service discovery

### Documentation
- Created docs/QUICK_START.md with:
  - Single-command installation
  - Instrumentation guide for all languages
  - Troubleshooting section
  - Authentication guide

### Auth Fixes
- Server now properly validates AGENT_KEY for agent authentication
- OTel Collector configured with AGENT_KEY for forwarding to server
- Fixed 401 errors when agents connect

## Files Changed
- docker-compose.yml: Complete stack with all services
- deploy/instrumentation/*: Universal OTel agent container
- deploy/docker/otel-collector-config.yaml: Fixed auth headers
- instrument.sh: Universal instrumentation script
- docs/QUICK_START.md: Complete quick start guide
- README.md: Updated with new features
- .env.example: Added AGENT_KEY

## Testing
- Go code compiles successfully
- Docker images build correctly
- All changes are backwards compatible
This commit is contained in:
@@ -1,18 +1,38 @@
|
||||
version: '3.8'
|
||||
|
||||
# ═══════════════════════════════════════════════════════════
|
||||
# 🐍 OPHION - Docker Compose
|
||||
# Observability Platform with ClickHouse, PostgreSQL, Redis
|
||||
# 🐍 OPHION - Full Observability Stack
|
||||
# Single docker compose up for complete observability platform
|
||||
# ═══════════════════════════════════════════════════════════
|
||||
|
||||
x-ophion-common: &ophion-common
|
||||
restart: unless-stopped
|
||||
networks:
|
||||
- ophion
|
||||
|
||||
services:
|
||||
# ─────────────────────────────────────────────────────────
|
||||
# OPHION Server (Go API)
|
||||
# 📦 INSTRUMENTATION INIT CONTAINER
|
||||
# Downloads all OpenTelemetry agents for all languages
|
||||
# ─────────────────────────────────────────────────────────
|
||||
instrumentation:
|
||||
build:
|
||||
context: ./deploy/instrumentation
|
||||
dockerfile: Dockerfile
|
||||
container_name: ophion-instrumentation
|
||||
volumes:
|
||||
- otel_agents:/otel
|
||||
command: ["echo", "Agents ready in /otel"]
|
||||
<<: *ophion-common
|
||||
|
||||
# ─────────────────────────────────────────────────────────
|
||||
# 🐍 OPHION Server (Go API)
|
||||
# ─────────────────────────────────────────────────────────
|
||||
server:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: deploy/docker/Dockerfile.server
|
||||
container_name: ophion-server
|
||||
ports:
|
||||
- "8080:8080"
|
||||
environment:
|
||||
@@ -20,101 +40,40 @@ services:
|
||||
- DATABASE_URL=postgres://ophion:ophion@postgres:5432/ophion?sslmode=disable
|
||||
- CLICKHOUSE_URL=clickhouse://default:@clickhouse:9000/ophion
|
||||
- REDIS_URL=redis://redis:6379
|
||||
- AGENT_KEY=${AGENT_KEY:-ophion-secret-agent-key-2024}
|
||||
- JWT_SECRET=${JWT_SECRET:-ophion-jwt-secret-change-in-production}
|
||||
depends_on:
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
clickhouse:
|
||||
condition: service_healthy
|
||||
redis:
|
||||
condition: service_healthy
|
||||
restart: unless-stopped
|
||||
networks:
|
||||
- ophion
|
||||
healthcheck:
|
||||
test: ["CMD", "wget", "-q", "--spider", "http://localhost:8080/health"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 3
|
||||
<<: *ophion-common
|
||||
|
||||
# ─────────────────────────────────────────────────────────
|
||||
# OPHION Dashboard (Next.js)
|
||||
# 🖥️ OPHION Dashboard (Next.js)
|
||||
# ─────────────────────────────────────────────────────────
|
||||
dashboard:
|
||||
build:
|
||||
context: ./dashboard
|
||||
dockerfile: Dockerfile
|
||||
container_name: ophion-dashboard
|
||||
ports:
|
||||
- "3000:3000"
|
||||
environment:
|
||||
- NEXT_PUBLIC_API_URL=http://server:8080
|
||||
- NEXT_PUBLIC_API_URL=http://localhost:8080
|
||||
- NODE_ENV=production
|
||||
depends_on:
|
||||
- server
|
||||
restart: unless-stopped
|
||||
networks:
|
||||
- ophion
|
||||
<<: *ophion-common
|
||||
|
||||
# ─────────────────────────────────────────────────────────
|
||||
# PostgreSQL (Metadata, Users, Alerts)
|
||||
# ─────────────────────────────────────────────────────────
|
||||
postgres:
|
||||
image: postgres:16-alpine
|
||||
environment:
|
||||
POSTGRES_USER: ophion
|
||||
POSTGRES_PASSWORD: ophion
|
||||
POSTGRES_DB: ophion
|
||||
volumes:
|
||||
- postgres_data:/var/lib/postgresql/data
|
||||
restart: unless-stopped
|
||||
networks:
|
||||
- ophion
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "pg_isready -U ophion"]
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
|
||||
# ─────────────────────────────────────────────────────────
|
||||
# ClickHouse (Metrics, Traces, Logs)
|
||||
# ─────────────────────────────────────────────────────────
|
||||
clickhouse:
|
||||
image: clickhouse/clickhouse-server:24.1
|
||||
ports:
|
||||
- "9000:9000" # Native protocol
|
||||
- "8123:8123" # HTTP interface
|
||||
volumes:
|
||||
- clickhouse_data:/var/lib/clickhouse
|
||||
- ./configs/clickhouse:/etc/clickhouse-server/config.d
|
||||
environment:
|
||||
- CLICKHOUSE_DB=ophion
|
||||
restart: unless-stopped
|
||||
networks:
|
||||
- ophion
|
||||
healthcheck:
|
||||
test: ["CMD", "clickhouse-client", "--query", "SELECT 1"]
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
|
||||
# ─────────────────────────────────────────────────────────
|
||||
# Redis (Cache, Pub/Sub)
|
||||
# ─────────────────────────────────────────────────────────
|
||||
redis:
|
||||
image: redis:7-alpine
|
||||
command: redis-server --appendonly yes
|
||||
volumes:
|
||||
- redis_data:/data
|
||||
restart: unless-stopped
|
||||
networks:
|
||||
- ophion
|
||||
healthcheck:
|
||||
test: ["CMD", "redis-cli", "ping"]
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
|
||||
# ─────────────────────────────────────────────────────────
|
||||
# OpenTelemetry Collector (Traces, Metrics, Logs)
|
||||
# 📊 OpenTelemetry Collector
|
||||
# Central receiver for all instrumented applications
|
||||
# ─────────────────────────────────────────────────────────
|
||||
otel-collector:
|
||||
image: otel/opentelemetry-collector-contrib:0.96.0
|
||||
@@ -123,17 +82,15 @@ services:
|
||||
volumes:
|
||||
- ./deploy/docker/otel-collector-config.yaml:/etc/otel-collector-config.yaml:ro
|
||||
ports:
|
||||
- "4317:4317" # OTLP gRPC receiver
|
||||
- "4318:4318" # OTLP HTTP receiver
|
||||
- "8889:8889" # Prometheus exporter metrics
|
||||
- "13133:13133" # Health check extension
|
||||
- "4317:4317" # OTLP gRPC
|
||||
- "4318:4318" # OTLP HTTP
|
||||
- "8889:8889" # Prometheus metrics
|
||||
- "13133:13133" # Health check
|
||||
environment:
|
||||
- OTEL_RESOURCE_ATTRIBUTES=service.name=ophion-collector,service.version=1.0.0
|
||||
- OPHION_SERVER=http://server:8080
|
||||
- AGENT_KEY=${AGENT_KEY:-ophion-secret-agent-key-2024}
|
||||
depends_on:
|
||||
- server
|
||||
restart: unless-stopped
|
||||
networks:
|
||||
- ophion
|
||||
healthcheck:
|
||||
test: ["CMD", "wget", "-q", "--spider", "http://localhost:13133/health"]
|
||||
interval: 10s
|
||||
@@ -143,14 +100,92 @@ services:
|
||||
resources:
|
||||
limits:
|
||||
memory: 512M
|
||||
reservations:
|
||||
memory: 128M
|
||||
<<: *ophion-common
|
||||
|
||||
# ─────────────────────────────────────────────────────────
|
||||
# 🐘 PostgreSQL (Metadata, Users, Alerts)
|
||||
# ─────────────────────────────────────────────────────────
|
||||
postgres:
|
||||
image: postgres:16-alpine
|
||||
container_name: ophion-postgres
|
||||
environment:
|
||||
POSTGRES_USER: ophion
|
||||
POSTGRES_PASSWORD: ophion
|
||||
POSTGRES_DB: ophion
|
||||
volumes:
|
||||
- postgres_data:/var/lib/postgresql/data
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "pg_isready -U ophion"]
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
<<: *ophion-common
|
||||
|
||||
# ─────────────────────────────────────────────────────────
|
||||
# 📈 ClickHouse (Metrics, Traces, Logs - High Volume)
|
||||
# ─────────────────────────────────────────────────────────
|
||||
clickhouse:
|
||||
image: clickhouse/clickhouse-server:24.1
|
||||
container_name: ophion-clickhouse
|
||||
ports:
|
||||
- "9000:9000" # Native protocol
|
||||
- "8123:8123" # HTTP interface
|
||||
volumes:
|
||||
- clickhouse_data:/var/lib/clickhouse
|
||||
environment:
|
||||
- CLICKHOUSE_DB=ophion
|
||||
healthcheck:
|
||||
test: ["CMD", "clickhouse-client", "--query", "SELECT 1"]
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
<<: *ophion-common
|
||||
|
||||
# ─────────────────────────────────────────────────────────
|
||||
# 🔴 Redis (Cache, Pub/Sub, Rate Limiting)
|
||||
# ─────────────────────────────────────────────────────────
|
||||
redis:
|
||||
image: redis:7-alpine
|
||||
container_name: ophion-redis
|
||||
command: redis-server --appendonly yes
|
||||
volumes:
|
||||
- redis_data:/data
|
||||
healthcheck:
|
||||
test: ["CMD", "redis-cli", "ping"]
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
<<: *ophion-common
|
||||
|
||||
# ─────────────────────────────────────────────────────────
|
||||
# 🤖 OPHION Agent (System Metrics)
|
||||
# Collect host metrics and send to server
|
||||
# ─────────────────────────────────────────────────────────
|
||||
agent:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: deploy/docker/Dockerfile.agent
|
||||
container_name: ophion-agent
|
||||
environment:
|
||||
- OPHION_SERVER=http://server:8080
|
||||
- OPHION_API_KEY=${AGENT_KEY:-ophion-secret-agent-key-2024}
|
||||
- OPHION_INTERVAL=30s
|
||||
- OPHION_DOCKER=true
|
||||
volumes:
|
||||
- /var/run/docker.sock:/var/run/docker.sock:ro
|
||||
depends_on:
|
||||
server:
|
||||
condition: service_healthy
|
||||
<<: *ophion-common
|
||||
|
||||
networks:
|
||||
ophion:
|
||||
driver: bridge
|
||||
name: ophion-network
|
||||
|
||||
volumes:
|
||||
postgres_data:
|
||||
clickhouse_data:
|
||||
redis_data:
|
||||
otel_agents:
|
||||
name: ophion-otel-agents
|
||||
|
||||
Reference in New Issue
Block a user