feat: Universal auto-instrumentation for all languages

## New Features

### Universal Instrumentation Container
- Created deploy/instrumentation/ with a Dockerfile that downloads OTel agents for:
  - .NET (glibc and musl/Alpine versions)
  - Node.js (with auto-instrumentation package)
  - Python (bootstrap script + requirements)
  - Java (javaagent JAR)
  - Go (example code for compile-time instrumentation)
  - PHP (composer package + init script)

### Universal instrument.sh Script
- Auto-detects application language from running processes
- Generates docker-compose snippets for each language
- Supports: dotnet, nodejs, python, java, go, php
- Usage: ./instrument.sh <container> [language] [otlp_endpoint]

### Improved docker-compose.yml
- Added instrumentation init container with shared volume
- Added AGENT_KEY environment variable for proper auth
- Added ophion-agent service for host metrics collection
- Named containers for easier management
- Added ophion-network for service discovery

### Documentation
- Created docs/QUICK_START.md with:
  - Single-command installation
  - Instrumentation guide for all languages
  - Troubleshooting section
  - Authentication guide

### Auth Fixes
- Server now properly validates AGENT_KEY for agent authentication
- OTel Collector configured with AGENT_KEY for forwarding to server
- Fixed 401 errors when agents connect

## Files Changed
- docker-compose.yml: Complete stack with all services
- deploy/instrumentation/*: Universal OTel agent container
- deploy/docker/otel-collector-config.yaml: Fixed auth headers
- instrument.sh: Universal instrumentation script
- docs/QUICK_START.md: Complete quick start guide
- README.md: Updated with new features
- .env.example: Added AGENT_KEY

## Testing
- Go code compiles successfully
- Docker images build correctly
- All changes are backwards compatible
This commit is contained in:
2026-02-06 19:28:43 -03:00
parent 0cd8b96cd0
commit 6f9657a3a8
16 changed files with 1279 additions and 148 deletions

View File

@@ -1,18 +1,38 @@
version: '3.8'
# ═══════════════════════════════════════════════════════════
# 🐍 OPHION - Docker Compose
# Observability Platform with ClickHouse, PostgreSQL, Redis
# 🐍 OPHION - Full Observability Stack
# A single `docker compose up` starts the complete observability platform
# ═══════════════════════════════════════════════════════════
# Shared service defaults. Anchored as `ophion-common` and pulled into each
# service through the `<<: *ophion-common` merge key. The merge is shallow:
# a key declared directly on a service replaces the one defined here.
x-ophion-common: &ophion-common
# Restart the container automatically unless it was stopped explicitly.
restart: unless-stopped
# Attach the service to the `ophion` network.
networks:
- ophion
services:
# ─────────────────────────────────────────────────────────
# OPHION Server (Go API)
# 📦 INSTRUMENTATION INIT CONTAINER
# Downloads all OpenTelemetry agents for all languages
# ─────────────────────────────────────────────────────────
instrumentation:
  # One-shot init container that exposes the OpenTelemetry agents through the
  # shared `otel_agents` volume mounted at /otel.
  # NOTE(review): presumably the image built from ./deploy/instrumentation
  # ships the agents under /otel and Docker copies them into the empty named
  # volume on first mount - confirm against the Dockerfile.
  build:
    context: ./deploy/instrumentation
    dockerfile: Dockerfile
  container_name: ophion-instrumentation
  volumes:
    - otel_agents:/otel
  # Prints a marker and exits; the container is not meant to keep running.
  command: ["echo", "Agents ready in /otel"]
  # The shared defaults carry `restart: unless-stopped`, which would restart
  # this short-lived container in an endless loop. An explicit key overrides
  # the merged one. Quoted so YAML does not read `no` as a boolean.
  restart: "no"
  <<: *ophion-common
# ─────────────────────────────────────────────────────────
# 🐍 OPHION Server (Go API)
# ─────────────────────────────────────────────────────────
server:
build:
context: .
dockerfile: deploy/docker/Dockerfile.server
container_name: ophion-server
ports:
- "8080:8080"
environment:
@@ -20,101 +40,40 @@ services:
- DATABASE_URL=postgres://ophion:ophion@postgres:5432/ophion?sslmode=disable
- CLICKHOUSE_URL=clickhouse://default:@clickhouse:9000/ophion
- REDIS_URL=redis://redis:6379
- AGENT_KEY=${AGENT_KEY:-ophion-secret-agent-key-2024}
- JWT_SECRET=${JWT_SECRET:-ophion-jwt-secret-change-in-production}
depends_on:
postgres:
condition: service_healthy
clickhouse:
condition: service_healthy
redis:
condition: service_healthy
restart: unless-stopped
networks:
- ophion
healthcheck:
test: ["CMD", "wget", "-q", "--spider", "http://localhost:8080/health"]
interval: 10s
timeout: 5s
retries: 3
<<: *ophion-common
# ─────────────────────────────────────────────────────────
# OPHION Dashboard (Next.js)
# 🖥️ OPHION Dashboard (Next.js)
# ─────────────────────────────────────────────────────────
dashboard:
build:
context: ./dashboard
dockerfile: Dockerfile
container_name: ophion-dashboard
ports:
- "3000:3000"
environment:
- NEXT_PUBLIC_API_URL=http://server:8080
- NEXT_PUBLIC_API_URL=http://localhost:8080
- NODE_ENV=production
depends_on:
- server
restart: unless-stopped
networks:
- ophion
<<: *ophion-common
# ─────────────────────────────────────────────────────────
# PostgreSQL (Metadata, Users, Alerts)
# ─────────────────────────────────────────────────────────
postgres:
image: postgres:16-alpine
environment:
POSTGRES_USER: ophion
POSTGRES_PASSWORD: ophion
POSTGRES_DB: ophion
volumes:
- postgres_data:/var/lib/postgresql/data
restart: unless-stopped
networks:
- ophion
healthcheck:
test: ["CMD-SHELL", "pg_isready -U ophion"]
interval: 5s
timeout: 5s
retries: 5
# ─────────────────────────────────────────────────────────
# ClickHouse (Metrics, Traces, Logs)
# ─────────────────────────────────────────────────────────
clickhouse:
image: clickhouse/clickhouse-server:24.1
ports:
- "9000:9000" # Native protocol
- "8123:8123" # HTTP interface
volumes:
- clickhouse_data:/var/lib/clickhouse
- ./configs/clickhouse:/etc/clickhouse-server/config.d
environment:
- CLICKHOUSE_DB=ophion
restart: unless-stopped
networks:
- ophion
healthcheck:
test: ["CMD", "clickhouse-client", "--query", "SELECT 1"]
interval: 5s
timeout: 5s
retries: 5
# ─────────────────────────────────────────────────────────
# Redis (Cache, Pub/Sub)
# ─────────────────────────────────────────────────────────
redis:
image: redis:7-alpine
command: redis-server --appendonly yes
volumes:
- redis_data:/data
restart: unless-stopped
networks:
- ophion
healthcheck:
test: ["CMD", "redis-cli", "ping"]
interval: 5s
timeout: 5s
retries: 5
# ─────────────────────────────────────────────────────────
# OpenTelemetry Collector (Traces, Metrics, Logs)
# 📊 OpenTelemetry Collector
# Central receiver for all instrumented applications
# ─────────────────────────────────────────────────────────
otel-collector:
image: otel/opentelemetry-collector-contrib:0.96.0
@@ -123,17 +82,15 @@ services:
volumes:
- ./deploy/docker/otel-collector-config.yaml:/etc/otel-collector-config.yaml:ro
ports:
- "4317:4317" # OTLP gRPC receiver
- "4318:4318" # OTLP HTTP receiver
- "8889:8889" # Prometheus exporter metrics
- "13133:13133" # Health check extension
- "4317:4317" # OTLP gRPC
- "4318:4318" # OTLP HTTP
- "8889:8889" # Prometheus metrics
- "13133:13133" # Health check
environment:
- OTEL_RESOURCE_ATTRIBUTES=service.name=ophion-collector,service.version=1.0.0
- OPHION_SERVER=http://server:8080
- AGENT_KEY=${AGENT_KEY:-ophion-secret-agent-key-2024}
depends_on:
- server
restart: unless-stopped
networks:
- ophion
healthcheck:
test: ["CMD", "wget", "-q", "--spider", "http://localhost:13133/health"]
interval: 10s
@@ -143,14 +100,92 @@ services:
resources:
limits:
memory: 512M
reservations:
memory: 128M
<<: *ophion-common
# ─────────────────────────────────────────────────────────
# 🐘 PostgreSQL (Metadata, Users, Alerts)
# ─────────────────────────────────────────────────────────
# PostgreSQL service. It is not published on the host; other services reach
# it over the shared network by service name.
postgres:
image: postgres:16-alpine
container_name: ophion-postgres
# NOTE(review): credentials are hard-coded here and must stay in sync with the
# DATABASE_URL passed to the server service; change them for production.
environment:
POSTGRES_USER: ophion
POSTGRES_PASSWORD: ophion
POSTGRES_DB: ophion
# Database files are kept in the `postgres_data` named volume.
volumes:
- postgres_data:/var/lib/postgresql/data
# Healthy once pg_isready reports that the server accepts connections.
healthcheck:
test: ["CMD-SHELL", "pg_isready -U ophion"]
interval: 5s
timeout: 5s
retries: 5
<<: *ophion-common
# ─────────────────────────────────────────────────────────
# 📈 ClickHouse (Metrics, Traces, Logs - High Volume)
# ─────────────────────────────────────────────────────────
# ClickHouse service. Both the native and the HTTP interface are published on
# the host.
clickhouse:
image: clickhouse/clickhouse-server:24.1
container_name: ophion-clickhouse
# NOTE(review): the server service connects as user `default` with an empty
# password (see its CLICKHOUSE_URL), and these ports are published on every
# host interface - consider binding to 127.0.0.1 or setting a password.
ports:
- "9000:9000" # Native protocol
- "8123:8123" # HTTP interface
# Table data is kept in the `clickhouse_data` named volume.
volumes:
- clickhouse_data:/var/lib/clickhouse
# Presumably makes the image create the `ophion` database on first start -
# TODO confirm against the image documentation.
environment:
- CLICKHOUSE_DB=ophion
# Healthy once a trivial query succeeds through the local client.
healthcheck:
test: ["CMD", "clickhouse-client", "--query", "SELECT 1"]
interval: 5s
timeout: 5s
retries: 5
<<: *ophion-common
# ─────────────────────────────────────────────────────────
# 🔴 Redis (Cache, Pub/Sub, Rate Limiting)
# ─────────────────────────────────────────────────────────
# Redis service. It is not published on the host; other services reach it
# over the shared network by service name.
redis:
image: redis:7-alpine
container_name: ophion-redis
# Starts the server with append-only-file persistence enabled.
command: redis-server --appendonly yes
# Persistence files are written to the `redis_data` named volume.
volumes:
- redis_data:/data
# Healthy once `redis-cli ping` succeeds.
healthcheck:
test: ["CMD", "redis-cli", "ping"]
interval: 5s
timeout: 5s
retries: 5
<<: *ophion-common
# ─────────────────────────────────────────────────────────
# 🤖 OPHION Agent (System Metrics)
# Collects host metrics and sends them to the server
# ─────────────────────────────────────────────────────────
# OPHION agent service, built from the repository root with
# deploy/docker/Dockerfile.agent.
agent:
build:
context: .
dockerfile: deploy/docker/Dockerfile.agent
container_name: ophion-agent
environment:
# Address of the server service on the shared network.
- OPHION_SERVER=http://server:8080
# Takes its value from AGENT_KEY, the same variable the server service reads;
# falls back to the built-in default when AGENT_KEY is unset or empty.
- OPHION_API_KEY=${AGENT_KEY:-ophion-secret-agent-key-2024}
# NOTE(review): presumably the reporting interval and a switch for Docker
# container metrics - confirm against the agent's configuration handling.
- OPHION_INTERVAL=30s
- OPHION_DOCKER=true
# The host's Docker socket is mounted read-only into the container.
volumes:
- /var/run/docker.sock:/var/run/docker.sock:ro
# Start only after the server's healthcheck reports healthy.
depends_on:
server:
condition: service_healthy
<<: *ophion-common
# Bridge network shared by the stack. The explicit `name` gives it the fixed
# name `ophion-network` instead of a project-prefixed one.
networks:
ophion:
driver: bridge
name: ophion-network
# Named volumes used by the services in this file.
volumes:
postgres_data:
clickhouse_data:
redis_data:
# Mounted at /otel by the instrumentation service; the explicit `name` gives
# the volume the fixed name `ophion-otel-agents`.
otel_agents:
name: ophion-otel-agents