feat: add AI APM module for AI/LLM call telemetry

- internal/aiapm/types.go: AICallRecord, filter, summary, and stats types
- internal/aiapm/pricing.go: vendor pricing tables (Anthropic, OpenAI, Google, Mistral, DeepSeek, Groq)
- internal/aiapm/store.go: PostgreSQL storage with batch insert, filtered queries, aggregations, timeseries
- internal/aiapm/collector.go: async collector with buffered channel and background batch writer
- internal/api/aiapm_handlers.go: Fiber route handlers for ingest, summary, models, vendors, costs, calls, pricing
- cmd/server/main.go: register AI APM routes and create ai_calls table at startup
2026-02-08 05:13:38 -03:00
12 changed files with 781 additions and 744 deletions
--- a/cmd/server/main.go
+++ b/cmd/server/main.go
@@ -12,6 +12,8 @@ import (
 	"syscall"
 	"time"

+	"github.com/bigtux/ophion/internal/aiapm"
+	aiapmapi "github.com/bigtux/ophion/internal/api"
 	"github.com/bigtux/ophion/internal/auth"
 	"github.com/bigtux/ophion/internal/otel"
 	"github.com/bigtux/ophion/internal/security"
@@ -107,6 +109,12 @@ func main() {
 	} else {
 		log.Println("✓ Connected to PostgreSQL")
 		initSchema(db)
+		// Initialize AI APM table
+		if err := aiapm.CreateTable(db); err != nil {
+			log.Printf("⚠ Failed to create AI APM table: %v", err)
+		} else {
+			log.Println("✓ AI APM table initialized")
+		}
 		// Create default admin user
 		if err := auth.CreateDefaultAdmin(db); err != nil {
 			log.Printf("⚠ Failed to create default admin: %v", err)
@@ -335,6 +343,9 @@ func (s *Server) setupRoutes() {
 	// Dashboard
 	protected.Get("/dashboard/overview", s.getDashboardOverview)

+	// AI APM routes
+	aiapmapi.RegisterAIAPMRoutes(protected, s.db)
+
 	// User info
 	protected.Get("/me", s.authHandler.Me)

--- a/docs/ARQUITETURA.md
+++ b/docs/ARQUITETURA.md
@@ -1,293 +0,0 @@
-# Arquitetura - OPHION
-
-## Visão Geral da Arquitetura
-
-```
-┌─────────────────────────────────────────────────────────────────────────────┐
-│                           APLICAÇÕES INSTRUMENTADAS                          │
-│  ┌─────────────┐  ┌─────────────┐  ┌─────────────┐  ┌─────────────┐        │
-│  │  Node.js    │  │   Python    │  │    Java     │  │   .NET      │        │
-│  │   App       │  │    App      │  │    App      │  │    App      │        │
-│  │  (OTEL SDK) │  │  (OTEL SDK) │  │ (OTEL Agent)│  │  (OTEL SDK) │        │
-│  └──────┬──────┘  └──────┬──────┘  └──────┬──────┘  └──────┬──────┘        │
-│         │                │                │                │                │
-│         └────────────────┴────────────────┴────────────────┘                │
-│                                   │                                         │
-│                          OTLP (gRPC/HTTP)                                   │
-│                                   │                                         │
-└───────────────────────────────────┼─────────────────────────────────────────┘
-                                    │
-                                    ▼
-┌─────────────────────────────────────────────────────────────────────────────┐
-│                              OPHION PLATFORM                                 │
-│                                                                             │
-│  ┌─────────────────────────────────────────────────────────────────────┐   │
-│  │                        OTLP RECEIVER (Go)                            │   │
-│  │                   Port 4317 (gRPC) / 4318 (HTTP)                     │   │
-│  │  ┌─────────────┐  ┌─────────────┐  ┌─────────────┐                  │   │
-│  │  │   Traces    │  │   Metrics   │  │    Logs     │                  │   │
-│  │  │  Receiver   │  │  Receiver   │  │  Receiver   │                  │   │
-│  │  └──────┬──────┘  └──────┬──────┘  └──────┬──────┘                  │   │
-│  └─────────┼────────────────┼────────────────┼──────────────────────────┘   │
-│            │                │                │                              │
-│            └────────────────┼────────────────┘                              │
-│                             │                                               │
-│                             ▼                                               │
-│  ┌─────────────────────────────────────────────────────────────────────┐   │
-│  │                      API SERVER (Go) — Port 8080                     │   │
-│  │  ┌─────────────────────────────────────────────────────────────┐    │   │
-│  │  │                         MODULES                              │    │   │
-│  │  │  ┌─────────┐ ┌─────────┐ ┌─────────┐ ┌─────────┐ ┌───────┐  │    │   │
-│  │  │  │   Auth  │ │   API   │ │   AI    │ │  OTEL   │ │Security│  │    │   │
-│  │  │  │ Handlers│ │Handlers │ │Handlers │ │Processor│ │ Module │  │    │   │
-│  │  │  └─────────┘ └─────────┘ └─────────┘ └─────────┘ └───────┘  │    │   │
-│  │  └─────────────────────────────────────────────────────────────┘    │   │
-│  │  ┌─────────────────────────────────────────────────────────────┐    │   │
-│  │  │                       AI ENGINE                              │    │   │
-│  │  │  ┌───────────┐ ┌───────────┐ ┌───────────┐ ┌───────────┐    │    │   │
-│  │  │  │  Insights │ │Auto-Healing│ │Smart Alerts│ │  Copilot  │    │    │   │
-│  │  │  └───────────┘ └───────────┘ └───────────┘ └───────────┘    │    │   │
-│  │  └─────────────────────────────────────────────────────────────┘    │   │
-│  └─────────────────────────────────────────────────────────────────────┘   │
-│                             │                                               │
-│                             ▼                                               │
-│  ┌─────────────────────────────────────────────────────────────────────┐   │
-│  │                    DASHBOARD (Next.js) — Port 3000                   │   │
-│  │  ┌─────────────────────────────────────────────────────────────┐    │   │
-│  │  │                        FEATURES                              │    │   │
-│  │  │  ┌─────────┐ ┌─────────┐ ┌─────────┐ ┌─────────┐ ┌───────┐  │    │   │
-│  │  │  │ Metrics │ │ Traces  │ │  Logs   │ │ Alerts  │ │Copilot│  │    │   │
-│  │  │  │Dashboard│ │ Viewer  │ │Explorer │ │ Manager │ │ Chat  │  │    │   │
-│  │  │  └─────────┘ └─────────┘ └─────────┘ └─────────┘ └───────┘  │    │   │
-│  │  └─────────────────────────────────────────────────────────────┘    │   │
-│  └─────────────────────────────────────────────────────────────────────┘   │
-│                                                                             │
-└────────────────────────────────────┬────────────────────────────────────────┘
-                                     │
-         ┌───────────────────────────┼───────────────────────────┐
-         │                           │                           │
-         ▼                           ▼                           ▼
-┌─────────────────┐     ┌─────────────────┐     ┌─────────────────────────┐
-│    Database     │     │   Time Series   │     │      OpenAI API         │
-│   (PostgreSQL)  │     │   (InfluxDB/    │     │                         │
-│                 │     │   ClickHouse)   │     │  ┌─────────────────┐   │
-│  • Users        │     │                 │     │  │     GPT-4       │   │
-│  • Alerts       │     │  • Metrics      │     │  │  (Insights,     │   │
-│  • Configs      │     │  • Traces       │     │  │   Copilot)      │   │
-│  • Audit Logs   │     │  • Logs         │     │  └─────────────────┘   │
-└─────────────────┘     └─────────────────┘     └─────────────────────────┘
-```
-
-## Fluxo de Dados
-
-```
-┌─────────────┐     ┌─────────────┐     ┌─────────────┐     ┌─────────────┐
-│ Application │────▶│  OTEL SDK   │────▶│OTLP Receiver│────▶│  Processor  │
-│  (Source)   │     │  (Client)   │     │ (4317/4318) │     │  Pipeline   │
-└─────────────┘     └─────────────┘     └─────────────┘     └──────┬──────┘
-                                                                   │
-      ┌────────────────────────────────────────────────────────────┘
-      │
-      ▼
-┌─────────────┐     ┌─────────────┐     ┌─────────────┐     ┌─────────────┐
-│   Storage   │────▶│  AI Engine  │────▶│  Insights   │────▶│   Alert     │
-│  (TimeSeries)│     │  (Analysis) │     │ (Generated) │     │  (Trigger)  │
-└─────────────┘     └─────────────┘     └─────────────┘     └──────┬──────┘
-                                                                   │
-      ┌────────────────────────────────────────────────────────────┘
-      │
-      ▼
-┌─────────────┐     ┌─────────────┐
-│ Notification│────▶│ Auto-Heal   │
-│   (Slack,   │     │  (Execute   │
-│   etc.)     │     │  Playbook)  │
-└─────────────┘     └─────────────┘
-```
-
-## Módulos de IA
-
-```
-┌─────────────────────────────────────────────────────────────────────────────┐
-│                              AI ENGINE                                       │
-├─────────────────────────────────────────────────────────────────────────────┤
-│                                                                             │
-│  ┌───────────────────────────┐  ┌───────────────────────────┐              │
-│  │     🔮 INSIGHTS           │  │     🔧 AUTO-HEALING       │              │
-│  │                           │  │                           │              │
-│  │  • Anomaly detection      │  │  • Problem detection      │              │
-│  │  • Pattern recognition    │  │  • Playbook matching      │              │
-│  │  • Capacity forecasting   │  │  • Action execution       │              │
-│  │  • Root cause analysis    │  │  • Verification           │              │
-│  │  • Optimization tips      │  │  • Rollback support       │              │
-│  │                           │  │                           │              │
-│  │  Input: Metrics, Traces   │  │  Input: Alerts, Metrics   │              │
-│  │  Output: Recommendations  │  │  Output: Actions          │              │
-│  └───────────────────────────┘  └───────────────────────────┘              │
-│                                                                             │
-│  ┌───────────────────────────┐  ┌───────────────────────────┐              │
-│  │     🚨 SMART ALERTS       │  │     💬 COPILOT            │              │
-│  │                           │  │                           │              │
-│  │  • Alert correlation      │  │  • Natural language query │              │
-│  │  • Noise reduction        │  │  • Context-aware answers  │              │
-│  │  • Priority scoring       │  │  • Dashboard generation   │              │
-│  │  • Impact analysis        │  │  • Query building         │              │
-│  │  • Grouping/dedup         │  │  • Troubleshooting guide  │              │
-│  │                           │  │                           │              │
-│  │  Input: Raw alerts        │  │  Input: User question     │              │
-│  │  Output: Prioritized list │  │  Output: Answer + actions │              │
-│  └───────────────────────────┘  └───────────────────────────┘              │
-│                                                                             │
-└─────────────────────────────────────────────────────────────────────────────┘
-```
-
-## Arquitetura do Backend (Go)
-
-```
-┌─────────────────────────────────────────────────────────────────────┐
-│                           GO SERVER                                  │
-│                          (cmd/server)                                │
-│                                                                     │
-│  ┌───────────────────────────────────────────────────────────────┐ │
-│  │                         INTERNAL                               │ │
-│  │                                                                │ │
-│  │  ┌─────────────────────────────────────────────────────────┐  │ │
-│  │  │                     internal/api                         │  │ │
-│  │  │  ┌───────────────┐  ┌───────────────┐                   │  │ │
-│  │  │  │  ai_handlers  │  │   ratelimit   │                   │  │ │
-│  │  │  │               │  │               │                   │  │ │
-│  │  │  │ • /ai/insights│  │ • Token bucket│                   │  │ │
-│  │  │  │ • /ai/chat    │  │ • Per-IP limit│                   │  │ │
-│  │  │  │ • /ai/analyze │  │ • Sliding win │                   │  │ │
-│  │  │  └───────────────┘  └───────────────┘                   │  │ │
-│  │  └─────────────────────────────────────────────────────────┘  │ │
-│  │                                                                │ │
-│  │  ┌─────────────────────────────────────────────────────────┐  │ │
-│  │  │                     internal/ai                          │  │ │
-│  │  │  ┌─────────┐ ┌─────────┐ ┌─────────┐ ┌─────────────┐    │  │ │
-│  │  │  │ engine  │ │insights │ │autohealing│ │smart_alerts│    │  │ │
-│  │  │  └─────────┘ └─────────┘ └─────────┘ └─────────────┘    │  │ │
-│  │  │  ┌─────────┐                                             │  │ │
-│  │  │  │ copilot │                                             │  │ │
-│  │  │  └─────────┘                                             │  │ │
-│  │  └─────────────────────────────────────────────────────────┘  │ │
-│  │                                                                │ │
-│  │  ┌─────────────────────────────────────────────────────────┐  │ │
-│  │  │                    internal/auth                         │  │ │
-│  │  │  ┌───────────────┐  ┌───────────────┐                   │  │ │
-│  │  │  │   handlers    │  │  middleware   │                   │  │ │
-│  │  │  │ • login       │  │ • JWT verify  │                   │  │ │
-│  │  │  │ • logout      │  │ • RBAC check  │                   │  │ │
-│  │  │  └───────────────┘  └───────────────┘                   │  │ │
-│  │  └─────────────────────────────────────────────────────────┘  │ │
-│  │                                                                │ │
-│  │  ┌─────────────────────────────────────────────────────────┐  │ │
-│  │  │                    internal/otel                         │  │ │
-│  │  │  ┌───────────────────────────────────────────────────┐  │  │ │
-│  │  │  │                 otlp_receiver                      │  │  │ │
-│  │  │  │  • gRPC server (4317)                              │  │  │ │
-│  │  │  │  • HTTP server (4318)                              │  │  │ │
-│  │  │  │  • Trace/Metric/Log processors                     │  │  │ │
-│  │  │  └───────────────────────────────────────────────────┘  │  │ │
-│  │  └─────────────────────────────────────────────────────────┘  │ │
-│  │                                                                │ │
-│  └───────────────────────────────────────────────────────────────┘ │
-└─────────────────────────────────────────────────────────────────────┘
-```
-
-## Stack Tecnológico
-
-```
-┌─────────────────────────────────────────────────────────────────┐
-│                        STACK TECNOLÓGICO                         │
-├─────────────────────────────────────────────────────────────────┤
-│                                                                 │
-│  ┌─────────────────────────────────────────────────────────┐   │
-│  │                      BACKEND                             │   │
-│  │  ┌───────────┐ ┌───────────┐ ┌───────────┐              │   │
-│  │  │    Go     │ │  OpenAI   │ │   OTEL    │              │   │
-│  │  │   1.22+   │ │   GPT-4   │ │  Protocol │              │   │
-│  │  └───────────┘ └───────────┘ └───────────┘              │   │
-│  └─────────────────────────────────────────────────────────┘   │
-│                                                                 │
-│  ┌─────────────────────────────────────────────────────────┐   │
-│  │                      FRONTEND                            │   │
-│  │  ┌───────────┐ ┌───────────┐ ┌───────────┐ ┌─────────┐  │   │
-│  │  │  Next.js  │ │  React    │ │ Tailwind  │ │Recharts │  │   │
-│  │  │   14.1    │ │    18     │ │           │ │ D3.js   │  │   │
-│  │  └───────────┘ └───────────┘ └───────────┘ └─────────┘  │   │
-│  │  ┌───────────┐ ┌───────────┐                            │   │
-│  │  │ Radix UI  │ │  TanStack │                            │   │
-│  │  │           │ │   Query   │                            │   │
-│  │  └───────────┘ └───────────┘                            │   │
-│  └─────────────────────────────────────────────────────────┘   │
-│                                                                 │
-│  ┌─────────────────────────────────────────────────────────┐   │
-│  │                    INFRASTRUCTURE                        │   │
-│  │  ┌───────────┐ ┌───────────┐ ┌───────────┐ ┌─────────┐  │   │
-│  │  │  Docker   │ │PostgreSQL │ │InfluxDB/  │ │  Nginx  │  │   │
-│  │  │ Compose   │ │           │ │ClickHouse │ │(reverse)│  │   │
-│  │  └───────────┘ └───────────┘ └───────────┘ └─────────┘  │   │
-│  └─────────────────────────────────────────────────────────┘   │
-│                                                                 │
-└─────────────────────────────────────────────────────────────────┘
-```
-
-## Deploy Architecture
-
-```
-┌─────────────────────────────────────────────────────────────────────────────┐
-│                         DOCKER COMPOSE DEPLOYMENT                            │
-├─────────────────────────────────────────────────────────────────────────────┤
-│                                                                             │
-│  ┌─────────────────────────────────────────────────────────────────────┐   │
-│  │                         CONTAINERS                                   │   │
-│  │                                                                      │   │
-│  │  ┌─────────────────┐  ┌─────────────────┐  ┌─────────────────┐      │   │
-│  │  │  ophion-server  │  │ophion-dashboard │  │  ophion-agent   │      │   │
-│  │  │   (Go API)      │  │   (Next.js)     │  │   (Collector)   │      │   │
-│  │  │   Port 8080     │  │   Port 3000     │  │  4317/4318      │      │   │
-│  │  └─────────────────┘  └─────────────────┘  └─────────────────┘      │   │
-│  │                                                                      │   │
-│  │  ┌─────────────────┐  ┌─────────────────┐                           │   │
-│  │  │   postgresql    │  │    influxdb     │                           │   │
-│  │  │   Port 5432     │  │   Port 8086     │                           │   │
-│  │  └─────────────────┘  └─────────────────┘                           │   │
-│  │                                                                      │   │
-│  └─────────────────────────────────────────────────────────────────────┘   │
-│                                                                             │
-│  ┌─────────────────────────────────────────────────────────────────────┐   │
-│  │                          VOLUMES                                     │   │
-│  │  • postgres_data    — Database persistence                           │   │
-│  │  • influx_data      — Time series data                               │   │
-│  │  • ophion_config    — Configuration files                            │   │
-│  └─────────────────────────────────────────────────────────────────────┘   │
-│                                                                             │
-│  ┌─────────────────────────────────────────────────────────────────────┐   │
-│  │                          NETWORKS                                    │   │
-│  │  • ophion_net — Internal communication                               │   │
-│  │  • External ports: 3000 (UI), 8080 (API), 4317/4318 (OTLP)          │   │
-│  └─────────────────────────────────────────────────────────────────────┘   │
-│                                                                             │
-└─────────────────────────────────────────────────────────────────────────────┘
-```
-
-## Fluxo de Auto-Instrumentação
-
-```
-┌─────────────┐     ┌─────────────┐     ┌─────────────┐     ┌─────────────┐
-│   Run       │────▶│   Detect    │────▶│   Inject    │────▶│   Config    │
-│ instrument.sh│     │  Language   │     │  OTEL SDK   │     │  Exporter   │
-└─────────────┘     └─────────────┘     └─────────────┘     └──────┬──────┘
-                                                                   │
-      ┌────────────────────────────────────────────────────────────┘
-      │
-      ▼
-┌─────────────┐     ┌─────────────┐     ┌─────────────┐
-│   Restart   │────▶│   Verify    │────▶│   Ready     │
-│   Container │     │   Telemetry │     │   ✓         │
-└─────────────┘     └─────────────┘     └─────────────┘
-```
-
---
-
-*Diagrama de arquitetura gerado em 2026-02-09*
--- a/docs/ARQUITETURA.pdf
+++ b/docs/ARQUITETURA.pdf
--- a/docs/Manual-Tecnico.md
+++ b/docs/Manual-Tecnico.md
@@ -1,262 +0,0 @@
-# Manual Técnico - OPHION
-
-## 1. Visão Geral
-
-OPHION é uma plataforma open source de observabilidade que combina métricas, logs e traces com inteligência artificial para monitoramento proativo de infraestrutura.
-
-## 2. Stack Tecnológico
-
-| Camada | Tecnologia | Versão |
-|--------|------------|--------|
-| Backend | Go | 1.22+ |
-| Frontend | Next.js | 14.1.0 |
-| UI Components | Radix UI | Latest |
-| Gráficos | Recharts, Chart.js, D3 | Latest |
-| Estilização | TailwindCSS | Latest |
-| Observabilidade | OpenTelemetry | Latest |
-| IA | OpenAI API | GPT-4 |
-| Container | Docker Compose | Latest |
-
-## 3. Estrutura do Projeto
-
-```
-ophion/
-├── cmd/                      # Entry points
-│   ├── server/main.go        # API Server principal
-│   └── agent/main.go         # Agente de coleta
-├── internal/                 # Código interno
-│   ├── ai/                   # Módulos de IA
-│   │   ├── engine.go         # Motor de IA
-│   │   ├── insights.go       # Geração de insights
-│   │   ├── autohealing.go    # Auto-recuperação
-│   │   ├── smart_alerts.go   # Alertas inteligentes
-│   │   └── copilot.go        # Assistente IA
-│   ├── api/                  # Handlers da API
-│   │   ├── ai_handlers.go    # Endpoints de IA
-│   │   └── ratelimit.go      # Rate limiting
-│   ├── auth/                 # Autenticação
-│   │   ├── handlers.go
-│   │   └── middleware.go
-│   ├── otel/                 # OpenTelemetry
-│   │   └── otlp_receiver.go  # Receptor OTLP
-│   └── security/             # Segurança
-│       └── security.go
-├── dashboard/                # Frontend Next.js
-│   ├── app/                  # App Router
-│   ├── components/           # Componentes React
-│   └── package.json
-├── deploy/                   # Configurações de deploy
-│   ├── docker/
-│   └── remote-agent/
-├── examples/                 # Exemplos de instrumentação
-│   ├── otel-nodejs/
-│   └── otel-python/
-├── configs/                  # Configurações
-├── web/                      # Assets web
-├── docker-compose.yml        # Orquestração
-├── instrument.sh             # Script de instrumentação
-├── install.sh                # Script de instalação
-├── go.mod / go.sum           # Dependências Go
-└── README.md
-```
-
-## 4. Componentes do Sistema
-
-### 4.1 API Server (Go)
- **Porta**: 8080
- **Função**: Backend principal, API REST, processamento de dados
- **Módulos**: Auth, AI, OTLP Receiver, Rate Limiting
-
-### 4.2 Dashboard (Next.js)
- **Porta**: 3000
- **Função**: Interface web, visualização de métricas, configuração
- **Tecnologias**: React 18, TanStack Query, Recharts, D3
-
-### 4.3 OTLP Receiver
- **Portas**: 4317 (gRPC), 4318 (HTTP)
- **Função**: Recepção de traces, métricas e logs via OpenTelemetry
-
-### 4.4 Agente
- **Função**: Coleta de dados em hosts remotos
- **Deploy**: Container ou binário standalone
-
-## 5. Módulos de IA
-
-### 5.1 AI Engine
- Motor central de processamento de IA
- Integração com OpenAI GPT-4
- Cache de respostas
-
-### 5.2 Insights
- Análise automática de padrões
- Detecção de anomalias
- Sugestões de otimização
-
-### 5.3 Auto-Healing
- Detecção de problemas
- Execução automática de ações corretivas
- Playbooks configuráveis
-
-### 5.4 Smart Alerts
- Correlação inteligente de alertas
- Redução de ruído
- Priorização automática
-
-### 5.5 Copilot
- Assistente interativo via chat
- Consultas em linguagem natural
- Geração de queries e dashboards
-
-## 6. API Endpoints
-
-### 6.1 Autenticação
-```
-POST /api/auth/login
-POST /api/auth/logout
-GET  /api/auth/me
-```
-
-### 6.2 Métricas
-```
-GET  /api/metrics
-GET  /api/metrics/:name
-POST /api/metrics/query
-```
-
-### 6.3 Traces
-```
-GET  /api/traces
-GET  /api/traces/:traceId
-POST /api/traces/search
-```
-
-### 6.4 Logs
-```
-GET  /api/logs
-POST /api/logs/query
-```
-
-### 6.5 IA
-```
-POST /api/ai/insights
-POST /api/ai/chat
-POST /api/ai/analyze
-POST /api/ai/autohealing/trigger
-```
-
-### 6.6 Alertas
-```
-GET  /api/alerts
-POST /api/alerts
-PUT  /api/alerts/:id
-DELETE /api/alerts/:id
-```
-
-## 7. Requisitos de Sistema
-
-### 7.1 Servidor
- CPU: 4 cores
- RAM: 8GB
- Disco: 100GB SSD
- SO: Linux (Ubuntu 22.04+, Debian 12+)
-
-### 7.2 Dependências
- Docker 24.0+
- Docker Compose 2.0+
- Go 1.22+ (para desenvolvimento)
- Node.js 20+ (para desenvolvimento)
-
-### 7.3 Rede
- Porta 3000: Dashboard
- Porta 8080: API
- Porta 4317: OTLP gRPC
- Porta 4318: OTLP HTTP
-
-## 8. Instalação e Deploy
-
-### 8.1 Quick Start (Docker)
-```bash
-git clone https://github.com/bigtux/ophion.git
-cd ophion
-docker compose up -d
-```
-
-### 8.2 Instalação Completa
-```bash
-# Download e instalação
-curl -fsSL https://ophion.io/install.sh | bash
-
-# Configuração
-cp .env.example .env
-vim .env
-
-# Iniciar
-docker compose up -d
-```
-
-### 8.3 Variáveis de Ambiente
-```env
-# API
-API_PORT=8080
-API_SECRET=your-secret-key
-
-# OpenAI
-OPENAI_API_KEY=sk-xxx
-
-# Database
-DB_HOST=localhost
-DB_PORT=5432
-DB_NAME=ophion
-
-# OTLP
-OTLP_GRPC_PORT=4317
-OTLP_HTTP_PORT=4318
-```
-
-## 9. Instrumentação de Aplicações
-
-### 9.1 Script Universal
-```bash
-# Auto-detecta linguagem
-./instrument.sh my-container
-
-# Linguagem específica
-./instrument.sh my-container nodejs
-./instrument.sh my-container python
-./instrument.sh my-container java
-./instrument.sh my-container dotnet
-```
-
-### 9.2 Linguagens Suportadas
- Node.js (auto-instrumentation)
- Python (auto-instrumentation)
- Java (agent)
- .NET (auto-instrumentation)
- Go (manual/SDK)
-
-## 10. Segurança
-
-### 10.1 Autenticação
- JWT tokens
- Session management
- Role-based access control
-
-### 10.2 Comunicação
- HTTPS obrigatório em produção
- TLS para OTLP
- API rate limiting
-
-### 10.3 Dados
- Encryption at rest (opcional)
- Audit logging
- Data retention policies
-
-## 11. Monitoramento da Própria Plataforma
-
- Health checks em todos os componentes
- Self-monitoring dashboard
- Alertas de sistema
-
---
-
-*Documento gerado automaticamente em 2026-02-09*
--- a/docs/Manual-Tecnico.pdf
+++ b/docs/Manual-Tecnico.pdf
--- a/docs/Manual-Vendas.md
+++ b/docs/Manual-Vendas.md
@@ -1,189 +0,0 @@
-# Manual de Vendas - OPHION
-
-## 1. O Produto
-
-**OPHION** é uma plataforma de observabilidade open source potencializada por IA que unifica métricas, logs e traces com capacidades de auto-healing e assistente inteligente.
-
-## 2. Proposta de Valor
-
-### O Problema
-Equipes de infraestrutura e DevOps enfrentam:
- Ferramentas de observabilidade fragmentadas e caras
- Alertas em excesso sem priorização
- Diagnóstico manual e demorado de incidentes
- Falta de visibilidade end-to-end
- Custos crescentes com soluções enterprise
-
-### A Solução OPHION
-Uma plataforma **open source** que oferece:
- Métricas, logs e traces em um lugar
- IA que analisa e sugere soluções
- Auto-healing para problemas conhecidos
- Alertas inteligentes com correlação
- Copilot para consultas em linguagem natural
-
-## 3. Público-Alvo
-
-### 3.1 Startups e Scale-ups
- Precisam de observabilidade enterprise
- Orçamento limitado
- Equipe DevOps pequena
- **Valor**: Enterprise features sem custo enterprise
-
-### 3.2 Empresas de Médio Porte
- Infraestrutura crescente
- Múltiplos serviços e times
- Precisam de padronização
- **Valor**: Consolidação e governança
-
-### 3.3 Enterprises
- Compliance e controle de dados
- Self-hosted obrigatório
- Integração com sistemas legados
- **Valor**: Open source + suporte enterprise
-
-### 3.4 MSPs e Consultorias
- Gerenciam múltiplos clientes
- Precisam de multi-tenancy
- White-label desejável
- **Valor**: Plataforma customizável
-
-## 4. Funcionalidades Principais
-
-### 📊 Observabilidade Unificada
-Métricas, logs e traces em uma única plataforma com correlação automática e visualizações integradas.
-
-### 🤖 AI-Powered Insights
-IA analisa seus dados e gera insights acionáveis, detecta anomalias e prevê problemas antes que aconteçam.
-
-### 🔧 Auto-Healing
-Configure playbooks de auto-recuperação. OPHION detecta problemas e executa ações corretivas automaticamente.
-
-### 🚨 Smart Alerts
-Alertas inteligentes que correlacionam eventos, reduzem ruído e priorizam o que realmente importa.
-
-### 💬 Copilot
-Assistente de IA para consultas em linguagem natural. Pergunte "Por que o serviço X está lento?" e receba análise completa.
-
-### 🔌 Auto-Instrumentação
-Script universal que instrumenta aplicações automaticamente. Suporte a Node.js, Python, Java, .NET.
-
-### 📈 Dashboards Customizáveis
-Crie dashboards personalizados com drag-and-drop, templates prontos e compartilhamento de visualizações.
-
-### 🔗 OpenTelemetry Nativo
-Compatível com o padrão OpenTelemetry, integre qualquer aplicação ou ferramenta do ecossistema OTEL.
-
-## 5. Diferenciais Competitivos
-
-| Capacidade | OPHION | Datadog | Grafana | New Relic |
-|------------|--------|---------|---------|-----------|
-| Métricas/Logs/Traces | ✅ | ✅ | ✅ | ✅ |
-| **Copilot IA** | ✅ | ❌ | ❌ | ❌ |
-| **Auto-Healing** | ✅ | ❌ | ❌ | ❌ |
-| **Correlação IA** | ✅ | 💰 | ❌ | 💰 |
-| **Previsões** | ✅ | 💰 | ❌ | 💰 |
-| Open Source | ✅ | ❌ | ✅ | ❌ |
-| **Auto-Instrumentação** | ✅ | 💰 | ❌ | 💰 |
-| Self-Hosted | ✅ | ❌ | ✅ | ❌ |
-
-## 6. Benefícios por Perfil
-
-### Para SREs e DevOps
- ✅ MTTR reduzido com diagnóstico por IA
- ✅ Menos alertas, mais contexto
- ✅ Auto-healing para problemas recorrentes
- ✅ Consultas em linguagem natural
-
-### Para CTOs e VPs de Engineering
- ✅ Custos de observabilidade controlados
- ✅ Dados sob controle (self-hosted)
- ✅ Vendor lock-in zero
- ✅ Compliance facilitado
-
-### Para Desenvolvedores
- ✅ Tracing distribuído sem configuração
- ✅ Debug de performance simplificado
- ✅ Correlação código-infraestrutura
- ✅ Copilot para dúvidas técnicas
-
-### Para Gestores
- ✅ SLAs monitorados automaticamente
- ✅ Relatórios de disponibilidade
- ✅ Previsão de capacidade
- ✅ TCO otimizado
-
-## 7. Casos de Uso
-
-### Caso 1: Fintech
-> "Migramos do Datadog para OPHION e economizamos 80% no custo de observabilidade. O auto-healing reduziu nosso MTTR de 45min para 8min."
-
-### Caso 2: E-commerce
-> "Durante a Black Friday, o Copilot identificou um gargalo em microsserviço antes de impactar usuários. Prevenimos uma queda que custaria milhões."
-
-### Caso 3: SaaS B2B
-> "Compliance exigia dados on-premise. Com OPHION temos observabilidade enterprise rodando no nosso próprio datacenter."
-
-## 8. Modelos de Uso
-
-### Open Source (Gratuito)
- Todas as funcionalidades
- Comunidade para suporte
- Self-managed
- Sem limites de dados
-
-### Enterprise (Suporte)
- Suporte 24/7
- SLA garantido
- Consultoria de implantação
- Updates prioritários
- Custom features
-
-## 9. Comparativo de TCO (3 anos)
-
-| Item | OPHION | Datadog | New Relic |
-|------|--------|---------|-----------|
-| Licença | $0 | $$$$ | $$$$ |
-| Infraestrutura | $$ | Incluso | Incluso |
-| Suporte (opcional) | $ | Incluso | Incluso |
-| **Total** | **$$** | **$$$$** | **$$$$** |
-| **Economia** | **-** | **70-80%** | **70-80%** |
-
-## 10. Integrações
-
-### Instrumentação
- ✅ Node.js, Python, Java, .NET, Go
- ✅ Kubernetes, Docker
- ✅ AWS, GCP, Azure
-
-### Alertas
- ✅ Slack, Teams, Discord
- ✅ PagerDuty, OpsGenie
- ✅ Email, SMS, Webhooks
-
-### Dados
- ✅ Prometheus, InfluxDB
- ✅ Elasticsearch, Loki
- ✅ Jaeger, Zipkin
-
-## 11. Quick Start
-
-```bash
-# Instalação em 1 comando
-git clone https://github.com/bigtux/ophion.git && cd ophion && docker compose up -d
-
-# Acesse em 2 minutos
-# Dashboard: http://localhost:3000
-# API: http://localhost:8080
-```
-
-## 12. Suporte e Comunidade
-
- 📚 Documentação completa
- 💬 Discord da comunidade
- 🐛 GitHub Issues
- 📧 Suporte enterprise (opcional)
-
---
-
-*OPHION — Observabilidade Inteligente, Open Source*
--- a/docs/Manual-Vendas.pdf
+++ b/docs/Manual-Vendas.pdf
--- a/internal/aiapm/collector.go
+++ b/internal/aiapm/collector.go
@@ -0,0 +1,109 @@
+package aiapm
+
+import (
+	"database/sql"
+	"log"
+	"time"
+
+	"github.com/google/uuid"
+)
+
+// Collector queues AI call records on a buffered channel and persists them from a background goroutine.
+type Collector struct {
+	db     *sql.DB           // database the records are inserted into
+	ch     chan AICallRecord // queue drained by backgroundWriter
+	done   chan struct{}     // closed when backgroundWriter returns
+}
+
+// NewCollector returns a Collector whose queue holds bufferSize records (1000 when bufferSize <= 0) and starts its background writer goroutine.
+func NewCollector(db *sql.DB, bufferSize int) *Collector {
+	if bufferSize <= 0 {
+		bufferSize = 1000
+	}
+	c := &Collector{
+		db:   db,
+		ch:   make(chan AICallRecord, bufferSize),
+		done: make(chan struct{}),
+	}
+	go c.backgroundWriter()
+	return c
+}
+
+// Collect fills in defaults (ID, timestamp, status, cost) and enqueues r for async storage; calling it after Stop panics because the queue channel is closed.
+func (c *Collector) Collect(r AICallRecord) {
+	if r.ID == "" {
+		r.ID = uuid.New().String()
+	}
+	if r.Timestamp.IsZero() {
+		r.Timestamp = time.Now()
+	}
+	if r.Status == "" {
+		r.Status = "success"
+	}
+	// Estimate the cost only when none was supplied and input or output tokens were reported
+	if r.EstimatedCost == 0 && (r.TokensIn > 0 || r.TokensOut > 0) {
+		r.EstimatedCost = EstimateCost(r.Vendor, r.Model, r.TokensIn, r.TokensOut, r.TokensCacheRead, r.TokensCacheWrite)
+	}
+
+	select {
+	case c.ch <- r:
+	default:
+		// Queue full — insert synchronously on the caller's goroutine instead of dropping the record
+		if err := InsertCall(c.db, r); err != nil {
+			log.Printf("ai-apm: sync insert error: %v", err)
+		}
+	}
+}
+
+// CollectBatch passes each record to Collect, in slice order.
+func (c *Collector) CollectBatch(records []AICallRecord) {
+	for i := range records {
+		c.Collect(records[i])
+	}
+}
+
+// Stop closes the queue and blocks until the background writer has flushed and exited; call it at most once (a second close panics).
+func (c *Collector) Stop() {
+	close(c.ch)
+	<-c.done
+}
+
+func (c *Collector) backgroundWriter() {
+	defer close(c.done) // lets Stop know the final flush has finished
+
+	batch := make([]AICallRecord, 0, 100) // flushed at 100 records or on every tick
+	ticker := time.NewTicker(500 * time.Millisecond)
+	defer ticker.Stop()
+
+	flush := func() {
+		if len(batch) == 0 {
+			return
+		}
+		if err := InsertCallBatch(c.db, batch); err != nil {
+			log.Printf("ai-apm: batch insert error (%d records): %v", len(batch), err)
+			// Fallback: the batch transaction failed as a whole, so retry each record individually
+			for _, r := range batch {
+				if err := InsertCall(c.db, r); err != nil {
+					log.Printf("ai-apm: single insert error: %v", err)
+				}
+			}
+		}
+		batch = batch[:0] // keep the backing array for the next batch
+	}
+
+	for {
+		select {
+		case r, ok := <-c.ch:
+			if !ok { // channel closed by Stop: write what is left and exit
+				flush()
+				return
+			}
+			batch = append(batch, r)
+			if len(batch) >= 100 {
+				flush()
+			}
+		case <-ticker.C:
+			flush()
+		}
+	}
+}
--- a/internal/aiapm/pricing.go
+++ b/internal/aiapm/pricing.go
@@ -0,0 +1,68 @@
+package aiapm
+
+// ModelPricing holds a model's price per one million tokens for each token category.
+type ModelPricing struct {
+	InputPer1M      float64 `json:"input_per_1m"`
+	OutputPer1M     float64 `json:"output_per_1m"`
+	CacheReadPer1M  float64 `json:"cache_read_per_1m"`
+	CacheWritePer1M float64 `json:"cache_write_per_1m"`
+}
+
+// PricingTable maps "vendor/model" keys to prices in USD per 1M tokens; EstimateCost looks keys up by exact match.
+var PricingTable = map[string]ModelPricing{
+	// Anthropic
+	"anthropic/claude-opus-4":          {InputPer1M: 15.0, OutputPer1M: 75.0, CacheReadPer1M: 1.5, CacheWritePer1M: 18.75},
+	"anthropic/claude-sonnet-4":        {InputPer1M: 3.0, OutputPer1M: 15.0, CacheReadPer1M: 0.3, CacheWritePer1M: 3.75},
+	"anthropic/claude-3.5-sonnet":      {InputPer1M: 3.0, OutputPer1M: 15.0, CacheReadPer1M: 0.3, CacheWritePer1M: 3.75},
+	"anthropic/claude-3.5-haiku":       {InputPer1M: 0.8, OutputPer1M: 4.0, CacheReadPer1M: 0.08, CacheWritePer1M: 1.0},
+	"anthropic/claude-3-haiku":         {InputPer1M: 0.25, OutputPer1M: 1.25, CacheReadPer1M: 0.03, CacheWritePer1M: 0.3},
+
+	// OpenAI
+	"openai/gpt-4o":                    {InputPer1M: 2.5, OutputPer1M: 10.0, CacheReadPer1M: 1.25, CacheWritePer1M: 2.5},
+	"openai/gpt-4o-mini":               {InputPer1M: 0.15, OutputPer1M: 0.6, CacheReadPer1M: 0.075, CacheWritePer1M: 0.15},
+	"openai/o1":                        {InputPer1M: 15.0, OutputPer1M: 60.0, CacheReadPer1M: 7.5, CacheWritePer1M: 15.0},
+	"openai/o1-mini":                   {InputPer1M: 3.0, OutputPer1M: 12.0, CacheReadPer1M: 1.5, CacheWritePer1M: 3.0},
+	"openai/o3":                        {InputPer1M: 10.0, OutputPer1M: 40.0, CacheReadPer1M: 5.0, CacheWritePer1M: 10.0},
+	"openai/o3-mini":                   {InputPer1M: 1.1, OutputPer1M: 4.4, CacheReadPer1M: 0.55, CacheWritePer1M: 1.1},
+
+	// Google
+	"google/gemini-2.5-pro":            {InputPer1M: 1.25, OutputPer1M: 10.0, CacheReadPer1M: 0.315, CacheWritePer1M: 1.25},
+	"google/gemini-2.5-flash":          {InputPer1M: 0.15, OutputPer1M: 0.6, CacheReadPer1M: 0.0375, CacheWritePer1M: 0.15},
+	"google/gemini-2.0-flash":          {InputPer1M: 0.1, OutputPer1M: 0.4, CacheReadPer1M: 0.025, CacheWritePer1M: 0.1},
+
+	// Mistral
+	"mistral/mistral-large":            {InputPer1M: 2.0, OutputPer1M: 6.0, CacheReadPer1M: 2.0, CacheWritePer1M: 2.0},
+	"mistral/mistral-small":            {InputPer1M: 0.1, OutputPer1M: 0.3, CacheReadPer1M: 0.1, CacheWritePer1M: 0.1},
+	"mistral/codestral":                {InputPer1M: 0.3, OutputPer1M: 0.9, CacheReadPer1M: 0.3, CacheWritePer1M: 0.3},
+
+	// DeepSeek
+	"deepseek/deepseek-chat":           {InputPer1M: 0.14, OutputPer1M: 0.28, CacheReadPer1M: 0.014, CacheWritePer1M: 0.14},
+	"deepseek/deepseek-reasoner":       {InputPer1M: 0.55, OutputPer1M: 2.19, CacheReadPer1M: 0.055, CacheWritePer1M: 0.55},
+
+	// Groq (hosted models — pricing approximate)
+	"groq/llama-3.3-70b":               {InputPer1M: 0.59, OutputPer1M: 0.79, CacheReadPer1M: 0.59, CacheWritePer1M: 0.59},
+	"groq/llama-3.1-8b":                {InputPer1M: 0.05, OutputPer1M: 0.08, CacheReadPer1M: 0.05, CacheWritePer1M: 0.05},
+	"groq/gemma2-9b":                   {InputPer1M: 0.2, OutputPer1M: 0.2, CacheReadPer1M: 0.2, CacheWritePer1M: 0.2},
+}
+
+// EstimateCost returns the estimated USD cost of a call, or 0 when "vendor/model" is not a key of PricingTable.
+func EstimateCost(vendor, model string, tokensIn, tokensOut, cacheRead, cacheWrite int) float64 {
+	key := vendor + "/" + model
+	pricing, ok := PricingTable[key]
+	if !ok {
+		// Unknown vendor/model: no fallback lookup is attempted, the estimate is simply 0
+		return 0
+	}
+
+	cost := float64(tokensIn) * pricing.InputPer1M / 1_000_000
+	cost += float64(tokensOut) * pricing.OutputPer1M / 1_000_000
+	cost += float64(cacheRead) * pricing.CacheReadPer1M / 1_000_000
+	cost += float64(cacheWrite) * pricing.CacheWritePer1M / 1_000_000
+
+	return cost
+}
+
+// GetPricingTable returns PricingTable itself (not a copy); it is used by the pricing API endpoint.
+func GetPricingTable() map[string]ModelPricing {
+	return PricingTable
+}
--- a/internal/aiapm/store.go
+++ b/internal/aiapm/store.go
@@ -0,0 +1,349 @@
+package aiapm
+
+import (
+	"database/sql"
+	"encoding/json"
+	"fmt"
+	"strconv"
+	"strings"
+	"time"
+
+	"github.com/google/uuid"
+)
+
+// CreateTable creates the ai_calls table and its indexes if they do not already exist, in a single Exec.
+func CreateTable(db *sql.DB) error {
+	schema := `
+	CREATE TABLE IF NOT EXISTS ai_calls (
+		id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+		timestamp TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+		service_name VARCHAR(255) NOT NULL,
+		project_id VARCHAR(255) NOT NULL DEFAULT '',
+		vendor VARCHAR(100) NOT NULL,
+		model VARCHAR(255) NOT NULL,
+		tokens_in INT NOT NULL DEFAULT 0,
+		tokens_out INT NOT NULL DEFAULT 0,
+		tokens_cache_read INT NOT NULL DEFAULT 0,
+		tokens_cache_write INT NOT NULL DEFAULT 0,
+		estimated_cost DOUBLE PRECISION NOT NULL DEFAULT 0,
+		latency_ms INT NOT NULL DEFAULT 0,
+		ttfb_ms INT NOT NULL DEFAULT 0,
+		status VARCHAR(20) NOT NULL DEFAULT 'success',
+		error_message TEXT,
+		stream BOOLEAN NOT NULL DEFAULT FALSE,
+		cached BOOLEAN NOT NULL DEFAULT FALSE,
+		tags JSONB
+	);
+
+	CREATE INDEX IF NOT EXISTS idx_ai_calls_timestamp ON ai_calls(timestamp DESC);
+	CREATE INDEX IF NOT EXISTS idx_ai_calls_service ON ai_calls(service_name);
+	CREATE INDEX IF NOT EXISTS idx_ai_calls_vendor ON ai_calls(vendor);
+	CREATE INDEX IF NOT EXISTS idx_ai_calls_model ON ai_calls(model);
+	CREATE INDEX IF NOT EXISTS idx_ai_calls_project ON ai_calls(project_id);
+	CREATE INDEX IF NOT EXISTS idx_ai_calls_status ON ai_calls(status);
+	CREATE INDEX IF NOT EXISTS idx_ai_calls_vendor_model ON ai_calls(vendor, model);
+	`
+	_, err := db.Exec(schema)
+	return err
+}
+
+// InsertCall inserts a single AI call record, generating an ID and timestamp when they are unset.
+func InsertCall(db *sql.DB, r AICallRecord) error {
+	if r.ID == "" {
+		r.ID = uuid.New().String()
+	}
+	if r.Timestamp.IsZero() {
+		r.Timestamp = time.Now()
+	}
+
+	tags, _ := json.Marshal(r.Tags) // error ignored: marshaling a map[string]string is not expected to fail
+
+	_, err := db.Exec(`
+		INSERT INTO ai_calls (id, timestamp, service_name, project_id, vendor, model,
+			tokens_in, tokens_out, tokens_cache_read, tokens_cache_write,
+			estimated_cost, latency_ms, ttfb_ms, status, error_message, stream, cached, tags)
+		VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13,$14,$15,$16,$17,$18)`,
+		r.ID, r.Timestamp, r.ServiceName, r.ProjectID, r.Vendor, r.Model,
+		r.TokensIn, r.TokensOut, r.TokensCacheRead, r.TokensCacheWrite,
+		r.EstimatedCost, r.LatencyMs, r.TTFBMs, r.Status, r.ErrorMessage,
+		r.Stream, r.Cached, tags)
+	return err
+}
+
+// InsertCallBatch inserts all records in one transaction; if any insert fails nothing is committed and that error is returned.
+func InsertCallBatch(db *sql.DB, records []AICallRecord) error {
+	if len(records) == 0 {
+		return nil
+	}
+
+	tx, err := db.Begin()
+	if err != nil {
+		return err
+	}
+	defer tx.Rollback() // undoes the partial batch on every early return
+
+	stmt, err := tx.Prepare(`
+		INSERT INTO ai_calls (id, timestamp, service_name, project_id, vendor, model,
+			tokens_in, tokens_out, tokens_cache_read, tokens_cache_write,
+			estimated_cost, latency_ms, ttfb_ms, status, error_message, stream, cached, tags)
+		VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13,$14,$15,$16,$17,$18)`)
+	if err != nil {
+		return err
+	}
+	defer stmt.Close()
+
+	for _, r := range records {
+		if r.ID == "" {
+			r.ID = uuid.New().String()
+		}
+		if r.Timestamp.IsZero() {
+			r.Timestamp = time.Now()
+		}
+		tags, _ := json.Marshal(r.Tags) // error ignored: marshaling a map[string]string is not expected to fail
+
+		_, err := stmt.Exec(
+			r.ID, r.Timestamp, r.ServiceName, r.ProjectID, r.Vendor, r.Model,
+			r.TokensIn, r.TokensOut, r.TokensCacheRead, r.TokensCacheWrite,
+			r.EstimatedCost, r.LatencyMs, r.TTFBMs, r.Status, r.ErrorMessage,
+			r.Stream, r.Cached, tags)
+		if err != nil {
+			return err
+		}
+	}
+
+	return tx.Commit()
+}
+
+// buildWhereClause returns a " WHERE ..." clause ("" when no field is set) and its args, numbering placeholders from startArg.
+func buildWhereClause(f AICallFilter, startArg int) (string, []any) {
+	var conditions []string
+	var args []any
+	n := startArg // number of the next $n placeholder
+
+	if !f.From.IsZero() {
+		conditions = append(conditions, "timestamp >= $"+strconv.Itoa(n))
+		args = append(args, f.From)
+		n++
+	}
+	if !f.To.IsZero() {
+		conditions = append(conditions, "timestamp <= $"+strconv.Itoa(n))
+		args = append(args, f.To)
+		n++
+	}
+	if f.ServiceName != "" {
+		conditions = append(conditions, "service_name = $"+strconv.Itoa(n))
+		args = append(args, f.ServiceName)
+		n++
+	}
+	if f.ProjectID != "" {
+		conditions = append(conditions, "project_id = $"+strconv.Itoa(n))
+		args = append(args, f.ProjectID)
+		n++
+	}
+	if f.Vendor != "" {
+		conditions = append(conditions, "vendor = $"+strconv.Itoa(n))
+		args = append(args, f.Vendor)
+		n++
+	}
+	if f.Model != "" {
+		conditions = append(conditions, "model = $"+strconv.Itoa(n))
+		args = append(args, f.Model)
+		n++
+	}
+	if f.Status != "" {
+		conditions = append(conditions, "status = $"+strconv.Itoa(n))
+		args = append(args, f.Status)
+		n++
+	}
+
+	if len(conditions) == 0 {
+		return "", args
+	}
+	return " WHERE " + strings.Join(conditions, " AND "), args
+}
+
+// QueryCalls returns the records matching filter, newest first; a non-positive Limit means 100 rows.
+func QueryCalls(db *sql.DB, filter AICallFilter) ([]AICallRecord, error) {
+	where, args := buildWhereClause(filter, 1)
+
+	limit := filter.Limit
+	if limit <= 0 {
+		limit = 100
+	}
+
+	q := `SELECT id, timestamp, service_name, project_id, vendor, model,
+		tokens_in, tokens_out, tokens_cache_read, tokens_cache_write,
+		estimated_cost, latency_ms, ttfb_ms, status, COALESCE(error_message,''),
+		stream, cached, COALESCE(tags::text,'{}')
+		FROM ai_calls` + where + ` ORDER BY timestamp DESC LIMIT ` +
+		strconv.Itoa(limit) + ` OFFSET ` + strconv.Itoa(filter.Offset)
+
+	rows, err := db.Query(q, args...)
+	if err != nil {
+		return nil, err
+	}
+	defer rows.Close()
+
+	var records []AICallRecord
+	for rows.Next() {
+		var r AICallRecord
+		var tagsJSON string
+		// A scan failure is returned instead of skipped, so callers never receive a silently truncated page.
+		if err := rows.Scan(&r.ID, &r.Timestamp, &r.ServiceName, &r.ProjectID,
+			&r.Vendor, &r.Model, &r.TokensIn, &r.TokensOut,
+			&r.TokensCacheRead, &r.TokensCacheWrite, &r.EstimatedCost,
+			&r.LatencyMs, &r.TTFBMs, &r.Status, &r.ErrorMessage,
+			&r.Stream, &r.Cached, &tagsJSON); err != nil {
+			return nil, err
+		}
+		_ = json.Unmarshal([]byte(tagsJSON), &r.Tags) // best-effort: a record with undecodable tags is still returned
+		records = append(records, r)
+	}
+	return records, rows.Err()
+}
+
+// GetUsageSummary returns aggregated usage statistics for the calls matching filter.
+func GetUsageSummary(db *sql.DB, filter AICallFilter) (*AIUsageSummary, error) {
+	where, args := buildWhereClause(filter, 1)
+
+	// The cached-call count is computed in the same statement as the totals, so both
+	// come from one snapshot and a query failure is reported instead of ignored.
+	q := `SELECT
+		COUNT(*),
+		COALESCE(SUM(tokens_in),0),
+		COALESCE(SUM(tokens_out),0),
+		COALESCE(SUM(tokens_cache_read),0),
+		COALESCE(SUM(tokens_cache_write),0),
+		COALESCE(SUM(estimated_cost),0),
+		COALESCE(AVG(latency_ms),0),
+		COALESCE(AVG(ttfb_ms),0),
+		COUNT(*) FILTER (WHERE status = 'error'),
+		COUNT(DISTINCT model),
+		COUNT(DISTINCT vendor),
+		COUNT(DISTINCT service_name),
+		COUNT(*) FILTER (WHERE cached = true)
+		FROM ai_calls` + where
+
+	var cachedCount int
+	s := &AIUsageSummary{}
+	err := db.QueryRow(q, args...).Scan(
+		&s.TotalCalls, &s.TotalTokensIn, &s.TotalTokensOut,
+		&s.TotalCacheRead, &s.TotalCacheWrite, &s.TotalCost,
+		&s.AvgLatencyMs, &s.AvgTTFBMs, &s.ErrorCount,
+		&s.UniqueModels, &s.UniqueVendors, &s.UniqueServices, &cachedCount)
+	if err != nil {
+		return nil, err
+	}
+	// Both rates stay at zero when no calls matched, which also avoids dividing by zero.
+	if s.TotalCalls > 0 {
+		s.ErrorRate = float64(s.ErrorCount) / float64(s.TotalCalls)
+		s.CacheHitRate = float64(cachedCount) / float64(s.TotalCalls)
+	}
+	return s, nil
+}
+
+// GetModelStats returns statistics per (vendor, model) pair, highest total cost first; a row scan failure aborts with that error.
+func GetModelStats(db *sql.DB, filter AICallFilter) ([]AIModelStats, error) {
+	where, args := buildWhereClause(filter, 1)
+
+	q := `SELECT vendor, model, COUNT(*),
+		COALESCE(SUM(tokens_in + tokens_out),0),
+		COALESCE(SUM(estimated_cost),0),
+		COALESCE(AVG(latency_ms),0),
+		COUNT(*) FILTER (WHERE status = 'error')
+		FROM ai_calls` + where + `
+		GROUP BY vendor, model ORDER BY SUM(estimated_cost) DESC`
+
+	rows, err := db.Query(q, args...)
+	if err != nil {
+		return nil, err
+	}
+	defer rows.Close()
+
+	var stats []AIModelStats
+	for rows.Next() {
+		var s AIModelStats
+		if err := rows.Scan(&s.Vendor, &s.Model, &s.TotalCalls,
+			&s.TotalTokens, &s.TotalCost, &s.AvgLatencyMs, &s.ErrorCount); err != nil {
+			return nil, err
+		}
+		if s.TotalCalls > 0 {
+			s.ErrorRate = float64(s.ErrorCount) / float64(s.TotalCalls)
+		}
+		stats = append(stats, s)
+	}
+	return stats, rows.Err()
+}
+
+// GetVendorStats returns statistics per vendor, highest total cost first; a row scan failure aborts with that error.
+func GetVendorStats(db *sql.DB, filter AICallFilter) ([]AIVendorStats, error) {
+	where, args := buildWhereClause(filter, 1)
+
+	q := `SELECT vendor, COUNT(*),
+		COALESCE(SUM(tokens_in + tokens_out),0),
+		COALESCE(SUM(estimated_cost),0),
+		COALESCE(AVG(latency_ms),0),
+		COUNT(DISTINCT model),
+		COUNT(*) FILTER (WHERE status = 'error')
+		FROM ai_calls` + where + `
+		GROUP BY vendor ORDER BY SUM(estimated_cost) DESC`
+
+	rows, err := db.Query(q, args...)
+	if err != nil {
+		return nil, err
+	}
+	defer rows.Close()
+
+	var stats []AIVendorStats
+	for rows.Next() {
+		var s AIVendorStats
+		if err := rows.Scan(&s.Vendor, &s.TotalCalls, &s.TotalTokens,
+			&s.TotalCost, &s.AvgLatencyMs, &s.ModelCount, &s.ErrorCount); err != nil {
+			return nil, err
+		}
+		if s.TotalCalls > 0 {
+			s.ErrorRate = float64(s.ErrorCount) / float64(s.TotalCalls)
+		}
+		stats = append(stats, s)
+	}
+	return stats, rows.Err()
+}
+
+// GetCostTimeseries returns summed cost and call count per time bucket, oldest first; an unknown interval falls back to "1d".
+func GetCostTimeseries(db *sql.DB, filter AICallFilter, interval string) ([]TimeseriesPoint, error) {
+	// Validate interval
+	validIntervals := map[string]bool{"1h": true, "6h": true, "1d": true, "7d": true, "1m": true}
+	if !validIntervals[interval] {
+		interval = "1d"
+	}
+
+	// Map to PostgreSQL interval
+	pgInterval := map[string]string{
+		"1h": "1 hour", "6h": "6 hours", "1d": "1 day", "7d": "7 days", "1m": "1 month",
+	}[interval]
+
+	where, args := buildWhereClause(filter, 1)
+	// "%%" is Sprintf's escape for one literal "%" (SQL modulo); pgInterval comes from the fixed map above, never from user input.
+	q := fmt.Sprintf(`SELECT date_trunc('hour', timestamp) -
+		(EXTRACT(EPOCH FROM date_trunc('hour', timestamp))::bigint %% EXTRACT(EPOCH FROM interval '%s')::bigint) * interval '1 second' AS bucket,
+		COALESCE(SUM(estimated_cost),0),
+		COUNT(*)
+		FROM ai_calls%s
+		GROUP BY bucket ORDER BY bucket ASC`, pgInterval, where)
+
+	rows, err := db.Query(q, args...)
+	if err != nil {
+		return nil, err
+	}
+	defer rows.Close()
+
+	var points []TimeseriesPoint
+	for rows.Next() {
+		var p TimeseriesPoint
+		if err := rows.Scan(&p.Timestamp, &p.Value, &p.Count); err != nil {
+			return nil, err
+		}
+		points = append(points, p)
+	}
+	return points, rows.Err()
+}
--- a/internal/aiapm/types.go
+++ b/internal/aiapm/types.go
@@ -0,0 +1,102 @@
+package aiapm
+
+import "time"
+
+// AICallRecord represents a single AI/LLM API call; its fields correspond to the columns of the ai_calls table.
+type AICallRecord struct {
+	ID               string            `json:"id"`
+	Timestamp        time.Time         `json:"timestamp"`
+	ServiceName      string            `json:"service_name"`
+	ProjectID        string            `json:"project_id"`
+	Vendor           string            `json:"vendor"`
+	Model            string            `json:"model"`
+	TokensIn         int               `json:"tokens_in"`
+	TokensOut        int               `json:"tokens_out"`
+	TokensCacheRead  int               `json:"tokens_cache_read"`
+	TokensCacheWrite int               `json:"tokens_cache_write"`
+	EstimatedCost    float64           `json:"estimated_cost"`
+	LatencyMs        int               `json:"latency_ms"`
+	TTFBMs           int               `json:"ttfb_ms"`
+	Status           string            `json:"status"`
+	ErrorMessage     string            `json:"error_message,omitempty"`
+	Stream           bool              `json:"stream"`
+	Cached           bool              `json:"cached"`
+	Tags             map[string]string `json:"tags,omitempty"`
+}
+
+// AICallFilter defines query filters for AI call records; empty strings and zero times add no condition to the query.
+type AICallFilter struct {
+	ServiceName string    `json:"service_name"`
+	ProjectID   string    `json:"project_id"`
+	Vendor      string    `json:"vendor"`
+	Model       string    `json:"model"`
+	Status      string    `json:"status"`
+	From        time.Time `json:"from"`
+	To          time.Time `json:"to"`
+	Limit       int       `json:"limit"`
+	Offset      int       `json:"offset"`
+}
+
+// AIUsageSummary holds aggregated usage statistics; ErrorRate and CacheHitRate are fractions of TotalCalls.
+type AIUsageSummary struct {
+	TotalCalls       int     `json:"total_calls"`
+	TotalTokensIn    int64   `json:"total_tokens_in"`
+	TotalTokensOut   int64   `json:"total_tokens_out"`
+	TotalCacheRead   int64   `json:"total_cache_read"`
+	TotalCacheWrite  int64   `json:"total_cache_write"`
+	TotalCost        float64 `json:"total_cost"`
+	AvgLatencyMs     float64 `json:"avg_latency_ms"`
+	AvgTTFBMs        float64 `json:"avg_ttfb_ms"`
+	ErrorCount       int     `json:"error_count"`
+	ErrorRate        float64 `json:"error_rate"`
+	CacheHitRate     float64 `json:"cache_hit_rate"`
+	UniqueModels     int     `json:"unique_models"`
+	UniqueVendors    int     `json:"unique_vendors"`
+	UniqueServices   int     `json:"unique_services"`
+}
+
+// AIModelStats is the breakdown for one (vendor, model) pair; TotalTokens is input plus output tokens.
+type AIModelStats struct {
+	Vendor       string  `json:"vendor"`
+	Model        string  `json:"model"`
+	TotalCalls   int     `json:"total_calls"`
+	TotalTokens  int64   `json:"total_tokens"`
+	TotalCost    float64 `json:"total_cost"`
+	AvgLatencyMs float64 `json:"avg_latency_ms"`
+	ErrorCount   int     `json:"error_count"`
+	ErrorRate    float64 `json:"error_rate"`
+}
+
+// AIVendorStats is the breakdown for one vendor; ModelCount is the number of distinct models seen for it.
+type AIVendorStats struct {
+	Vendor       string  `json:"vendor"`
+	TotalCalls   int     `json:"total_calls"`
+	TotalTokens  int64   `json:"total_tokens"`
+	TotalCost    float64 `json:"total_cost"`
+	AvgLatencyMs float64 `json:"avg_latency_ms"`
+	ModelCount   int     `json:"model_count"`
+	ErrorCount   int     `json:"error_count"`
+	ErrorRate    float64 `json:"error_rate"`
+}
+
+// AICostBreakdown is one entry of a cost breakdown along a single dimension.
+type AICostBreakdown struct {
+	Dimension string  `json:"dimension"` // vendor, model, service, project
+	Key       string  `json:"key"`
+	Cost      float64 `json:"cost"`
+	Calls     int     `json:"calls"`
+	Tokens    int64   `json:"tokens"`
+}
+
+// TimeseriesPoint is a single point in a time series; GetCostTimeseries fills Value with summed cost and Count with the number of calls.
+type TimeseriesPoint struct {
+	Timestamp time.Time `json:"timestamp"`
+	Value     float64   `json:"value"`
+	Count     int       `json:"count"`
+}
+
+// IngestRequest is the ingest endpoint payload: a single record in Call, a batch in Calls, or both.
+type IngestRequest struct {
+	Call  *AICallRecord  `json:"call,omitempty"`
+	Calls []AICallRecord `json:"calls,omitempty"`
+}
--- a/internal/api/aiapm_handlers.go
+++ b/internal/api/aiapm_handlers.go
@@ -0,0 +1,142 @@
+package api
+
+import (
+	"database/sql"
+	"time"
+
+	"github.com/bigtux/ophion/internal/aiapm"
+	"github.com/gofiber/fiber/v2"
+)
+
+// AIAPMHandlers holds the dependencies shared by the AI APM route handlers.
+type AIAPMHandlers struct {
+	db        *sql.DB          // queried directly by the read endpoints
+	collector *aiapm.Collector // receives records from the ingest endpoint
+}
+
+// RegisterAIAPMRoutes mounts the AI APM endpoints under /ai-apm and returns the Collector (queue size 5000) it starts for them.
+func RegisterAIAPMRoutes(api fiber.Router, db *sql.DB) *aiapm.Collector {
+	collector := aiapm.NewCollector(db, 5000)
+	h := &AIAPMHandlers{db: db, collector: collector}
+
+	g := api.Group("/ai-apm")
+	g.Post("/ingest", h.Ingest)
+	g.Get("/summary", h.Summary)
+	g.Get("/models", h.Models)
+	g.Get("/vendors", h.Vendors)
+	g.Get("/costs", h.Costs)
+	g.Get("/calls", h.Calls)
+	g.Get("/pricing", h.Pricing)
+
+	return collector
+}
+
+// Ingest hands the record in "call" and/or the batch in "calls" to the collector and replies with the accepted count; 400 if the body is invalid or holds no records.
+func (h *AIAPMHandlers) Ingest(c *fiber.Ctx) error {
+	var req aiapm.IngestRequest
+	if err := c.BodyParser(&req); err != nil {
+		return c.Status(400).JSON(fiber.Map{"error": "invalid request body: " + err.Error()})
+	}
+
+	count := 0
+	if req.Call != nil {
+		h.collector.Collect(*req.Call)
+		count = 1
+	}
+	if len(req.Calls) > 0 {
+		h.collector.CollectBatch(req.Calls)
+		count += len(req.Calls)
+	}
+
+	if count == 0 {
+		return c.Status(400).JSON(fiber.Map{"error": "no call records provided; use 'call' or 'calls' field"})
+	}
+
+	return c.JSON(fiber.Map{"status": "accepted", "count": count})
+}
+
+// parseFilter builds an AICallFilter from the query string; a missing or unparsable from/to defaults to the last 24 hours up to now.
+func parseFilter(c *fiber.Ctx) aiapm.AICallFilter {
+	f := aiapm.AICallFilter{
+		ServiceName: c.Query("service"),
+		ProjectID:   c.Query("project"),
+		Vendor:      c.Query("vendor"),
+		Model:       c.Query("model"),
+		Status:      c.Query("status"),
+	}
+	if from := c.Query("from"); from != "" {
+		if t, err := time.Parse(time.RFC3339, from); err == nil {
+			f.From = t
+		}
+	}
+	if to := c.Query("to"); to != "" {
+		if t, err := time.Parse(time.RFC3339, to); err == nil {
+			f.To = t
+		}
+	}
+	if f.From.IsZero() {
+		f.From = time.Now().Add(-24 * time.Hour)
+	}
+	if f.To.IsZero() {
+		f.To = time.Now()
+	}
+	f.Limit = c.QueryInt("limit", 100)
+	f.Offset = c.QueryInt("offset", 0)
+	return f
+}
+
+// Summary responds with the aggregated usage statistics for the parsed filter, or 500 with the error text.
+func (h *AIAPMHandlers) Summary(c *fiber.Ctx) error {
+	filter := parseFilter(c)
+	summary, err := aiapm.GetUsageSummary(h.db, filter)
+	if err != nil {
+		return c.Status(500).JSON(fiber.Map{"error": err.Error()})
+	}
+	return c.JSON(summary)
+}
+
+// Models responds with per-model statistics under the "models" key, or 500 with the error text.
+func (h *AIAPMHandlers) Models(c *fiber.Ctx) error {
+	filter := parseFilter(c)
+	stats, err := aiapm.GetModelStats(h.db, filter)
+	if err != nil {
+		return c.Status(500).JSON(fiber.Map{"error": err.Error()})
+	}
+	return c.JSON(fiber.Map{"models": stats})
+}
+
+// Vendors responds with per-vendor statistics under the "vendors" key, or 500 with the error text.
+func (h *AIAPMHandlers) Vendors(c *fiber.Ctx) error {
+	filter := parseFilter(c)
+	stats, err := aiapm.GetVendorStats(h.db, filter)
+	if err != nil {
+		return c.Status(500).JSON(fiber.Map{"error": err.Error()})
+	}
+	return c.JSON(fiber.Map{"vendors": stats})
+}
+
+// Costs responds with the cost timeseries; the echoed "interval" is the raw query value, even when GetCostTimeseries fell back to "1d".
+func (h *AIAPMHandlers) Costs(c *fiber.Ctx) error {
+	filter := parseFilter(c)
+	interval := c.Query("interval", "1d")
+	points, err := aiapm.GetCostTimeseries(h.db, filter, interval)
+	if err != nil {
+		return c.Status(500).JSON(fiber.Map{"error": err.Error()})
+	}
+	return c.JSON(fiber.Map{"timeseries": points, "interval": interval})
+}
+
+// Calls responds with one page of matching call records plus the number of records in that page.
+func (h *AIAPMHandlers) Calls(c *fiber.Ctx) error {
+	filter := parseFilter(c)
+	calls, err := aiapm.QueryCalls(h.db, filter)
+	if err != nil {
+		return c.Status(500).JSON(fiber.Map{"error": err.Error()})
+	}
+	return c.JSON(fiber.Map{"calls": calls, "count": len(calls)})
+}
+
+// Pricing responds with the full pricing table under the "pricing" key.
+func (h *AIAPMHandlers) Pricing(c *fiber.Ctx) error {
+	return c.JSON(fiber.Map{"pricing": aiapm.GetPricingTable()})
+}