feat: Add AI differentiators

🤖 OPHION Copilot
- Interactive AI assistant for troubleshooting
- Context-aware conversations
- Actionable suggestions with commands

🔧 Auto-Healing
- AI-powered incident analysis
- Automatic remediation plans
- Safe execution with dry-run mode

🚨 Smart Alerts
- Noise reduction
- Alert correlation
- Root cause analysis
- Impact assessment

📊 AI Insights
- Daily insights generation
- Security anomaly detection
- Cost optimization suggestions
- Capacity predictions
- Executive reports

🛡️ Security
- Behavioral anomaly detection
- Intrusion attempt identification
- Compliance monitoring
2026-02-05 22:48:10 -03:00
parent d58ac37e39
commit 369373b387
5 changed files with 1473 additions and 0 deletions
--- a/internal/ai/autohealing.go
+++ b/internal/ai/autohealing.go
@@ -0,0 +1,278 @@
+package ai
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"log"
+	"time"
+)
+
+// AutoHealer is the intelligent auto-remediation component.
+//
+// It asks the AI engine for a HealingPlan (Analyze) and walks the plan's
+// actions (Execute). The enabled flag is checked by Analyze, dryRun is
+// checked by Execute, and cooldown together with lastActions (keyed by the
+// action target) throttles repeated actions on the same target. maxActions
+// is stored by the constructor but is not read by any method in this file.
+type AutoHealer struct {
+	engine      *AIEngine
+	enabled     bool
+	dryRun      bool
+	maxActions  int
+	cooldown    time.Duration
+	lastActions map[string]time.Time
+}
+
+// HealingAction is one remediation step of a HealingPlan.
+//
+// ID, Type, Target, Command, Description and Risk mirror the keys the AI is
+// asked to return for each action in the Analyze prompt. Executed,
+// ExecutedAt, Result and Error are execution bookkeeping fields; none of
+// the methods in this file write them.
+type HealingAction struct {
+	ID          string    `json:"id"`
+	Type        string    `json:"type"`
+	Target      string    `json:"target"`
+	Command     string    `json:"command"`
+	Description string    `json:"description"`
+	Risk        string    `json:"risk"`
+	Executed    bool      `json:"executed"`
+	ExecutedAt  time.Time `json:"executed_at,omitempty"`
+	Result      string    `json:"result,omitempty"`
+	Error       string    `json:"error,omitempty"`
+}
+
+// HealingPlan is the remediation plan generated by the AI for an incident.
+//
+// Analyze decodes the model's JSON reply directly into this type, so the
+// JSON tags must match the keys requested in the Analyze prompt.
+type HealingPlan struct {
+	Issue            string          `json:"issue"`
+	Severity         string          `json:"severity"`
+	Analysis         string          `json:"analysis"`
+	Actions          []HealingAction `json:"actions"`
+	Confidence       float64         `json:"confidence"`
+	RequiresApproval bool            `json:"requires_approval"`
+}
+
+// AutoHealConfig holds the settings passed to NewAutoHealer.
+type AutoHealConfig struct {
+	Enabled           bool
+	DryRun            bool // When true, nothing is executed; actions are only suggested.
+	MaxActionsPerHour int
+	Cooldown          time.Duration // Minimum time between actions on the same resource.
+	AllowedActions    []string      // Action types that are permitted.
+	ApprovalRequired  []string      // Action types that need human approval.
+}
+
+// NewAutoHealer builds an AutoHealer bound to engine.
+//
+// Enabled, DryRun, MaxActionsPerHour and Cooldown are copied from config.
+// AllowedActions and ApprovalRequired are not stored on the returned value.
+// The per-target action history starts out empty.
+func NewAutoHealer(engine *AIEngine, config AutoHealConfig) *AutoHealer {
+	healer := new(AutoHealer)
+	healer.engine = engine
+	healer.enabled = config.Enabled
+	healer.dryRun = config.DryRun
+	healer.maxActions = config.MaxActionsPerHour
+	healer.cooldown = config.Cooldown
+	healer.lastActions = map[string]time.Time{}
+	return healer
+}
+
+// Analyze asks the AI engine for a remediation plan for incident.
+//
+// It returns an error when the auto-healer is disabled, when the engine
+// call fails (the engine's error is returned as-is), or when the reply is
+// not valid JSON for a HealingPlan. The prompt embeds the incident fields,
+// the time elapsed since incident.StartTime and the text returned by
+// getRecentActionsForHost.
+func (h *AutoHealer) Analyze(ctx context.Context, incident Incident) (*HealingPlan, error) {
+	if !h.enabled {
+		return nil, fmt.Errorf("auto-healer is disabled")
+	}
+
+	// The prompt text is sent verbatim to the model (in Portuguese).
+	const promptTemplate = `Analise o seguinte incidente e gere um plano de correção automática:
+
+INCIDENTE:
+- Tipo: %s
+- Severidade: %s
+- Host: %s
+- Descrição: %s
+- Início: %s
+- Duração: %s
+
+MÉTRICAS DO HOST:
+%s
+
+LOGS RECENTES:
+%s
+
+AÇÕES ANTERIORES (últimas 24h):
+%s
+
+REGRAS:
+1. Priorize ações de baixo risco
+2. Reinício de serviço só como último recurso
+3. Escalonamento horizontal antes de vertical
+4. Sempre tenha rollback em mente
+
+Responda em JSON:
+{
+  "issue": "descrição concisa do problema",
+  "severity": "low|medium|high|critical",
+  "analysis": "análise detalhada da causa raiz",
+  "actions": [
+    {
+      "id": "action_1",
+      "type": "restart_service|scale_up|clear_cache|rotate_logs|kill_process|run_script|config_change",
+      "target": "nome do serviço ou recurso",
+      "command": "comando a executar",
+      "description": "o que essa ação faz",
+      "risk": "low|medium|high"
+    }
+  ],
+  "confidence": 0.0-1.0,
+  "requires_approval": boolean
+}`
+
+	startedAt := incident.StartTime.Format(time.RFC3339)
+	elapsed := time.Since(incident.StartTime).String()
+	history := h.getRecentActionsForHost(incident.Host)
+
+	prompt := fmt.Sprintf(promptTemplate,
+		incident.Type,
+		incident.Severity,
+		incident.Host,
+		incident.Description,
+		startedAt,
+		elapsed,
+		incident.Metrics,
+		incident.RecentLogs,
+		history,
+	)
+
+	reply, err := h.engine.chat(ctx, prompt)
+	if err != nil {
+		return nil, err
+	}
+
+	plan := new(HealingPlan)
+	if err = json.Unmarshal([]byte(reply), plan); err != nil {
+		return nil, fmt.Errorf("failed to parse healing plan: %w", err)
+	}
+	return plan, nil
+}
+
+// Execute walks the actions of plan in order and reports what happened.
+//
+// It returns an error when the auto-healer is disabled (mirroring Analyze)
+// or when plan is nil. In dry-run mode nothing is executed and a result
+// flagged DryRun is returned. Otherwise each action is run through
+// executeAction unless its target is still inside the cooldown window, in
+// which case it is recorded as skipped. Processing stops at the first
+// action that does not succeed; the result is successful only when no
+// attempted action failed.
+//
+// NOTE(review): plan.RequiresApproval is not consulted here — confirm that
+// callers obtain approval before calling Execute.
+func (h *AutoHealer) Execute(ctx context.Context, plan *HealingPlan) (*HealingResult, error) {
+	if !h.enabled {
+		return nil, fmt.Errorf("auto-healer is disabled")
+	}
+	if plan == nil {
+		return nil, fmt.Errorf("healing plan is nil")
+	}
+
+	if h.dryRun {
+		return &HealingResult{
+			Plan:    plan,
+			DryRun:  true,
+			Message: "Dry run - ações não executadas",
+		}, nil
+	}
+
+	// A zero-value AutoHealer has a nil history map; writing to it would panic.
+	if h.lastActions == nil {
+		h.lastActions = make(map[string]time.Time)
+	}
+
+	result := &HealingResult{
+		Plan:      plan,
+		StartedAt: time.Now(),
+		Actions:   make([]ActionResult, 0, len(plan.Actions)),
+	}
+
+	failed := false
+	for _, action := range plan.Actions {
+		// Respect the per-target cooldown.
+		if !h.canExecute(action.Target) {
+			result.Actions = append(result.Actions, ActionResult{
+				ActionID: action.ID,
+				Skipped:  true,
+				Reason:   "Cooldown period active",
+			})
+			continue
+		}
+
+		actionResult := h.executeAction(ctx, action)
+		result.Actions = append(result.Actions, actionResult)
+
+		// Record the attempt (successful or not) so the cooldown applies.
+		h.lastActions[action.Target] = time.Now()
+
+		// Stop at the first failure. The failure is tracked explicitly so an
+		// action that fails without an error message still fails the plan.
+		if !actionResult.Success {
+			failed = true
+			result.Error = actionResult.Error
+			break
+		}
+	}
+
+	result.CompletedAt = time.Now()
+	result.Success = !failed
+
+	return result, nil
+}
+
+// executeAction handles a single action and returns its result.
+//
+// Execution is currently simulated: for the supported action types the
+// command is copied into the result and success is reported without
+// running anything. Any other action type yields a result carrying an
+// "Unknown action type" error, with Success left false and CompletedAt
+// left unset. ctx is accepted but not used yet.
+func (h *AutoHealer) executeAction(ctx context.Context, action HealingAction) ActionResult {
+	started := time.Now()
+
+	log.Printf("[AutoHealer] Executing action: %s on %s", action.Type, action.Target)
+
+	// Real execution (SSH, API, ...) would go here; implement with care.
+	switch action.Type {
+	case "restart_service", "scale_up", "clear_cache", "rotate_logs":
+		// Examples of what these would run:
+		//   restart_service: systemctl restart <service>
+		//   scale_up:        kubectl scale deployment...
+		//   clear_cache:     redis-cli flushdb
+		//   rotate_logs:     logrotate -f
+	default:
+		return ActionResult{
+			ActionID:  action.ID,
+			StartedAt: started,
+			Error:     fmt.Sprintf("Unknown action type: %s", action.Type),
+		}
+	}
+
+	// Simulated success (real execution still to be implemented).
+	return ActionResult{
+		ActionID:    action.ID,
+		StartedAt:   started,
+		Command:     action.Command,
+		Success:     true,
+		CompletedAt: time.Now(),
+		Output:      "Action executed successfully (simulated)",
+	}
+}
+
+// canExecute reports whether an action may run against target now: true
+// when no action is recorded for target, or when more than the configured
+// cooldown has elapsed since the last recorded one.
+func (h *AutoHealer) canExecute(target string) bool {
+	if last, seen := h.lastActions[target]; seen {
+		return time.Since(last) > h.cooldown
+	}
+	return true
+}
+
+func (h *AutoHealer) getRecentActionsForHost(host string) string {
+	// Buscar histórico de ações recentes
+	return "Nenhuma ação recente"
+}
+
+// Incident describes an incident handed to AutoHealer.Analyze.
+//
+// Type, Severity, Host, Description, StartTime, Metrics and RecentLogs are
+// interpolated into the analysis prompt; Metrics and RecentLogs are
+// inserted as pre-formatted text. ID is not used by Analyze.
+type Incident struct {
+	ID          string
+	Type        string
+	Severity    string
+	Host        string
+	Description string
+	StartTime   time.Time
+	Metrics     string
+	RecentLogs  string
+}
+
+// HealingResult is the outcome of running a HealingPlan through
+// AutoHealer.Execute.
+//
+// In dry-run mode only Plan, DryRun and Message are set. Otherwise Actions
+// holds one entry per action that was attempted or skipped, and Error
+// carries the error text of the action that stopped the run, if any.
+type HealingResult struct {
+	Plan        *HealingPlan
+	Success     bool
+	DryRun      bool
+	Message     string
+	Error       string
+	StartedAt   time.Time
+	CompletedAt time.Time
+	Actions     []ActionResult
+}
+
+// ActionResult is the outcome of a single healing action.
+//
+// Skipped and Reason are set by Execute when an action is not attempted
+// because of the cooldown; the remaining fields are filled in by
+// executeAction.
+type ActionResult struct {
+	ActionID    string
+	Success     bool
+	Skipped     bool
+	Reason      string
+	Command     string
+	Output      string
+	Error       string
+	StartedAt   time.Time
+	CompletedAt time.Time
+}
--- a/internal/ai/copilot.go
+++ b/internal/ai/copilot.go
@@ -0,0 +1,245 @@
+package ai
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"strings"
+	"time"
+)
+
+// OphionCopilot is the AI assistant for troubleshooting.
+//
+// It keeps one ConversationContext per session ID in contextStore. The map
+// is accessed without any locking in this file.
+type OphionCopilot struct {
+	engine       *AIEngine
+	contextStore map[string]*ConversationContext
+}
+
+// ConversationContext is the state of one copilot conversation.
+//
+// getOrCreateContext sets SessionID, Messages and StartedAt when a session
+// is first seen, and Chat appends to Messages. UserID, CurrentHost and
+// CurrentIssue are not written by any code in this file.
+type ConversationContext struct {
+	SessionID    string
+	UserID       string
+	Messages     []CopilotMessage
+	CurrentHost  string
+	CurrentIssue string
+	StartedAt    time.Time
+}
+
+// CopilotMessage is one message of a copilot conversation.
+//
+// Chat stores the answer's confidence and number of suggested actions in
+// Metadata for assistant messages; user messages carry no metadata.
+type CopilotMessage struct {
+	Role      string                 `json:"role"` // user, assistant
+	Content   string                 `json:"content"`
+	Timestamp time.Time              `json:"timestamp"`
+	Metadata  map[string]interface{} `json:"metadata,omitempty"`
+}
+
+// CopilotResponse is the copilot's answer to a user message.
+//
+// Chat decodes the model's JSON reply into this type; when the reply is not
+// valid JSON, Chat returns it as Message with a Confidence of 0.7.
+type CopilotResponse struct {
+	Message     string            `json:"message"`
+	Actions     []SuggestedAction `json:"actions,omitempty"`
+	Queries     []SuggestedQuery  `json:"queries,omitempty"`
+	RelatedDocs []string          `json:"related_docs,omitempty"`
+	Confidence  float64           `json:"confidence"`
+	NeedsHuman  bool              `json:"needs_human"`
+}
+
+// SuggestedAction is an action proposed by the copilot. Its JSON keys match
+// the "actions" entries requested in the copilot prompt.
+type SuggestedAction struct {
+	Type        string `json:"type"` // command, script, config_change, scale, restart
+	Description string `json:"description"`
+	Command     string `json:"command,omitempty"`
+	Risk        string `json:"risk"` // low, medium, high
+	AutoExecute bool   `json:"auto_execute"`
+}
+
+// SuggestedQuery is a query the copilot proposes for further investigation.
+// Its JSON keys match the "queries" entries requested in the copilot prompt.
+type SuggestedQuery struct {
+	Type        string `json:"type"` // metrics, logs, traces
+	Query       string `json:"query"`
+	Description string `json:"description"`
+}
+
+// NewOphionCopilot returns a copilot bound to engine with an empty
+// conversation store.
+func NewOphionCopilot(engine *AIEngine) *OphionCopilot {
+	copilot := new(OphionCopilot)
+	copilot.engine = engine
+	copilot.contextStore = map[string]*ConversationContext{}
+	return copilot
+}
+
+// Chat processa uma mensagem do usuário
+func (c *OphionCopilot) Chat(ctx context.Context, sessionID, userMessage string, systemContext SystemContext) (*CopilotResponse, error) {
+	// Obter ou criar contexto da conversa
+	convCtx := c.getOrCreateContext(sessionID)
+	
+	// Adicionar mensagem do usuário
+	convCtx.Messages = append(convCtx.Messages, CopilotMessage{
+		Role:      "user",
+		Content:   userMessage,
+		Timestamp: time.Now(),
+	})
+
+	// Construir prompt com contexto completo
+	prompt := c.buildPrompt(userMessage, convCtx, systemContext)
+
+	// Chamar IA
+	response, err := c.engine.chat(ctx, prompt)
+	if err != nil {
+		return nil, err
+	}
+
+	// Parsear resposta
+	var result CopilotResponse
+	if err := json.Unmarshal([]byte(response), &result); err != nil {
+		// Se não for JSON válido, retornar como mensagem simples
+		result = CopilotResponse{
+			Message:    response,
+			Confidence: 0.7,
+		}
+	}
+
+	// Adicionar resposta ao contexto
+	convCtx.Messages = append(convCtx.Messages, CopilotMessage{
+		Role:      "assistant",
+		Content:   result.Message,
+		Timestamp: time.Now(),
+		Metadata: map[string]interface{}{
+			"confidence": result.Confidence,
+			"actions":    len(result.Actions),
+		},
+	})
+
+	return &result, nil
+}
+
+// buildPrompt assembles the full prompt sent to the AI engine.
+//
+// The prompt consists of a fixed role description, the system counters from
+// sysCtx, the current per-host metrics and recent alerts when present, up
+// to the six most recent earlier messages of the conversation, the current
+// user question, and the JSON answer schema.
+//
+// Chat appends the current question to convCtx.Messages before calling this
+// function. That trailing entry is left out of the history section so the
+// question appears only once, under "PERGUNTA DO USUÁRIO".
+//
+// Hosts in sysCtx.CurrentMetrics are listed in map iteration order, which
+// is not stable between calls.
+func (c *OphionCopilot) buildPrompt(userMessage string, convCtx *ConversationContext, sysCtx SystemContext) string {
+	var sb strings.Builder
+
+	sb.WriteString(`Você é o OPHION Copilot, um assistente especialista em observabilidade e troubleshooting de infraestrutura.
+
+SUAS CAPACIDADES:
+- Analisar métricas, logs e traces
+- Identificar causas raiz de problemas
+- Sugerir comandos e ações de correção
+- Explicar conceitos de forma clara
+- Correlacionar eventos de diferentes fontes
+
+CONTEXTO DO SISTEMA:
+`)
+
+	// System counters.
+	fmt.Fprintf(&sb, `
+Hosts monitorados: %d
+Alertas ativos: %d
+Incidentes abertos: %d
+Último deploy: %s
+`, sysCtx.TotalHosts, sysCtx.ActiveAlerts, sysCtx.OpenIncidents, sysCtx.LastDeploy)
+
+	// Current metrics.
+	if len(sysCtx.CurrentMetrics) > 0 {
+		sb.WriteString("\nMÉTRICAS ATUAIS:\n")
+		for host, metrics := range sysCtx.CurrentMetrics {
+			fmt.Fprintf(&sb, "- %s: CPU=%.1f%%, MEM=%.1f%%, Disco=%.1f%%\n",
+				host, metrics.CPU, metrics.Memory, metrics.Disk)
+		}
+	}
+
+	// Recent alerts.
+	if len(sysCtx.RecentAlerts) > 0 {
+		sb.WriteString("\nALERTAS RECENTES:\n")
+		for _, alert := range sysCtx.RecentAlerts {
+			fmt.Fprintf(&sb, "- [%s] %s: %s\n", alert.Severity, alert.Host, alert.Message)
+		}
+	}
+
+	// Conversation history, without the question currently being asked.
+	history := convCtx.Messages
+	if n := len(history); n > 0 && history[n-1].Role == "user" && history[n-1].Content == userMessage {
+		history = history[:n-1]
+	}
+	if len(history) > 0 {
+		sb.WriteString("\nHISTÓRICO DA CONVERSA:\n")
+		start := len(history) - 6
+		if start < 0 {
+			start = 0
+		}
+		for _, msg := range history[start:] {
+			fmt.Fprintf(&sb, "%s: %s\n", msg.Role, msg.Content)
+		}
+	}
+
+	// Current question and answer instructions.
+	fmt.Fprintf(&sb, `
+PERGUNTA DO USUÁRIO:
+%s
+
+INSTRUÇÕES DE RESPOSTA:
+1. Seja direto e prático
+2. Se sugerir comandos, explique o que fazem
+3. Indique o nível de risco das ações
+4. Se não tiver certeza, diga e sugira investigações adicionais
+
+Responda em JSON:
+{
+  "message": "sua resposta detalhada",
+  "actions": [
+    {
+      "type": "command|script|config_change|scale|restart",
+      "description": "o que faz",
+      "command": "comando se aplicável",
+      "risk": "low|medium|high",
+      "auto_execute": false
+    }
+  ],
+  "queries": [
+    {
+      "type": "metrics|logs|traces",
+      "query": "query para investigar",
+      "description": "o que vai mostrar"
+    }
+  ],
+  "confidence": 0.0-1.0,
+  "needs_human": false
+}`, userMessage)
+
+	return sb.String()
+}
+
+// getOrCreateContext returns the conversation stored for sessionID,
+// creating and registering an empty one (stamped with the current time)
+// when the session is not known yet.
+func (c *OphionCopilot) getOrCreateContext(sessionID string) *ConversationContext {
+	session, found := c.contextStore[sessionID]
+	if !found {
+		session = &ConversationContext{
+			SessionID: sessionID,
+			Messages:  []CopilotMessage{},
+			StartedAt: time.Now(),
+		}
+		c.contextStore[sessionID] = session
+	}
+	return session
+}
+
+// SystemContext is the snapshot of system state given to the copilot.
+//
+// buildPrompt renders the counters, LastDeploy, CurrentMetrics (keyed by
+// host) and RecentAlerts into the prompt. RecentLogs is not read by
+// buildPrompt.
+type SystemContext struct {
+	TotalHosts     int
+	ActiveAlerts   int
+	OpenIncidents  int
+	LastDeploy     string
+	CurrentMetrics map[string]HostMetrics
+	RecentAlerts   []AlertInfo
+	RecentLogs     []string
+}
+
+// HostMetrics holds the metrics of one host. buildPrompt prints each value
+// with one decimal place followed by a percent sign.
+type HostMetrics struct {
+	CPU    float64
+	Memory float64
+	Disk   float64
+}
+
+// AlertInfo is the alert summary shown to the copilot. buildPrompt prints
+// Severity, Host and Message; Time is not included in the prompt.
+type AlertInfo struct {
+	Severity string
+	Host     string
+	Message  string
+	Time     time.Time
+}
+
+// max returns the larger of a and b.
+func max(a, b int) int {
+	if a <= b {
+		return b
+	}
+	return a
+}
--- a/internal/ai/insights.go
+++ b/internal/ai/insights.go
@@ -0,0 +1,397 @@
+package ai
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"time"
+)
+
+// InsightsEngine generates insights (daily insights, security anomalies and
+// executive reports) by prompting the underlying AI engine.
+type InsightsEngine struct {
+	engine *AIEngine
+}
+
+// DailyInsights is the daily report generated by the AI.
+//
+// GenerateDailyInsights decodes the model's JSON reply directly into this
+// type, so the JSON tags must match the keys requested in its prompt.
+type DailyInsights struct {
+	Date             string            `json:"date"`
+	Summary          string            `json:"summary"`
+	Highlights       []Highlight       `json:"highlights"`
+	Concerns         []Concern         `json:"concerns"`
+	Improvements     []Improvement     `json:"improvements"`
+	CostAnalysis     CostAnalysis      `json:"cost_analysis"`
+	SecurityInsights []SecurityInsight `json:"security_insights"`
+	Predictions      []Prediction      `json:"predictions"`
+	Score            HealthScore       `json:"health_score"`
+}
+
+// Highlight is a positive highlight in the daily insights.
+type Highlight struct {
+	Title       string `json:"title"`
+	Description string `json:"description"`
+	Metric      string `json:"metric"`
+	Value       string `json:"value"`
+}
+
+// Concern is a problem identified in the daily insights, with the items it
+// affects and a suggested remedy.
+type Concern struct {
+	Title       string   `json:"title"`
+	Description string   `json:"description"`
+	Severity    string   `json:"severity"`
+	Affected    []string `json:"affected"`
+	Suggestion  string   `json:"suggestion"`
+}
+
+// Improvement is an improvement suggestion in the daily insights.
+type Improvement struct {
+	Title          string `json:"title"`
+	Description    string `json:"description"`
+	ExpectedImpact string `json:"expected_impact"`
+	Effort         string `json:"effort"` // low, medium, high
+	Priority       int    `json:"priority"`
+}
+
+// CostAnalysis is the cost section of the daily insights.
+//
+// The GenerateDailyInsights prompt does not include a "by_resource" key in
+// its example reply, so ByResource is only populated if the model adds it.
+type CostAnalysis struct {
+	TotalEstimated      float64             `json:"total_estimated"`
+	ByResource          map[string]float64  `json:"by_resource"`
+	WastedResources     []WastedResource    `json:"wasted_resources"`
+	SavingOpportunities []SavingOpportunity `json:"saving_opportunities"`
+	TrendVsLastMonth    float64             `json:"trend_vs_last_month"`
+}
+
+// WastedResource is an under-used resource reported in the cost analysis.
+// Usage is carried under the JSON key "usage_percent".
+type WastedResource struct {
+	Resource        string  `json:"resource"`
+	Host            string  `json:"host"`
+	Usage           float64 `json:"usage_percent"`
+	Suggestion      string  `json:"suggestion"`
+	PotentialSaving float64 `json:"potential_saving"`
+}
+
+// SavingOpportunity is a cost-saving opportunity reported in the cost
+// analysis.
+type SavingOpportunity struct {
+	Description   string  `json:"description"`
+	Action        string  `json:"action"`
+	MonthlySaving float64 `json:"monthly_saving"`
+	Risk          string  `json:"risk"`
+}
+
+// SecurityInsight is a security finding produced by the AI.
+//
+// The DetectSecurityAnomalies prompt also asks the model for an "evidence"
+// key per insight; this type has no matching field, so that value is
+// dropped when the reply is decoded.
+type SecurityInsight struct {
+	Type           string   `json:"type"` // anomaly, vulnerability, compliance
+	Severity       string   `json:"severity"`
+	Title          string   `json:"title"`
+	Description    string   `json:"description"`
+	AffectedHosts  []string `json:"affected_hosts"`
+	Recommendation string   `json:"recommendation"`
+	References     []string `json:"references"`
+}
+
+// Prediction is a forecast for one metric on one host: its current value,
+// the predicted value over TimeFrame, and the model's confidence.
+type Prediction struct {
+	Metric     string  `json:"metric"`
+	Host       string  `json:"host"`
+	Current    float64 `json:"current"`
+	Predicted  float64 `json:"predicted"`
+	TimeFrame  string  `json:"time_frame"`
+	Confidence float64 `json:"confidence"`
+	Warning    string  `json:"warning"`
+}
+
+// HealthScore is the health score section of the daily insights.
+type HealthScore struct {
+	Overall      int    `json:"overall"` // 0-100
+	Availability int    `json:"availability"`
+	Performance  int    `json:"performance"`
+	Security     int    `json:"security"`
+	Trend        string `json:"trend"` // improving, stable, declining
+	Explanation  string `json:"explanation"`
+}
+
+// NewInsightsEngine returns an InsightsEngine that sends its prompts to
+// engine.
+func NewInsightsEngine(engine *AIEngine) *InsightsEngine {
+	insights := new(InsightsEngine)
+	insights.engine = engine
+	return insights
+}
+
+// GenerateDailyInsights asks the AI engine for a daily report built from
+// data.
+//
+// The fields of data are interpolated into a prompt that requests a JSON
+// document matching DailyInsights. An engine error is returned as-is; a
+// reply that cannot be decoded yields a wrapped "failed to parse insights"
+// error.
+func (i *InsightsEngine) GenerateDailyInsights(ctx context.Context, data DailyData) (*DailyInsights, error) {
+	// The prompt text is sent verbatim to the model (in Portuguese).
+	const promptTemplate = `Analise os dados das últimas 24 horas e gere insights acionáveis:
+
+RESUMO DO DIA:
+- Período: %s
+- Total de hosts: %d
+- Alertas disparados: %d
+- Incidentes: %d
+- Uptime médio: %.2f%%
+
+MÉTRICAS AGREGADAS:
+%s
+
+TOP 5 HOSTS COM MAIS ALERTAS:
+%s
+
+DEPLOYS REALIZADOS:
+%s
+
+ERROS MAIS FREQUENTES NOS LOGS:
+%s
+
+CUSTOS ESTIMADOS:
+- Compute: R$ %.2f
+- Storage: R$ %.2f
+- Network: R$ %.2f
+
+Gere um relatório completo em JSON:
+{
+  "date": "YYYY-MM-DD",
+  "summary": "resumo executivo do dia (2-3 frases)",
+  "highlights": [
+    {"title": "...", "description": "...", "metric": "...", "value": "..."}
+  ],
+  "concerns": [
+    {"title": "...", "description": "...", "severity": "low|medium|high", "affected": ["host1"], "suggestion": "..."}
+  ],
+  "improvements": [
+    {"title": "...", "description": "...", "expected_impact": "...", "effort": "low|medium|high", "priority": 1}
+  ],
+  "cost_analysis": {
+    "total_estimated": 1234.56,
+    "wasted_resources": [
+      {"resource": "CPU", "host": "...", "usage_percent": 5.0, "suggestion": "...", "potential_saving": 100.0}
+    ],
+    "saving_opportunities": [
+      {"description": "...", "action": "...", "monthly_saving": 200.0, "risk": "low"}
+    ],
+    "trend_vs_last_month": 5.5
+  },
+  "security_insights": [
+    {"type": "anomaly|vulnerability|compliance", "severity": "...", "title": "...", "description": "...", "affected_hosts": [], "recommendation": "..."}
+  ],
+  "predictions": [
+    {"metric": "disk.usage", "host": "...", "current": 70.0, "predicted": 95.0, "time_frame": "7 dias", "confidence": 0.85, "warning": "..."}
+  ],
+  "health_score": {
+    "overall": 85,
+    "availability": 99,
+    "performance": 78,
+    "security": 82,
+    "trend": "improving|stable|declining",
+    "explanation": "..."
+  }
+}`
+
+	prompt := fmt.Sprintf(promptTemplate,
+		data.Period,
+		data.TotalHosts,
+		data.AlertsCount,
+		data.IncidentsCount,
+		data.AvgUptime,
+		data.MetricsSummary,
+		data.TopAlertHosts,
+		data.Deploys,
+		data.TopErrors,
+		data.CostCompute,
+		data.CostStorage,
+		data.CostNetwork,
+	)
+
+	reply, err := i.engine.chat(ctx, prompt)
+	if err != nil {
+		return nil, err
+	}
+
+	insights := new(DailyInsights)
+	if err = json.Unmarshal([]byte(reply), insights); err != nil {
+		return nil, fmt.Errorf("failed to parse insights: %w", err)
+	}
+	return insights, nil
+}
+
+// DetectSecurityAnomalies asks the AI engine to find anomalies or threats
+// in data and returns the insights it reports.
+//
+// Only the "insights" array of the reply is decoded; the "risk_level" and
+// "requires_immediate_action" keys requested in the prompt are ignored. An
+// engine error is returned as-is. A reply that cannot be decoded yields a
+// wrapped "failed to parse security insights" error, matching the error
+// style of GenerateDailyInsights.
+func (i *InsightsEngine) DetectSecurityAnomalies(ctx context.Context, data SecurityData) ([]SecurityInsight, error) {
+	prompt := fmt.Sprintf(`Analise os dados de segurança e identifique anomalias ou ameaças:
+
+LOGINS NAS ÚLTIMAS 24H:
+%s
+
+CONEXÕES DE REDE SUSPEITAS:
+%s
+
+MUDANÇAS DE CONFIGURAÇÃO:
+%s
+
+PROCESSOS INCOMUNS:
+%s
+
+FALHAS DE AUTENTICAÇÃO:
+%s
+
+Identifique:
+1. Comportamentos anômalos
+2. Possíveis tentativas de ataque
+3. Violações de compliance
+4. Configurações inseguras
+
+Responda em JSON:
+{
+  "insights": [
+    {
+      "type": "anomaly|vulnerability|compliance|intrusion_attempt",
+      "severity": "low|medium|high|critical",
+      "title": "título descritivo",
+      "description": "descrição detalhada",
+      "affected_hosts": ["host1"],
+      "evidence": "evidência encontrada",
+      "recommendation": "ação recomendada",
+      "references": ["CVE-xxx", "link"]
+    }
+  ],
+  "risk_level": "low|medium|high|critical",
+  "requires_immediate_action": boolean
+}`,
+		data.Logins,
+		data.SuspiciousConnections,
+		data.ConfigChanges,
+		data.UnusualProcesses,
+		data.AuthFailures,
+	)
+
+	response, err := i.engine.chat(ctx, prompt)
+	if err != nil {
+		return nil, err
+	}
+
+	var result struct {
+		Insights []SecurityInsight `json:"insights"`
+	}
+	if err := json.Unmarshal([]byte(response), &result); err != nil {
+		return nil, fmt.Errorf("failed to parse security insights: %w", err)
+	}
+
+	return result.Insights, nil
+}
+
+// DailyData is the input for GenerateDailyInsights.
+//
+// Every field is interpolated into the prompt. MetricsSummary,
+// TopAlertHosts, Deploys and TopErrors are inserted as pre-formatted text,
+// AvgUptime is printed as a percentage, and the three cost fields are
+// printed with an "R$" prefix.
+type DailyData struct {
+	Period         string
+	TotalHosts     int
+	AlertsCount    int
+	IncidentsCount int
+	AvgUptime      float64
+	MetricsSummary string
+	TopAlertHosts  string
+	Deploys        string
+	TopErrors      string
+	CostCompute    float64
+	CostStorage    float64
+	CostNetwork    float64
+}
+
+// SecurityData is the input for DetectSecurityAnomalies. Each field is
+// pre-formatted text inserted verbatim into its own section of the prompt.
+type SecurityData struct {
+	Logins                string
+	SuspiciousConnections string
+	ConfigChanges         string
+	UnusualProcesses      string
+	AuthFailures          string
+}
+
+// GenerateExecutiveReport asks the AI engine for an executive report.
+//
+// period names the kind of report in the opening line of the prompt, while
+// data.Period is printed as the reporting period. The reply is decoded into
+// an ExecutiveReport; the "key_metrics" and "budget_status" keys requested
+// in the prompt have no matching field there and are dropped. An engine
+// error is returned as-is. A reply that cannot be decoded yields a wrapped
+// "failed to parse executive report" error, matching the error style of
+// GenerateDailyInsights.
+func (i *InsightsEngine) GenerateExecutiveReport(ctx context.Context, period string, data ReportData) (*ExecutiveReport, error) {
+	prompt := fmt.Sprintf(`Gere um relatório executivo de %s para liderança técnica:
+
+PERÍODO: %s
+
+KPIs:
+- Uptime: %.2f%%
+- MTTR: %s
+- Incidentes: %d (vs período anterior: %+d)
+- Alertas: %d
+- Deploys: %d
+
+PRINCIPAIS INCIDENTES:
+%s
+
+MELHORIAS IMPLEMENTADAS:
+%s
+
+CUSTOS:
+%s
+
+SEGURANÇA:
+%s
+
+Gere relatório executivo em JSON:
+{
+  "period": "...",
+  "executive_summary": "resumo de 3-4 frases para C-level",
+  "key_metrics": {...},
+  "achievements": ["conquista 1", "conquista 2"],
+  "challenges": ["desafio 1", "desafio 2"],
+  "action_items": [
+    {"item": "...", "owner": "...", "due_date": "...", "priority": "high|medium|low"}
+  ],
+  "next_period_focus": ["foco 1", "foco 2"],
+  "budget_status": {"current": 1000, "projected": 1200, "variance": "..."},
+  "risk_summary": "resumo de riscos"
+}`,
+		period,
+		data.Period,
+		data.Uptime,
+		data.MTTR,
+		data.Incidents,
+		data.IncidentsDelta,
+		data.Alerts,
+		data.Deploys,
+		data.MainIncidents,
+		data.Improvements,
+		data.CostsSummary,
+		data.SecuritySummary,
+	)
+
+	response, err := i.engine.chat(ctx, prompt)
+	if err != nil {
+		return nil, err
+	}
+
+	var report ExecutiveReport
+	if err := json.Unmarshal([]byte(response), &report); err != nil {
+		return nil, fmt.Errorf("failed to parse executive report: %w", err)
+	}
+
+	return &report, nil
+}
+
+// ReportData is the input for GenerateExecutiveReport.
+//
+// Every field is interpolated into the prompt. IncidentsDelta is printed
+// with an explicit sign as the change versus the previous period;
+// MainIncidents, Improvements, CostsSummary and SecuritySummary are
+// inserted as pre-formatted text.
+type ReportData struct {
+	Period          string
+	Uptime          float64
+	MTTR            string
+	Incidents       int
+	IncidentsDelta  int
+	Alerts          int
+	Deploys         int
+	MainIncidents   string
+	Improvements    string
+	CostsSummary    string
+	SecuritySummary string
+}
+
+// ExecutiveReport is the executive report decoded from the model's reply
+// in GenerateExecutiveReport.
+//
+// The prompt also requests "key_metrics" and "budget_status" keys; this
+// type has no fields for them.
+type ExecutiveReport struct {
+	Period           string       `json:"period"`
+	ExecutiveSummary string       `json:"executive_summary"`
+	Achievements     []string     `json:"achievements"`
+	Challenges       []string     `json:"challenges"`
+	ActionItems      []ActionItem `json:"action_items"`
+	NextPeriodFocus  []string     `json:"next_period_focus"`
+	RiskSummary      string       `json:"risk_summary"`
+}
+
+// ActionItem is one action item of an ExecutiveReport. DueDate is kept as
+// the string returned by the model; it is not parsed as a date.
+type ActionItem struct {
+	Item     string `json:"item"`
+	Owner    string `json:"owner"`
+	DueDate  string `json:"due_date"`
+	Priority string `json:"priority"`
+}
--- a/internal/ai/smart_alerts.go
+++ b/internal/ai/smart_alerts.go
@@ -0,0 +1,266 @@
+package ai
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"time"
+)
+
+// SmartAlertEngine enriches, correlates and filters alerts with the help of
+// the AI engine.
+//
+// alertBuffer holds the raw alerts received by ProcessAlert whose timestamp
+// is still inside correlationWindow; it is accessed without any locking in
+// this file.
+type SmartAlertEngine struct {
+	engine            *AIEngine
+	alertBuffer       []RawAlert
+	correlationWindow time.Duration
+}
+
+// RawAlert is an alert as received, before AI processing.
+type RawAlert struct {
+	ID        string
+	Metric    string
+	Host      string
+	Value     float64
+	Threshold float64
+	Severity  string
+	Timestamp time.Time
+	Labels    map[string]string
+}
+
+// SmartAlert is an alert enriched by the AI.
+//
+// ProcessAlert decodes the model's JSON reply into this type and then
+// overwrites ID, AffectedHosts and Timestamp with values taken from the raw
+// alert.
+type SmartAlert struct {
+	ID            string    `json:"id"`
+	Title         string    `json:"title"`
+	Description   string    `json:"description"`
+	Severity      string    `json:"severity"`
+	Category      string    `json:"category"`
+	AffectedHosts []string  `json:"affected_hosts"`
+	RootCause     string    `json:"root_cause"`
+	Impact        string    `json:"impact"`
+	Suggestions   []string  `json:"suggestions"`
+	RelatedAlerts []string  `json:"related_alerts"`
+	Runbook       string    `json:"runbook,omitempty"`
+	IsNoise       bool      `json:"is_noise"`
+	Confidence    float64   `json:"confidence"`
+	CorrelationID string    `json:"correlation_id,omitempty"`
+	Timestamp     time.Time `json:"timestamp"`
+}
+
+// AlertCorrelation is a group of alerts that the AI judged to belong to the
+// same incident. CorrelateAlerts fills it from the model's reply and uses
+// the first alert's timestamp as StartTime.
+type AlertCorrelation struct {
+	ID               string       `json:"id"`
+	Title            string       `json:"title"`
+	Alerts           []SmartAlert `json:"alerts"`
+	RootCause        string       `json:"root_cause"`
+	Impact           string       `json:"impact"`
+	Severity         string       `json:"severity"`
+	StartTime        time.Time    `json:"start_time"`
+	AffectedServices []string     `json:"affected_services"`
+}
+
+// NewSmartAlertEngine returns a SmartAlertEngine bound to engine, with an
+// empty alert buffer and a five-minute correlation window.
+func NewSmartAlertEngine(engine *AIEngine) *SmartAlertEngine {
+	alerts := new(SmartAlertEngine)
+	alerts.engine = engine
+	alerts.alertBuffer = []RawAlert{}
+	alerts.correlationWindow = 5 * time.Minute
+	return alerts
+}
+
+// ProcessAlert enriches a raw alert with AI-generated context.
+//
+// The alert is first added to the correlation buffer and entries older than
+// the correlation window are dropped. The prompt then combines the alert,
+// systemContext and the buffered alerts for the same host (which include
+// the alert itself while its timestamp is inside the window). The reply is
+// decoded into a SmartAlert whose ID, AffectedHosts and Timestamp are taken
+// from raw. Engine and decoding errors are returned as-is.
+func (s *SmartAlertEngine) ProcessAlert(ctx context.Context, raw RawAlert, systemContext AlertContext) (*SmartAlert, error) {
+	// Keep the buffer up to date for correlation.
+	s.alertBuffer = append(s.alertBuffer, raw)
+	s.cleanOldAlerts()
+
+	// The prompt text is sent verbatim to the model (in Portuguese).
+	const promptTemplate = `Analise este alerta e enriqueça com contexto inteligente:
+
+ALERTA:
+- Métrica: %s
+- Host: %s
+- Valor atual: %.2f
+- Threshold: %.2f
+- Severidade: %s
+- Timestamp: %s
+- Labels: %v
+
+CONTEXTO DO SISTEMA:
+- Deploys recentes: %s
+- Alertas relacionados (últimos 5min): %d
+- Incidentes abertos: %d
+- Horário comercial: %v
+
+ALERTAS RECENTES NO MESMO HOST:
+%s
+
+PADRÕES CONHECIDOS:
+- Este tipo de alerta costuma ser: %s
+- Falsos positivos históricos: %d%%
+
+Analise e responda em JSON:
+{
+  "title": "título conciso e descritivo",
+  "description": "descrição detalhada do que está acontecendo",
+  "severity": "info|warning|error|critical",
+  "category": "performance|availability|security|capacity|configuration",
+  "root_cause": "provável causa raiz",
+  "impact": "impacto no negócio/usuários",
+  "suggestions": ["ação sugerida 1", "ação sugerida 2"],
+  "is_noise": boolean (true se for falso positivo ou não acionável),
+  "confidence": 0.0-1.0,
+  "runbook": "link ou passos do runbook se existir"
+}`
+
+	sameHostCount := s.countRecentAlerts(raw.Host)
+	sameHostList := s.getRecentAlertsForHost(raw.Host)
+
+	prompt := fmt.Sprintf(promptTemplate,
+		raw.Metric,
+		raw.Host,
+		raw.Value,
+		raw.Threshold,
+		raw.Severity,
+		raw.Timestamp.Format(time.RFC3339),
+		raw.Labels,
+		systemContext.RecentDeploys,
+		sameHostCount,
+		systemContext.OpenIncidents,
+		systemContext.IsBusinessHours,
+		sameHostList,
+		systemContext.HistoricalPattern,
+		systemContext.FalsePositiveRate,
+	)
+
+	reply, err := s.engine.chat(ctx, prompt)
+	if err != nil {
+		return nil, err
+	}
+
+	enriched := new(SmartAlert)
+	if err = json.Unmarshal([]byte(reply), enriched); err != nil {
+		return nil, err
+	}
+
+	// Identity fields always come from the raw alert, not from the model.
+	enriched.ID = raw.ID
+	enriched.AffectedHosts = []string{raw.Host}
+	enriched.Timestamp = raw.Timestamp
+
+	return enriched, nil
+}
+
+// CorrelateAlerts asks the AI engine whether alerts belong to the same
+// incident and, if so, returns the consolidated correlation.
+//
+// It returns (nil, nil) when fewer than two alerts are given or when the
+// model reports that they are not correlated. Engine and decoding errors
+// are returned as-is. The correlation's StartTime is the timestamp of the
+// first alert in the slice, and its ID is derived from the current time.
+//
+// An alert with no affected hosts is listed with an empty host name
+// instead of causing an index-out-of-range panic.
+func (s *SmartAlertEngine) CorrelateAlerts(ctx context.Context, alerts []SmartAlert) (*AlertCorrelation, error) {
+	if len(alerts) < 2 {
+		return nil, nil
+	}
+
+	alertSummary := ""
+	for _, a := range alerts {
+		host := ""
+		if len(a.AffectedHosts) > 0 {
+			host = a.AffectedHosts[0]
+		}
+		alertSummary += fmt.Sprintf("- [%s] %s on %s: %s\n",
+			a.Severity, a.Title, host, a.RootCause)
+	}
+
+	prompt := fmt.Sprintf(`Analise estes alertas e identifique se estão relacionados:
+
+ALERTAS:
+%s
+
+Determine:
+1. Se os alertas estão correlacionados (mesmo incidente)
+2. Qual é a causa raiz comum
+3. Qual é o impacto real
+4. Qual deve ser a severidade consolidada
+
+Responda em JSON:
+{
+  "correlated": boolean,
+  "title": "título do incidente consolidado",
+  "root_cause": "causa raiz comum",
+  "impact": "impacto consolidado",
+  "severity": "severidade do grupo",
+  "affected_services": ["serviço1", "serviço2"],
+  "timeline": "sequência de eventos",
+  "recommendations": ["recomendação 1", "recomendação 2"]
+}`, alertSummary)
+
+	response, err := s.engine.chat(ctx, prompt)
+	if err != nil {
+		return nil, err
+	}
+
+	// Only the keys needed to build the correlation are decoded; "timeline"
+	// and "recommendations" are ignored.
+	var result struct {
+		Correlated       bool     `json:"correlated"`
+		Title            string   `json:"title"`
+		RootCause        string   `json:"root_cause"`
+		Impact           string   `json:"impact"`
+		Severity         string   `json:"severity"`
+		AffectedServices []string `json:"affected_services"`
+	}
+
+	if err := json.Unmarshal([]byte(response), &result); err != nil {
+		return nil, err
+	}
+
+	if !result.Correlated {
+		return nil, nil
+	}
+
+	return &AlertCorrelation{
+		ID:               fmt.Sprintf("corr_%d", time.Now().UnixNano()),
+		Title:            result.Title,
+		Alerts:           alerts,
+		RootCause:        result.RootCause,
+		Impact:           result.Impact,
+		Severity:         result.Severity,
+		StartTime:        alerts[0].Timestamp,
+		AffectedServices: result.AffectedServices,
+	}, nil
+}
+
+// SuppressNoise returns the alerts worth acting on: those not flagged as
+// noise whose confidence is strictly greater than 0.5. The input slice is
+// not modified and the result is never nil.
+func (s *SmartAlertEngine) SuppressNoise(alerts []SmartAlert) []SmartAlert {
+	kept := []SmartAlert{}
+	for idx := range alerts {
+		if alerts[idx].IsNoise {
+			continue
+		}
+		if alerts[idx].Confidence > 0.5 {
+			kept = append(kept, alerts[idx])
+		}
+	}
+	return kept
+}
+
+// cleanOldAlerts replaces the alert buffer with a new slice holding only
+// the alerts whose timestamp is strictly after now minus the correlation
+// window.
+func (s *SmartAlertEngine) cleanOldAlerts() {
+	threshold := time.Now().Add(-s.correlationWindow)
+	kept := []RawAlert{}
+	for idx := range s.alertBuffer {
+		if !s.alertBuffer[idx].Timestamp.After(threshold) {
+			continue
+		}
+		kept = append(kept, s.alertBuffer[idx])
+	}
+	s.alertBuffer = kept
+}
+
+func (s *SmartAlertEngine) countRecentAlerts(host string) int {
+	count := 0
+	for _, a := range s.alertBuffer {
+		if a.Host == host {
+			count++
+		}
+	}
+	return count
+}
+
+func (s *SmartAlertEngine) getRecentAlertsForHost(host string) string {
+	result := ""
+	for _, a := range s.alertBuffer {
+		if a.Host == host {
+			result += fmt.Sprintf("- %s: %s = %.2f\n", a.Timestamp.Format("15:04:05"), a.Metric, a.Value)
+		}
+	}
+	if result == "" {
+		return "Nenhum alerta recente"
+	}
+	return result
+}
+
+// AlertContext is the system context passed to ProcessAlert.
+//
+// All fields are interpolated into the alert prompt. RecentDeploys and
+// HistoricalPattern are inserted as text, and FalsePositiveRate is printed
+// as an integer followed by a percent sign.
+type AlertContext struct {
+	RecentDeploys     string
+	OpenIncidents     int
+	IsBusinessHours   bool
+	HistoricalPattern string
+	FalsePositiveRate int
+}