diff --git a/internal/ai/autohealing.go b/internal/ai/autohealing.go new file mode 100644 index 0000000..f70b966 --- /dev/null +++ b/internal/ai/autohealing.go @@ -0,0 +1,278 @@ +package ai + +import ( + "context" + "encoding/json" + "fmt" + "log" + "time" +) + +// AutoHealer sistema de auto-correção inteligente +type AutoHealer struct { + engine *AIEngine + enabled bool + dryRun bool + maxActions int + cooldown time.Duration + lastActions map[string]time.Time +} + +// HealingAction ação de correção +type HealingAction struct { + ID string `json:"id"` + Type string `json:"type"` + Target string `json:"target"` + Command string `json:"command"` + Description string `json:"description"` + Risk string `json:"risk"` + Executed bool `json:"executed"` + ExecutedAt time.Time `json:"executed_at,omitempty"` + Result string `json:"result,omitempty"` + Error string `json:"error,omitempty"` +} + +// HealingPlan plano de correção gerado pela IA +type HealingPlan struct { + Issue string `json:"issue"` + Severity string `json:"severity"` + Analysis string `json:"analysis"` + Actions []HealingAction `json:"actions"` + Confidence float64 `json:"confidence"` + RequiresApproval bool `json:"requires_approval"` +} + +// AutoHealConfig configuração do auto-healer +type AutoHealConfig struct { + Enabled bool + DryRun bool // Se true, não executa, só sugere + MaxActionsPerHour int + Cooldown time.Duration // Tempo mínimo entre ações no mesmo recurso + AllowedActions []string // Tipos de ações permitidas + ApprovalRequired []string // Ações que precisam de aprovação humana +} + +// NewAutoHealer cria novo auto-healer +func NewAutoHealer(engine *AIEngine, config AutoHealConfig) *AutoHealer { + return &AutoHealer{ + engine: engine, + enabled: config.Enabled, + dryRun: config.DryRun, + maxActions: config.MaxActionsPerHour, + cooldown: config.Cooldown, + lastActions: make(map[string]time.Time), + } +} + +// Analyze analisa um problema e gera plano de correção +func (h *AutoHealer) Analyze(ctx 
context.Context, incident Incident) (*HealingPlan, error) { + if !h.enabled { + return nil, fmt.Errorf("auto-healer is disabled") + } + + prompt := fmt.Sprintf(`Analise o seguinte incidente e gere um plano de correção automática: + +INCIDENTE: +- Tipo: %s +- Severidade: %s +- Host: %s +- Descrição: %s +- Início: %s +- Duração: %s + +MÉTRICAS DO HOST: +%s + +LOGS RECENTES: +%s + +AÇÕES ANTERIORES (últimas 24h): +%s + +REGRAS: +1. Priorize ações de baixo risco +2. Reinício de serviço só como último recurso +3. Escalonamento horizontal antes de vertical +4. Sempre tenha rollback em mente + +Responda em JSON: +{ + "issue": "descrição concisa do problema", + "severity": "low|medium|high|critical", + "analysis": "análise detalhada da causa raiz", + "actions": [ + { + "id": "action_1", + "type": "restart_service|scale_up|clear_cache|rotate_logs|kill_process|run_script|config_change", + "target": "nome do serviço ou recurso", + "command": "comando a executar", + "description": "o que essa ação faz", + "risk": "low|medium|high" + } + ], + "confidence": 0.0-1.0, + "requires_approval": boolean +}`, + incident.Type, + incident.Severity, + incident.Host, + incident.Description, + incident.StartTime.Format(time.RFC3339), + time.Since(incident.StartTime).String(), + incident.Metrics, + incident.RecentLogs, + h.getRecentActionsForHost(incident.Host), + ) + + response, err := h.engine.chat(ctx, prompt) + if err != nil { + return nil, err + } + + var plan HealingPlan + if err := json.Unmarshal([]byte(response), &plan); err != nil { + return nil, fmt.Errorf("failed to parse healing plan: %w", err) + } + + return &plan, nil +} + +// Execute executa um plano de correção +func (h *AutoHealer) Execute(ctx context.Context, plan *HealingPlan) (*HealingResult, error) { + if h.dryRun { + return &HealingResult{ + Plan: plan, + DryRun: true, + Message: "Dry run - ações não executadas", + }, nil + } + + result := &HealingResult{ + Plan: plan, + StartedAt: time.Now(), + Actions: 
make([]ActionResult, 0), + } + + for _, action := range plan.Actions { + // Verificar cooldown + if !h.canExecute(action.Target) { + result.Actions = append(result.Actions, ActionResult{ + ActionID: action.ID, + Skipped: true, + Reason: "Cooldown period active", + }) + continue + } + + // Executar ação + actionResult := h.executeAction(ctx, action) + result.Actions = append(result.Actions, actionResult) + + // Registrar execução + h.lastActions[action.Target] = time.Now() + + // Se falhou, parar + if !actionResult.Success { + result.Success = false + result.Error = actionResult.Error + break + } + } + + result.CompletedAt = time.Now() + result.Success = result.Error == "" + + return result, nil +} + +// executeAction executa uma ação individual +func (h *AutoHealer) executeAction(ctx context.Context, action HealingAction) ActionResult { + result := ActionResult{ + ActionID: action.ID, + StartedAt: time.Now(), + } + + log.Printf("[AutoHealer] Executing action: %s on %s", action.Type, action.Target) + + // Aqui seria a execução real via SSH, API, etc. + // Por segurança, implementar com cuidado + switch action.Type { + case "restart_service": + // Exemplo: systemctl restart + result.Command = action.Command + // result = executeSSHCommand(action.Target, action.Command) + + case "scale_up": + // Exemplo: kubectl scale deployment... 
+ result.Command = action.Command + + case "clear_cache": + // Exemplo: redis-cli flushdb + result.Command = action.Command + + case "rotate_logs": + // Exemplo: logrotate -f + result.Command = action.Command + + default: + result.Error = fmt.Sprintf("Unknown action type: %s", action.Type) + return result + } + + // Simular execução bem-sucedida (implementar execução real) + result.Success = true + result.CompletedAt = time.Now() + result.Output = "Action executed successfully (simulated)" + + return result +} + +func (h *AutoHealer) canExecute(target string) bool { + lastAction, exists := h.lastActions[target] + if !exists { + return true + } + return time.Since(lastAction) > h.cooldown +} + +func (h *AutoHealer) getRecentActionsForHost(host string) string { + // Buscar histórico de ações recentes + return "Nenhuma ação recente" +} + +// Incident representa um incidente +type Incident struct { + ID string + Type string + Severity string + Host string + Description string + StartTime time.Time + Metrics string + RecentLogs string +} + +// HealingResult resultado da execução do plano +type HealingResult struct { + Plan *HealingPlan + Success bool + DryRun bool + Message string + Error string + StartedAt time.Time + CompletedAt time.Time + Actions []ActionResult +} + +// ActionResult resultado de uma ação +type ActionResult struct { + ActionID string + Success bool + Skipped bool + Reason string + Command string + Output string + Error string + StartedAt time.Time + CompletedAt time.Time +} diff --git a/internal/ai/copilot.go b/internal/ai/copilot.go new file mode 100644 index 0000000..48af5ef --- /dev/null +++ b/internal/ai/copilot.go @@ -0,0 +1,245 @@ +package ai + +import ( + "context" + "encoding/json" + "fmt" + "strings" + "time" +) + +// OphionCopilot é o assistente de IA para troubleshooting +type OphionCopilot struct { + engine *AIEngine + contextStore map[string]*ConversationContext +} + +// ConversationContext contexto da conversa +type ConversationContext 
struct { + SessionID string + UserID string + Messages []CopilotMessage + CurrentHost string + CurrentIssue string + StartedAt time.Time +} + +// CopilotMessage mensagem do copilot +type CopilotMessage struct { + Role string `json:"role"` // user, assistant + Content string `json:"content"` + Timestamp time.Time `json:"timestamp"` + Metadata map[string]interface{} `json:"metadata,omitempty"` +} + +// CopilotResponse resposta do copilot +type CopilotResponse struct { + Message string `json:"message"` + Actions []SuggestedAction `json:"actions,omitempty"` + Queries []SuggestedQuery `json:"queries,omitempty"` + RelatedDocs []string `json:"related_docs,omitempty"` + Confidence float64 `json:"confidence"` + NeedsHuman bool `json:"needs_human"` +} + +// SuggestedAction ação sugerida pelo copilot +type SuggestedAction struct { + Type string `json:"type"` // command, script, config_change, scale, restart + Description string `json:"description"` + Command string `json:"command,omitempty"` + Risk string `json:"risk"` // low, medium, high + AutoExecute bool `json:"auto_execute"` +} + +// SuggestedQuery query sugerida para investigação +type SuggestedQuery struct { + Type string `json:"type"` // metrics, logs, traces + Query string `json:"query"` + Description string `json:"description"` +} + +// NewOphionCopilot cria nova instância do Copilot +func NewOphionCopilot(engine *AIEngine) *OphionCopilot { + return &OphionCopilot{ + engine: engine, + contextStore: make(map[string]*ConversationContext), + } +} + +// Chat processa uma mensagem do usuário +func (c *OphionCopilot) Chat(ctx context.Context, sessionID, userMessage string, systemContext SystemContext) (*CopilotResponse, error) { + // Obter ou criar contexto da conversa + convCtx := c.getOrCreateContext(sessionID) + + // Adicionar mensagem do usuário + convCtx.Messages = append(convCtx.Messages, CopilotMessage{ + Role: "user", + Content: userMessage, + Timestamp: time.Now(), + }) + + // Construir prompt com contexto 
completo + prompt := c.buildPrompt(userMessage, convCtx, systemContext) + + // Chamar IA + response, err := c.engine.chat(ctx, prompt) + if err != nil { + return nil, err + } + + // Parsear resposta + var result CopilotResponse + if err := json.Unmarshal([]byte(response), &result); err != nil { + // Se não for JSON válido, retornar como mensagem simples + result = CopilotResponse{ + Message: response, + Confidence: 0.7, + } + } + + // Adicionar resposta ao contexto + convCtx.Messages = append(convCtx.Messages, CopilotMessage{ + Role: "assistant", + Content: result.Message, + Timestamp: time.Now(), + Metadata: map[string]interface{}{ + "confidence": result.Confidence, + "actions": len(result.Actions), + }, + }) + + return &result, nil +} + +// buildPrompt constrói o prompt completo +func (c *OphionCopilot) buildPrompt(userMessage string, convCtx *ConversationContext, sysCtx SystemContext) string { + var sb strings.Builder + + sb.WriteString(`Você é o OPHION Copilot, um assistente especialista em observabilidade e troubleshooting de infraestrutura. 
+ +SUAS CAPACIDADES: +- Analisar métricas, logs e traces +- Identificar causas raiz de problemas +- Sugerir comandos e ações de correção +- Explicar conceitos de forma clara +- Correlacionar eventos de diferentes fontes + +CONTEXTO DO SISTEMA: +`) + + // Adicionar contexto do sistema + sb.WriteString(fmt.Sprintf(` +Hosts monitorados: %d +Alertas ativos: %d +Incidentes abertos: %d +Último deploy: %s +`, sysCtx.TotalHosts, sysCtx.ActiveAlerts, sysCtx.OpenIncidents, sysCtx.LastDeploy)) + + // Métricas atuais + if len(sysCtx.CurrentMetrics) > 0 { + sb.WriteString("\nMÉTRICAS ATUAIS:\n") + for host, metrics := range sysCtx.CurrentMetrics { + sb.WriteString(fmt.Sprintf("- %s: CPU=%.1f%%, MEM=%.1f%%, Disco=%.1f%%\n", + host, metrics.CPU, metrics.Memory, metrics.Disk)) + } + } + + // Alertas recentes + if len(sysCtx.RecentAlerts) > 0 { + sb.WriteString("\nALERTAS RECENTES:\n") + for _, alert := range sysCtx.RecentAlerts { + sb.WriteString(fmt.Sprintf("- [%s] %s: %s\n", alert.Severity, alert.Host, alert.Message)) + } + } + + // Histórico da conversa + if len(convCtx.Messages) > 1 { + sb.WriteString("\nHISTÓRICO DA CONVERSA:\n") + for _, msg := range convCtx.Messages[max(0, len(convCtx.Messages)-6):] { + sb.WriteString(fmt.Sprintf("%s: %s\n", msg.Role, msg.Content)) + } + } + + // Pergunta atual + sb.WriteString(fmt.Sprintf(` +PERGUNTA DO USUÁRIO: +%s + +INSTRUÇÕES DE RESPOSTA: +1. Seja direto e prático +2. Se sugerir comandos, explique o que fazem +3. Indique o nível de risco das ações +4. 
Se não tiver certeza, diga e sugira investigações adicionais + +Responda em JSON: +{ + "message": "sua resposta detalhada", + "actions": [ + { + "type": "command|script|config_change|scale|restart", + "description": "o que faz", + "command": "comando se aplicável", + "risk": "low|medium|high", + "auto_execute": false + } + ], + "queries": [ + { + "type": "metrics|logs|traces", + "query": "query para investigar", + "description": "o que vai mostrar" + } + ], + "confidence": 0.0-1.0, + "needs_human": false +}`, userMessage)) + + return sb.String() +} + +func (c *OphionCopilot) getOrCreateContext(sessionID string) *ConversationContext { + if ctx, exists := c.contextStore[sessionID]; exists { + return ctx + } + + ctx := &ConversationContext{ + SessionID: sessionID, + Messages: []CopilotMessage{}, + StartedAt: time.Now(), + } + c.contextStore[sessionID] = ctx + return ctx +} + +// SystemContext contexto do sistema para o copilot +type SystemContext struct { + TotalHosts int + ActiveAlerts int + OpenIncidents int + LastDeploy string + CurrentMetrics map[string]HostMetrics + RecentAlerts []AlertInfo + RecentLogs []string +} + +// HostMetrics métricas de um host +type HostMetrics struct { + CPU float64 + Memory float64 + Disk float64 +} + +// AlertInfo informação de alerta +type AlertInfo struct { + Severity string + Host string + Message string + Time time.Time +} + +func max(a, b int) int { + if a > b { + return a + } + return b +} diff --git a/internal/ai/insights.go b/internal/ai/insights.go new file mode 100644 index 0000000..67d42dd --- /dev/null +++ b/internal/ai/insights.go @@ -0,0 +1,397 @@ +package ai + +import ( + "context" + "encoding/json" + "fmt" + "time" +) + +// InsightsEngine motor de geração de insights +type InsightsEngine struct { + engine *AIEngine +} + +// DailyInsights insights diários gerados pela IA +type DailyInsights struct { + Date string `json:"date"` + Summary string `json:"summary"` + Highlights []Highlight `json:"highlights"` + Concerns 
[]Concern `json:"concerns"` + Improvements []Improvement `json:"improvements"` + CostAnalysis CostAnalysis `json:"cost_analysis"` + SecurityInsights []SecurityInsight `json:"security_insights"` + Predictions []Prediction `json:"predictions"` + Score HealthScore `json:"health_score"` +} + +// Highlight destaque positivo +type Highlight struct { + Title string `json:"title"` + Description string `json:"description"` + Metric string `json:"metric"` + Value string `json:"value"` +} + +// Concern preocupação identificada +type Concern struct { + Title string `json:"title"` + Description string `json:"description"` + Severity string `json:"severity"` + Affected []string `json:"affected"` + Suggestion string `json:"suggestion"` +} + +// Improvement sugestão de melhoria +type Improvement struct { + Title string `json:"title"` + Description string `json:"description"` + ExpectedImpact string `json:"expected_impact"` + Effort string `json:"effort"` // low, medium, high + Priority int `json:"priority"` +} + +// CostAnalysis análise de custos +type CostAnalysis struct { + TotalEstimated float64 `json:"total_estimated"` + ByResource map[string]float64 `json:"by_resource"` + WastedResources []WastedResource `json:"wasted_resources"` + SavingOpportunities []SavingOpportunity `json:"saving_opportunities"` + TrendVsLastMonth float64 `json:"trend_vs_last_month"` +} + +// WastedResource recurso subutilizado +type WastedResource struct { + Resource string `json:"resource"` + Host string `json:"host"` + Usage float64 `json:"usage_percent"` + Suggestion string `json:"suggestion"` + PotentialSaving float64 `json:"potential_saving"` +} + +// SavingOpportunity oportunidade de economia +type SavingOpportunity struct { + Description string `json:"description"` + Action string `json:"action"` + MonthlySaving float64 `json:"monthly_saving"` + Risk string `json:"risk"` +} + +// SecurityInsight insight de segurança +type SecurityInsight struct { + Type string `json:"type"` // anomaly, 
vulnerability, compliance + Severity string `json:"severity"` + Title string `json:"title"` + Description string `json:"description"` + AffectedHosts []string `json:"affected_hosts"` + Recommendation string `json:"recommendation"` + References []string `json:"references"` +} + +// Prediction previsão +type Prediction struct { + Metric string `json:"metric"` + Host string `json:"host"` + Current float64 `json:"current"` + Predicted float64 `json:"predicted"` + TimeFrame string `json:"time_frame"` + Confidence float64 `json:"confidence"` + Warning string `json:"warning"` +} + +// HealthScore pontuação de saúde +type HealthScore struct { + Overall int `json:"overall"` // 0-100 + Availability int `json:"availability"` + Performance int `json:"performance"` + Security int `json:"security"` + Trend string `json:"trend"` // improving, stable, declining + Explanation string `json:"explanation"` +} + +// NewInsightsEngine cria novo engine de insights +func NewInsightsEngine(engine *AIEngine) *InsightsEngine { + return &InsightsEngine{engine: engine} +} + +// GenerateDailyInsights gera insights diários +func (i *InsightsEngine) GenerateDailyInsights(ctx context.Context, data DailyData) (*DailyInsights, error) { + prompt := fmt.Sprintf(`Analise os dados das últimas 24 horas e gere insights acionáveis: + +RESUMO DO DIA: +- Período: %s +- Total de hosts: %d +- Alertas disparados: %d +- Incidentes: %d +- Uptime médio: %.2f%% + +MÉTRICAS AGREGADAS: +%s + +TOP 5 HOSTS COM MAIS ALERTAS: +%s + +DEPLOYS REALIZADOS: +%s + +ERROS MAIS FREQUENTES NOS LOGS: +%s + +CUSTOS ESTIMADOS: +- Compute: R$ %.2f +- Storage: R$ %.2f +- Network: R$ %.2f + +Gere um relatório completo em JSON: +{ + "date": "YYYY-MM-DD", + "summary": "resumo executivo do dia (2-3 frases)", + "highlights": [ + {"title": "...", "description": "...", "metric": "...", "value": "..."} + ], + "concerns": [ + {"title": "...", "description": "...", "severity": "low|medium|high", "affected": ["host1"], "suggestion": "..."} + ], 
+ "improvements": [ + {"title": "...", "description": "...", "expected_impact": "...", "effort": "low|medium|high", "priority": 1} + ], + "cost_analysis": { + "total_estimated": 1234.56, + "wasted_resources": [ + {"resource": "CPU", "host": "...", "usage_percent": 5.0, "suggestion": "...", "potential_saving": 100.0} + ], + "saving_opportunities": [ + {"description": "...", "action": "...", "monthly_saving": 200.0, "risk": "low"} + ], + "trend_vs_last_month": 5.5 + }, + "security_insights": [ + {"type": "anomaly|vulnerability|compliance", "severity": "...", "title": "...", "description": "...", "affected_hosts": [], "recommendation": "..."} + ], + "predictions": [ + {"metric": "disk.usage", "host": "...", "current": 70.0, "predicted": 95.0, "time_frame": "7 dias", "confidence": 0.85, "warning": "..."} + ], + "health_score": { + "overall": 85, + "availability": 99, + "performance": 78, + "security": 82, + "trend": "improving|stable|declining", + "explanation": "..." + } +}`, + data.Period, + data.TotalHosts, + data.AlertsCount, + data.IncidentsCount, + data.AvgUptime, + data.MetricsSummary, + data.TopAlertHosts, + data.Deploys, + data.TopErrors, + data.CostCompute, + data.CostStorage, + data.CostNetwork, + ) + + response, err := i.engine.chat(ctx, prompt) + if err != nil { + return nil, err + } + + var insights DailyInsights + if err := json.Unmarshal([]byte(response), &insights); err != nil { + return nil, fmt.Errorf("failed to parse insights: %w", err) + } + + return &insights, nil +} + +// DetectSecurityAnomalies detecta anomalias de segurança +func (i *InsightsEngine) DetectSecurityAnomalies(ctx context.Context, data SecurityData) ([]SecurityInsight, error) { + prompt := fmt.Sprintf(`Analise os dados de segurança e identifique anomalias ou ameaças: + +LOGINS NAS ÚLTIMAS 24H: +%s + +CONEXÕES DE REDE SUSPEITAS: +%s + +MUDANÇAS DE CONFIGURAÇÃO: +%s + +PROCESSOS INCOMUNS: +%s + +FALHAS DE AUTENTICAÇÃO: +%s + +Identifique: +1. Comportamentos anômalos +2. 
Possíveis tentativas de ataque +3. Violações de compliance +4. Configurações inseguras + +Responda em JSON: +{ + "insights": [ + { + "type": "anomaly|vulnerability|compliance|intrusion_attempt", + "severity": "low|medium|high|critical", + "title": "título descritivo", + "description": "descrição detalhada", + "affected_hosts": ["host1"], + "evidence": "evidência encontrada", + "recommendation": "ação recomendada", + "references": ["CVE-xxx", "link"] + } + ], + "risk_level": "low|medium|high|critical", + "requires_immediate_action": boolean +}`, + data.Logins, + data.SuspiciousConnections, + data.ConfigChanges, + data.UnusualProcesses, + data.AuthFailures, + ) + + response, err := i.engine.chat(ctx, prompt) + if err != nil { + return nil, err + } + + var result struct { + Insights []SecurityInsight `json:"insights"` + } + if err := json.Unmarshal([]byte(response), &result); err != nil { + return nil, err + } + + return result.Insights, nil +} + +// DailyData dados para análise diária +type DailyData struct { + Period string + TotalHosts int + AlertsCount int + IncidentsCount int + AvgUptime float64 + MetricsSummary string + TopAlertHosts string + Deploys string + TopErrors string + CostCompute float64 + CostStorage float64 + CostNetwork float64 +} + +// SecurityData dados para análise de segurança +type SecurityData struct { + Logins string + SuspiciousConnections string + ConfigChanges string + UnusualProcesses string + AuthFailures string +} + +// GenerateExecutiveReport gera relatório executivo semanal/mensal +func (i *InsightsEngine) GenerateExecutiveReport(ctx context.Context, period string, data ReportData) (*ExecutiveReport, error) { + prompt := fmt.Sprintf(`Gere um relatório executivo de %s para liderança técnica: + +PERÍODO: %s + +KPIs: +- Uptime: %.2f%% +- MTTR: %s +- Incidentes: %d (vs período anterior: %+d) +- Alertas: %d +- Deploys: %d + +PRINCIPAIS INCIDENTES: +%s + +MELHORIAS IMPLEMENTADAS: +%s + +CUSTOS: +%s + +SEGURANÇA: +%s + +Gere relatório 
executivo em JSON: +{ + "period": "...", + "executive_summary": "resumo de 3-4 frases para C-level", + "key_metrics": {...}, + "achievements": ["conquista 1", "conquista 2"], + "challenges": ["desafio 1", "desafio 2"], + "action_items": [ + {"item": "...", "owner": "...", "due_date": "...", "priority": "high|medium|low"} + ], + "next_period_focus": ["foco 1", "foco 2"], + "budget_status": {"current": 1000, "projected": 1200, "variance": "..."}, + "risk_summary": "resumo de riscos" +}`, + period, + data.Period, + data.Uptime, + data.MTTR, + data.Incidents, + data.IncidentsDelta, + data.Alerts, + data.Deploys, + data.MainIncidents, + data.Improvements, + data.CostsSummary, + data.SecuritySummary, + ) + + response, err := i.engine.chat(ctx, prompt) + if err != nil { + return nil, err + } + + var report ExecutiveReport + if err := json.Unmarshal([]byte(response), &report); err != nil { + return nil, err + } + + return &report, nil +} + +// ReportData dados para relatório executivo +type ReportData struct { + Period string + Uptime float64 + MTTR string + Incidents int + IncidentsDelta int + Alerts int + Deploys int + MainIncidents string + Improvements string + CostsSummary string + SecuritySummary string +} + +// ExecutiveReport relatório executivo +type ExecutiveReport struct { + Period string `json:"period"` + ExecutiveSummary string `json:"executive_summary"` + Achievements []string `json:"achievements"` + Challenges []string `json:"challenges"` + ActionItems []ActionItem `json:"action_items"` + NextPeriodFocus []string `json:"next_period_focus"` + RiskSummary string `json:"risk_summary"` +} + +// ActionItem item de ação +type ActionItem struct { + Item string `json:"item"` + Owner string `json:"owner"` + DueDate string `json:"due_date"` + Priority string `json:"priority"` +} diff --git a/internal/ai/smart_alerts.go b/internal/ai/smart_alerts.go new file mode 100644 index 0000000..dca8c15 --- /dev/null +++ b/internal/ai/smart_alerts.go @@ -0,0 +1,266 @@ +package ai 
+ +import ( + "context" + "encoding/json" + "fmt" + "time" +) + +// SmartAlertEngine motor de alertas inteligentes +type SmartAlertEngine struct { + engine *AIEngine + alertBuffer []RawAlert + correlationWindow time.Duration +} + +// RawAlert alerta bruto antes do processamento +type RawAlert struct { + ID string + Metric string + Host string + Value float64 + Threshold float64 + Severity string + Timestamp time.Time + Labels map[string]string +} + +// SmartAlert alerta processado com IA +type SmartAlert struct { + ID string `json:"id"` + Title string `json:"title"` + Description string `json:"description"` + Severity string `json:"severity"` + Category string `json:"category"` + AffectedHosts []string `json:"affected_hosts"` + RootCause string `json:"root_cause"` + Impact string `json:"impact"` + Suggestions []string `json:"suggestions"` + RelatedAlerts []string `json:"related_alerts"` + Runbook string `json:"runbook,omitempty"` + IsNoise bool `json:"is_noise"` + Confidence float64 `json:"confidence"` + CorrelationID string `json:"correlation_id,omitempty"` + Timestamp time.Time `json:"timestamp"` +} + +// AlertCorrelation grupo de alertas correlacionados +type AlertCorrelation struct { + ID string `json:"id"` + Title string `json:"title"` + Alerts []SmartAlert `json:"alerts"` + RootCause string `json:"root_cause"` + Impact string `json:"impact"` + Severity string `json:"severity"` + StartTime time.Time `json:"start_time"` + AffectedServices []string `json:"affected_services"` +} + +// NewSmartAlertEngine cria novo engine de alertas inteligentes +func NewSmartAlertEngine(engine *AIEngine) *SmartAlertEngine { + return &SmartAlertEngine{ + engine: engine, + alertBuffer: make([]RawAlert, 0), + correlationWindow: 5 * time.Minute, + } +} + +// ProcessAlert processa um alerta com IA +func (s *SmartAlertEngine) ProcessAlert(ctx context.Context, raw RawAlert, systemContext AlertContext) (*SmartAlert, error) { + // Adicionar ao buffer para correlação + s.alertBuffer = 
append(s.alertBuffer, raw) + s.cleanOldAlerts() + + prompt := fmt.Sprintf(`Analise este alerta e enriqueça com contexto inteligente: + +ALERTA: +- Métrica: %s +- Host: %s +- Valor atual: %.2f +- Threshold: %.2f +- Severidade: %s +- Timestamp: %s +- Labels: %v + +CONTEXTO DO SISTEMA: +- Deploys recentes: %s +- Alertas relacionados (últimos 5min): %d +- Incidentes abertos: %d +- Horário comercial: %v + +ALERTAS RECENTES NO MESMO HOST: +%s + +PADRÕES CONHECIDOS: +- Este tipo de alerta costuma ser: %s +- Falsos positivos históricos: %d%% + +Analise e responda em JSON: +{ + "title": "título conciso e descritivo", + "description": "descrição detalhada do que está acontecendo", + "severity": "info|warning|error|critical", + "category": "performance|availability|security|capacity|configuration", + "root_cause": "provável causa raiz", + "impact": "impacto no negócio/usuários", + "suggestions": ["ação sugerida 1", "ação sugerida 2"], + "is_noise": boolean (true se for falso positivo ou não acionável), + "confidence": 0.0-1.0, + "runbook": "link ou passos do runbook se existir" +}`, + raw.Metric, + raw.Host, + raw.Value, + raw.Threshold, + raw.Severity, + raw.Timestamp.Format(time.RFC3339), + raw.Labels, + systemContext.RecentDeploys, + s.countRecentAlerts(raw.Host), + systemContext.OpenIncidents, + systemContext.IsBusinessHours, + s.getRecentAlertsForHost(raw.Host), + systemContext.HistoricalPattern, + systemContext.FalsePositiveRate, + ) + + response, err := s.engine.chat(ctx, prompt) + if err != nil { + return nil, err + } + + var smart SmartAlert + if err := json.Unmarshal([]byte(response), &smart); err != nil { + return nil, err + } + + smart.ID = raw.ID + smart.AffectedHosts = []string{raw.Host} + smart.Timestamp = raw.Timestamp + + return &smart, nil +} + +// CorrelateAlerts agrupa alertas relacionados +func (s *SmartAlertEngine) CorrelateAlerts(ctx context.Context, alerts []SmartAlert) (*AlertCorrelation, error) { + if len(alerts) < 2 { + return nil, nil + } + + 
alertSummary := "" + for _, a := range alerts { + alertSummary += fmt.Sprintf("- [%s] %s on %s: %s\n", + a.Severity, a.Title, a.AffectedHosts[0], a.RootCause) + } + + prompt := fmt.Sprintf(`Analise estes alertas e identifique se estão relacionados: + +ALERTAS: +%s + +Determine: +1. Se os alertas estão correlacionados (mesmo incidente) +2. Qual é a causa raiz comum +3. Qual é o impacto real +4. Qual deve ser a severidade consolidada + +Responda em JSON: +{ + "correlated": boolean, + "title": "título do incidente consolidado", + "root_cause": "causa raiz comum", + "impact": "impacto consolidado", + "severity": "severidade do grupo", + "affected_services": ["serviço1", "serviço2"], + "timeline": "sequência de eventos", + "recommendations": ["recomendação 1", "recomendação 2"] +}`, alertSummary) + + response, err := s.engine.chat(ctx, prompt) + if err != nil { + return nil, err + } + + var result struct { + Correlated bool `json:"correlated"` + Title string `json:"title"` + RootCause string `json:"root_cause"` + Impact string `json:"impact"` + Severity string `json:"severity"` + AffectedServices []string `json:"affected_services"` + } + + if err := json.Unmarshal([]byte(response), &result); err != nil { + return nil, err + } + + if !result.Correlated { + return nil, nil + } + + return &AlertCorrelation{ + ID: fmt.Sprintf("corr_%d", time.Now().UnixNano()), + Title: result.Title, + Alerts: alerts, + RootCause: result.RootCause, + Impact: result.Impact, + Severity: result.Severity, + StartTime: alerts[0].Timestamp, + AffectedServices: result.AffectedServices, + }, nil +} + +// SuppressNoise filtra alertas que são ruído +func (s *SmartAlertEngine) SuppressNoise(alerts []SmartAlert) []SmartAlert { + filtered := make([]SmartAlert, 0) + for _, a := range alerts { + if !a.IsNoise && a.Confidence > 0.5 { + filtered = append(filtered, a) + } + } + return filtered +} + +func (s *SmartAlertEngine) cleanOldAlerts() { + cutoff := time.Now().Add(-s.correlationWindow) + filtered := 
make([]RawAlert, 0) + for _, a := range s.alertBuffer { + if a.Timestamp.After(cutoff) { + filtered = append(filtered, a) + } + } + s.alertBuffer = filtered +} + +func (s *SmartAlertEngine) countRecentAlerts(host string) int { + count := 0 + for _, a := range s.alertBuffer { + if a.Host == host { + count++ + } + } + return count +} + +func (s *SmartAlertEngine) getRecentAlertsForHost(host string) string { + result := "" + for _, a := range s.alertBuffer { + if a.Host == host { + result += fmt.Sprintf("- %s: %s = %.2f\n", a.Timestamp.Format("15:04:05"), a.Metric, a.Value) + } + } + if result == "" { + return "Nenhum alerta recente" + } + return result +} + +// AlertContext contexto para processamento de alertas +type AlertContext struct { + RecentDeploys string + OpenIncidents int + IsBusinessHours bool + HistoricalPattern string + FalsePositiveRate int +} diff --git a/internal/api/ai_handlers.go b/internal/api/ai_handlers.go new file mode 100644 index 0000000..4d525aa --- /dev/null +++ b/internal/api/ai_handlers.go @@ -0,0 +1,287 @@ +package api + +import ( + "github.com/bigtux/ophion/internal/ai" + "github.com/gofiber/fiber/v2" +) + +// AIHandlers handlers para endpoints de IA +type AIHandlers struct { + copilot *ai.OphionCopilot + autoHealer *ai.AutoHealer + smartAlerts *ai.SmartAlertEngine + insights *ai.InsightsEngine +} + +// NewAIHandlers cria handlers de IA +func NewAIHandlers(engine *ai.AIEngine) *AIHandlers { + return &AIHandlers{ + copilot: ai.NewOphionCopilot(engine), + autoHealer: ai.NewAutoHealer(engine, ai.AutoHealConfig{ + Enabled: true, + DryRun: true, // Começa em dry-run por segurança + }), + smartAlerts: ai.NewSmartAlertEngine(engine), + insights: ai.NewInsightsEngine(engine), + } +} + +// RegisterRoutes registra rotas de IA +func (h *AIHandlers) RegisterRoutes(api fiber.Router) { + aiGroup := api.Group("/ai") + + // Copilot - Chat com IA + aiGroup.Post("/copilot/chat", h.CopilotChat) + + // Auto-Healing + aiGroup.Post("/autohealing/analyze", 
h.AnalyzeIncident) + aiGroup.Post("/autohealing/execute", h.ExecuteHealing) + + // Smart Alerts + aiGroup.Post("/alerts/process", h.ProcessAlert) + aiGroup.Post("/alerts/correlate", h.CorrelateAlerts) + + // Insights + aiGroup.Get("/insights/daily", h.GetDailyInsights) + aiGroup.Get("/insights/security", h.GetSecurityInsights) + aiGroup.Get("/reports/executive", h.GetExecutiveReport) + + // Anomaly Detection + aiGroup.Post("/anomaly/detect", h.DetectAnomaly) + + // Predictions + aiGroup.Get("/predictions/capacity", h.GetCapacityPredictions) +} + +// CopilotChat handler para chat com o Copilot +// @Summary Chat with OPHION Copilot +// @Tags AI +// @Accept json +// @Produce json +// @Param request body CopilotChatRequest true "Chat request" +// @Success 200 {object} ai.CopilotResponse +// @Router /api/v1/ai/copilot/chat [post] +func (h *AIHandlers) CopilotChat(c *fiber.Ctx) error { + var req CopilotChatRequest + if err := c.BodyParser(&req); err != nil { + return c.Status(400).JSON(fiber.Map{"error": "Invalid request"}) + } + + // Obter contexto do sistema (simplificado) + sysCtx := ai.SystemContext{ + TotalHosts: 10, + ActiveAlerts: 2, + OpenIncidents: 0, + } + + response, err := h.copilot.Chat(c.Context(), req.SessionID, req.Message, sysCtx) + if err != nil { + return c.Status(500).JSON(fiber.Map{"error": err.Error()}) + } + + return c.JSON(response) +} + +// AnalyzeIncident analisa incidente para auto-healing +// @Summary Analyze incident for auto-healing +// @Tags AI +// @Accept json +// @Produce json +// @Param request body ai.Incident true "Incident data" +// @Success 200 {object} ai.HealingPlan +// @Router /api/v1/ai/autohealing/analyze [post] +func (h *AIHandlers) AnalyzeIncident(c *fiber.Ctx) error { + var incident ai.Incident + if err := c.BodyParser(&incident); err != nil { + return c.Status(400).JSON(fiber.Map{"error": "Invalid request"}) + } + + plan, err := h.autoHealer.Analyze(c.Context(), incident) + if err != nil { + return 
c.Status(500).JSON(fiber.Map{"error": err.Error()}) + } + + return c.JSON(plan) +} + +// ExecuteHealing executes an auto-healing plan. +// @Summary Execute auto-healing plan +// @Tags AI +// @Accept json +// @Produce json +// @Param request body ai.HealingPlan true "Healing plan" +// @Success 200 {object} ai.HealingResult +// @Router /api/v1/ai/autohealing/execute [post] +func (h *AIHandlers) ExecuteHealing(c *fiber.Ctx) error { + var plan ai.HealingPlan + if err := c.BodyParser(&plan); err != nil { + return c.Status(400).JSON(fiber.Map{"error": "Invalid request"}) + } + + result, err := h.autoHealer.Execute(c.Context(), &plan) + if err != nil { + return c.Status(500).JSON(fiber.Map{"error": err.Error()}) + } + + return c.JSON(result) +} + +// ProcessAlert processes an alert with AI. +// @Summary Process alert with AI enrichment +// @Tags AI +// @Accept json +// @Produce json +// @Param request body ProcessAlertRequest true "Alert data" +// @Success 200 {object} ai.SmartAlert +// @Router /api/v1/ai/alerts/process [post] +func (h *AIHandlers) ProcessAlert(c *fiber.Ctx) error { + var req ProcessAlertRequest + if err := c.BodyParser(&req); err != nil { + return c.Status(400).JSON(fiber.Map{"error": "Invalid request"}) + } + + alertCtx := ai.AlertContext{ + IsBusinessHours: true, + } + + smart, err := h.smartAlerts.ProcessAlert(c.Context(), req.Alert, alertCtx) + if err != nil { + return c.Status(500).JSON(fiber.Map{"error": err.Error()}) + } + + return c.JSON(smart) +} + +// CorrelateAlerts correlates related alerts. +// @Summary Correlate related alerts +// @Tags AI +// @Accept json +// @Produce json +// @Param request body CorrelateRequest true "Alerts to correlate" +// @Success 200 {object} ai.AlertCorrelation +// @Router /api/v1/ai/alerts/correlate [post] +func (h *AIHandlers) CorrelateAlerts(c *fiber.Ctx) error { + var req CorrelateRequest + if err := c.BodyParser(&req); err != nil { + return c.Status(400).JSON(fiber.Map{"error": "Invalid request"}) + } + +
correlation, err := h.smartAlerts.CorrelateAlerts(c.Context(), req.Alerts) + if err != nil { + return c.Status(500).JSON(fiber.Map{"error": err.Error()}) + } + + if correlation == nil { + return c.JSON(fiber.Map{"correlated": false}) + } + + return c.JSON(correlation) +} + +// GetDailyInsights returns daily insights. +// @Summary Get daily AI insights +// @Tags AI +// @Produce json +// @Success 200 {object} ai.DailyInsights +// @Router /api/v1/ai/insights/daily [get] +func (h *AIHandlers) GetDailyInsights(c *fiber.Ctx) error { + // TODO: Fetch real data from the system + data := ai.DailyData{ + Period: "últimas 24h", + TotalHosts: 10, + } + + insights, err := h.insights.GenerateDailyInsights(c.Context(), data) + if err != nil { + return c.Status(500).JSON(fiber.Map{"error": err.Error()}) + } + + return c.JSON(insights) +} + +// GetSecurityInsights returns security insights. +// @Summary Get security insights +// @Tags AI +// @Produce json +// @Success 200 {array} ai.SecurityInsight +// @Router /api/v1/ai/insights/security [get] +func (h *AIHandlers) GetSecurityInsights(c *fiber.Ctx) error { + data := ai.SecurityData{} + + insights, err := h.insights.DetectSecurityAnomalies(c.Context(), data) + if err != nil { + return c.Status(500).JSON(fiber.Map{"error": err.Error()}) + } + + return c.JSON(insights) +} + +// GetExecutiveReport returns an executive report. +// @Summary Get executive report +// @Tags AI +// @Produce json +// @Param period query string false "Report period (weekly/monthly)" +// @Success 200 {object} ai.ExecutiveReport +// @Router /api/v1/ai/reports/executive [get] +func (h *AIHandlers) GetExecutiveReport(c *fiber.Ctx) error { + period := c.Query("period", "weekly") + + data := ai.ReportData{ + Period: period, + } + + report, err := h.insights.GenerateExecutiveReport(c.Context(), period, data) + if err != nil { + return c.Status(500).JSON(fiber.Map{"error": err.Error()}) + } + + return c.JSON(report) +} + +// DetectAnomaly detecta anomalias em
métricas +// @Summary Detect anomalies in metrics +// @Tags AI +// @Accept json +// @Produce json +// @Param request body ai.MetricsData true "Metrics data" +// @Success 200 {object} ai.AnomalyResult +// @Router /api/v1/ai/anomaly/detect [post] +func (h *AIHandlers) DetectAnomaly(c *fiber.Ctx) error { + var data ai.MetricsData + if err := c.BodyParser(&data); err != nil { + return c.Status(400).JSON(fiber.Map{"error": "Invalid request"}) + } + + // TODO: Use the engine directly + return c.JSON(fiber.Map{"message": "Anomaly detection endpoint"}) +} + +// GetCapacityPredictions returns capacity predictions. +// @Summary Get capacity predictions +// @Tags AI +// @Produce json +// @Param host query string false "Filter by host" +// @Success 200 {array} ai.PredictionResult +// @Router /api/v1/ai/predictions/capacity [get] +func (h *AIHandlers) GetCapacityPredictions(c *fiber.Ctx) error { + // TODO: Implement prediction lookup + return c.JSON([]fiber.Map{}) +} + +// === Request/Response types === + +// CopilotChatRequest is the request payload for the copilot chat. +type CopilotChatRequest struct { + SessionID string `json:"session_id"` + Message string `json:"message"` +} + +// ProcessAlertRequest is the request payload for processing an alert. +type ProcessAlertRequest struct { + Alert ai.RawAlert `json:"alert"` +} + +// CorrelateRequest is the request payload for correlating alerts. +type CorrelateRequest struct { + Alerts []ai.SmartAlert `json:"alerts"` +}