feat: Add AI differentiators

🤖 OPHION Copilot
- Interactive AI assistant for troubleshooting
- Context-aware conversations
- Actionable suggestions with commands

🔧 Auto-Healing
- AI-powered incident analysis
- Automatic remediation plans
- Safe execution with dry-run mode

🚨 Smart Alerts
- Noise reduction
- Alert correlation
- Root cause analysis
- Impact assessment

📊 AI Insights
- Daily insights generation
- Security anomaly detection
- Cost optimization suggestions
- Capacity predictions
- Executive reports

🛡️ Security
- Behavioral anomaly detection
- Intrusion attempt identification
- Compliance monitoring
This commit is contained in:
2026-02-05 22:48:10 -03:00
parent d58ac37e39
commit 369373b387
5 changed files with 1473 additions and 0 deletions

266
internal/ai/smart_alerts.go Normal file
View File

@@ -0,0 +1,266 @@
package ai
import (
"context"
"encoding/json"
"fmt"
"time"
)
// SmartAlertEngine is the smart alert engine: it enriches raw alerts through
// an AIEngine and keeps a buffer of recently processed raw alerts.
type SmartAlertEngine struct {
	// engine is the AI backend whose chat method receives the prompts.
	engine *AIEngine
	// alertBuffer holds the raw alerts appended by ProcessAlert; entries are
	// pruned by cleanOldAlerts.
	alertBuffer []RawAlert
	// correlationWindow is the maximum age an alert may have (relative to the
	// wall clock) before cleanOldAlerts drops it from alertBuffer.
	correlationWindow time.Duration
}
// RawAlert is an alert as received, before any AI processing.
type RawAlert struct {
	// ID identifies the alert; ProcessAlert copies it onto the SmartAlert.
	ID string
	// Metric is the name of the metric that triggered the alert.
	Metric string
	// Host is the host the alert refers to; it is the key used when counting
	// and listing recent alerts.
	Host string
	// Value is the current value reported for the metric.
	Value float64
	// Threshold is the threshold reported alongside Value.
	Threshold float64
	// Severity is the severity assigned by the alert source.
	Severity string
	// Timestamp is when the alert was raised; it decides how long the alert
	// stays in the engine's buffer.
	Timestamp time.Time
	// Labels carries free-form key/value metadata.
	Labels map[string]string
}
// SmartAlert is an alert after AI processing. Apart from ID, AffectedHosts and
// Timestamp (which ProcessAlert copies from the raw alert), the fields are
// decoded from the AI's JSON answer and are not validated.
type SmartAlert struct {
	// ID is the identifier of the originating raw alert.
	ID string `json:"id"`
	// Title is a short descriptive title.
	Title string `json:"title"`
	// Description is a detailed account of what is happening.
	Description string `json:"description"`
	// Severity is requested from the AI as one of info, warning, error or
	// critical.
	Severity string `json:"severity"`
	// Category is requested from the AI as one of performance, availability,
	// security, capacity or configuration.
	Category string `json:"category"`
	// AffectedHosts lists the hosts concerned by the alert.
	AffectedHosts []string `json:"affected_hosts"`
	// RootCause is the probable root cause.
	RootCause string `json:"root_cause"`
	// Impact describes the business/user impact.
	Impact string `json:"impact"`
	// Suggestions lists suggested actions.
	Suggestions []string `json:"suggestions"`
	// RelatedAlerts lists related alerts.
	// NOTE(review): presumably alert IDs; nothing in this file fills it in.
	RelatedAlerts []string `json:"related_alerts"`
	// Runbook is a runbook link or steps, omitted from JSON when empty.
	Runbook string `json:"runbook,omitempty"`
	// IsNoise marks false positives or non-actionable alerts.
	IsNoise bool `json:"is_noise"`
	// Confidence is requested from the AI in the range 0.0-1.0.
	Confidence float64 `json:"confidence"`
	// CorrelationID is omitted from JSON when empty.
	// NOTE(review): presumably links the alert to an AlertCorrelation;
	// nothing in this file fills it in.
	CorrelationID string `json:"correlation_id,omitempty"`
	// Timestamp is the timestamp of the originating raw alert.
	Timestamp time.Time `json:"timestamp"`
}
// AlertCorrelation is a group of alerts judged to belong to the same incident.
type AlertCorrelation struct {
	// ID identifies the correlation group.
	ID string `json:"id"`
	// Title is the title of the consolidated incident.
	Title string `json:"title"`
	// Alerts are the alerts that make up the group.
	Alerts []SmartAlert `json:"alerts"`
	// RootCause is the common root cause of the group.
	RootCause string `json:"root_cause"`
	// Impact is the consolidated impact.
	Impact string `json:"impact"`
	// Severity is the severity of the group as a whole.
	Severity string `json:"severity"`
	// StartTime is the start time of the group.
	StartTime time.Time `json:"start_time"`
	// AffectedServices lists the services affected by the incident.
	AffectedServices []string `json:"affected_services"`
}
// NewSmartAlertEngine returns a SmartAlertEngine that sends its prompts to
// engine. The new engine starts with an empty (non-nil) alert buffer and a
// correlation window of five minutes.
func NewSmartAlertEngine(engine *AIEngine) *SmartAlertEngine {
	created := new(SmartAlertEngine)
	created.engine = engine
	created.alertBuffer = []RawAlert{}
	created.correlationWindow = 5 * time.Minute
	return created
}
// ProcessAlert enriches a raw alert with AI-generated context.
//
// The alert is appended to the engine's buffer and the buffer is pruned with
// cleanOldAlerts. A prompt describing the alert, the supplied systemContext
// and the buffered alerts for the same host is then sent through
// s.engine.chat, and the answer is decoded as JSON into a SmartAlert. ID,
// AffectedHosts and Timestamp are always taken from raw, overriding whatever
// the answer contained.
//
// On failure the returned alert is nil and the error wraps the underlying
// chat or JSON decoding error, so errors.Is and errors.As still see it. The
// raw alert stays in the buffer even when processing fails.
func (s *SmartAlertEngine) ProcessAlert(ctx context.Context, raw RawAlert, systemContext AlertContext) (*SmartAlert, error) {
	// Buffer first: the per-host count and listing in the prompt are computed
	// from the buffer after pruning, so they can include this alert.
	s.alertBuffer = append(s.alertBuffer, raw)
	s.cleanOldAlerts()

	prompt := fmt.Sprintf(`Analise este alerta e enriqueça com contexto inteligente:
ALERTA:
- Métrica: %s
- Host: %s
- Valor atual: %.2f
- Threshold: %.2f
- Severidade: %s
- Timestamp: %s
- Labels: %v
CONTEXTO DO SISTEMA:
- Deploys recentes: %s
- Alertas relacionados (últimos 5min): %d
- Incidentes abertos: %d
- Horário comercial: %v
ALERTAS RECENTES NO MESMO HOST:
%s
PADRÕES CONHECIDOS:
- Este tipo de alerta costuma ser: %s
- Falsos positivos históricos: %d%%
Analise e responda em JSON:
{
"title": "título conciso e descritivo",
"description": "descrição detalhada do que está acontecendo",
"severity": "info|warning|error|critical",
"category": "performance|availability|security|capacity|configuration",
"root_cause": "provável causa raiz",
"impact": "impacto no negócio/usuários",
"suggestions": ["ação sugerida 1", "ação sugerida 2"],
"is_noise": boolean (true se for falso positivo ou não acionável),
"confidence": 0.0-1.0,
"runbook": "link ou passos do runbook se existir"
}`,
		raw.Metric,
		raw.Host,
		raw.Value,
		raw.Threshold,
		raw.Severity,
		raw.Timestamp.Format(time.RFC3339),
		raw.Labels,
		systemContext.RecentDeploys,
		s.countRecentAlerts(raw.Host),
		systemContext.OpenIncidents,
		systemContext.IsBusinessHours,
		s.getRecentAlertsForHost(raw.Host),
		systemContext.HistoricalPattern,
		systemContext.FalsePositiveRate,
	)

	response, err := s.engine.chat(ctx, prompt)
	if err != nil {
		return nil, fmt.Errorf("processing alert %q: chat request: %w", raw.ID, err)
	}

	var smart SmartAlert
	if err := json.Unmarshal([]byte(response), &smart); err != nil {
		return nil, fmt.Errorf("processing alert %q: decoding AI response: %w", raw.ID, err)
	}

	// Identity fields come from the raw alert, never from the AI answer.
	smart.ID = raw.ID
	smart.AffectedHosts = []string{raw.Host}
	smart.Timestamp = raw.Timestamp
	return &smart, nil
}
// CorrelateAlerts asks the AI whether the given alerts belong to the same
// incident and, if so, returns them grouped as an AlertCorrelation.
//
// It returns (nil, nil) when fewer than two alerts are supplied or when the AI
// answers that the alerts are not correlated. On failure the error wraps the
// underlying chat or JSON decoding error, so errors.Is and errors.As still
// see it.
//
// The correlation's StartTime is the Timestamp of the first alert in the
// slice. NOTE(review): presumably callers pass alerts oldest-first — confirm.
func (s *SmartAlertEngine) CorrelateAlerts(ctx context.Context, alerts []SmartAlert) (*AlertCorrelation, error) {
	if len(alerts) < 2 {
		return nil, nil
	}

	alertSummary := ""
	for _, a := range alerts {
		// AffectedHosts may be empty (e.g. an alert decoded from JSON without
		// hosts); indexing it unconditionally would panic.
		host := "unknown"
		if len(a.AffectedHosts) > 0 {
			host = a.AffectedHosts[0]
		}
		alertSummary += fmt.Sprintf("- [%s] %s on %s: %s\n",
			a.Severity, a.Title, host, a.RootCause)
	}

	prompt := fmt.Sprintf(`Analise estes alertas e identifique se estão relacionados:
ALERTAS:
%s
Determine:
1. Se os alertas estão correlacionados (mesmo incidente)
2. Qual é a causa raiz comum
3. Qual é o impacto real
4. Qual deve ser a severidade consolidada
Responda em JSON:
{
"correlated": boolean,
"title": "título do incidente consolidado",
"root_cause": "causa raiz comum",
"impact": "impacto consolidado",
"severity": "severidade do grupo",
"affected_services": ["serviço1", "serviço2"],
"timeline": "sequência de eventos",
"recommendations": ["recomendação 1", "recomendação 2"]
}`, alertSummary)

	response, err := s.engine.chat(ctx, prompt)
	if err != nil {
		return nil, fmt.Errorf("correlating %d alerts: chat request: %w", len(alerts), err)
	}

	// Only the fields needed to build the correlation are decoded; the
	// "timeline" and "recommendations" keys of the answer are ignored.
	var result struct {
		Correlated       bool     `json:"correlated"`
		Title            string   `json:"title"`
		RootCause        string   `json:"root_cause"`
		Impact           string   `json:"impact"`
		Severity         string   `json:"severity"`
		AffectedServices []string `json:"affected_services"`
	}
	if err := json.Unmarshal([]byte(response), &result); err != nil {
		return nil, fmt.Errorf("correlating %d alerts: decoding AI response: %w", len(alerts), err)
	}

	if !result.Correlated {
		return nil, nil
	}

	return &AlertCorrelation{
		ID:               fmt.Sprintf("corr_%d", time.Now().UnixNano()),
		Title:            result.Title,
		Alerts:           alerts,
		RootCause:        result.RootCause,
		Impact:           result.Impact,
		Severity:         result.Severity,
		StartTime:        alerts[0].Timestamp,
		AffectedServices: result.AffectedServices,
	}, nil
}
// SuppressNoise returns, in their original order, the alerts that are not
// flagged as noise and whose Confidence is strictly greater than 0.5. The
// result is a new slice and is never nil; the input is left untouched.
func (s *SmartAlertEngine) SuppressNoise(alerts []SmartAlert) []SmartAlert {
	kept := []SmartAlert{}
	for i := range alerts {
		candidate := alerts[i]
		if candidate.IsNoise {
			continue
		}
		// Written as "> 0.5" rather than an inverted "<= 0.5" guard so that a
		// NaN confidence is still treated as not confident enough.
		if candidate.Confidence > 0.5 {
			kept = append(kept, candidate)
		}
	}
	return kept
}
// cleanOldAlerts replaces alertBuffer with a new slice containing only the
// alerts whose Timestamp is strictly after the current wall-clock time minus
// correlationWindow. Relative order is preserved.
func (s *SmartAlertEngine) cleanOldAlerts() {
	oldestAllowed := time.Now().Add(-s.correlationWindow)
	recent := []RawAlert{}
	for i := range s.alertBuffer {
		if entry := s.alertBuffer[i]; entry.Timestamp.After(oldestAllowed) {
			recent = append(recent, entry)
		}
	}
	s.alertBuffer = recent
}
// countRecentAlerts reports how many alerts currently in alertBuffer have a
// Host equal to host. It applies no age filter of its own; recency depends on
// the buffer having been pruned beforehand.
func (s *SmartAlertEngine) countRecentAlerts(host string) int {
	var matches int
	for i := range s.alertBuffer {
		if s.alertBuffer[i].Host != host {
			continue
		}
		matches++
	}
	return matches
}
// getRecentAlertsForHost renders the buffered alerts whose Host equals host as
// a bullet list, one "- HH:MM:SS: metric = value" line per alert in buffer
// order. When no buffered alert matches, it returns a fixed placeholder
// sentence instead of an empty string.
func (s *SmartAlertEngine) getRecentAlertsForHost(host string) string {
	var listing string
	for i := range s.alertBuffer {
		entry := s.alertBuffer[i]
		if entry.Host != host {
			continue
		}
		stamp := entry.Timestamp.Format("15:04:05")
		listing += fmt.Sprintf("- %s: %s = %.2f\n", stamp, entry.Metric, entry.Value)
	}
	if len(listing) == 0 {
		return "Nenhum alerta recente"
	}
	return listing
}
// AlertContext is the caller-supplied system context that ProcessAlert
// interpolates into the prompt sent to the AI.
type AlertContext struct {
	// RecentDeploys is free text describing recent deploys.
	RecentDeploys string
	// OpenIncidents is the number of currently open incidents.
	OpenIncidents int
	// IsBusinessHours tells whether the alert fired during business hours.
	IsBusinessHours bool
	// HistoricalPattern is free text describing what this kind of alert
	// usually turns out to be.
	HistoricalPattern string
	// FalsePositiveRate is the historical false-positive rate, rendered in
	// the prompt as a percentage.
	FalsePositiveRate int
}