// ophion/internal/ai/smart_alerts.go

package ai

import (
	"context"
	"encoding/json"
	"fmt"
	"strings"
	"time"
)

// SmartAlertEngine turns raw alerts into AI-enriched alerts. It keeps a
// buffer of recently seen raw alerts so prompts can include per-host context.
type SmartAlertEngine struct {
	// engine is the AI backend; its chat method receives the prompts.
	engine *AIEngine
	// alertBuffer holds the raw alerts recorded by ProcessAlert that have
	// not yet been pruned by cleanOldAlerts.
	alertBuffer []RawAlert
	// correlationWindow is how far back in time buffered alerts are kept.
	correlationWindow time.Duration
}

// RawAlert is an unprocessed alert as received, before any AI enrichment.
type RawAlert struct {
	// ID identifies the alert; ProcessAlert copies it onto the SmartAlert.
	ID string
	// Metric is the name of the metric that triggered the alert.
	Metric string
	// Host is the host the alert refers to; buffered alerts are grouped by it.
	Host string
	// Value is the observed metric value.
	Value float64
	// Threshold is the limit the value is compared against.
	Threshold float64
	// Severity is the severity reported by the alert source.
	Severity string
	// Timestamp is when the alert fired; it drives buffer expiry.
	Timestamp time.Time
	// Labels carries free-form key/value metadata included in the prompt.
	Labels map[string]string
}

// SmartAlert is an alert after AI processing. Most fields are decoded from
// the model's JSON reply; ProcessAlert then overwrites ID, AffectedHosts and
// Timestamp with values taken from the originating RawAlert.
type SmartAlert struct {
	// ID is the identifier of the originating raw alert.
	ID string `json:"id"`
	// Title is a short, descriptive headline.
	Title string `json:"title"`
	// Description explains in detail what is happening.
	Description string `json:"description"`
	// Severity is requested from the model as info, warning, error or critical.
	Severity string `json:"severity"`
	// Category is requested from the model as performance, availability,
	// security, capacity or configuration.
	Category string `json:"category"`
	// AffectedHosts lists the hosts the alert applies to.
	AffectedHosts []string `json:"affected_hosts"`
	// RootCause is the probable root cause.
	RootCause string `json:"root_cause"`
	// Impact describes the effect on the business or users.
	Impact string `json:"impact"`
	// Suggestions lists recommended actions.
	Suggestions []string `json:"suggestions"`
	// RelatedAlerts lists identifiers of related alerts.
	RelatedAlerts []string `json:"related_alerts"`
	// Runbook is a link to, or the steps of, a runbook; omitted when empty.
	Runbook string `json:"runbook,omitempty"`
	// IsNoise marks a false positive or non-actionable alert.
	IsNoise bool `json:"is_noise"`
	// Confidence is requested from the model as a value in 0.0-1.0.
	Confidence float64 `json:"confidence"`
	// CorrelationID links the alert to a correlation group; omitted when empty.
	CorrelationID string `json:"correlation_id,omitempty"`
	// Timestamp is when the originating raw alert fired.
	Timestamp time.Time `json:"timestamp"`
}

// AlertCorrelation is a group of alerts judged to belong to the same
// incident, together with the consolidated analysis of that incident.
type AlertCorrelation struct {
	// ID identifies the correlation group.
	ID string `json:"id"`
	// Title is the headline of the consolidated incident.
	Title string `json:"title"`
	// Alerts are the alerts that make up the group.
	Alerts []SmartAlert `json:"alerts"`
	// RootCause is the root cause shared by the grouped alerts.
	RootCause string `json:"root_cause"`
	// Impact is the consolidated impact of the incident.
	Impact string `json:"impact"`
	// Severity is the severity assigned to the group as a whole.
	Severity string `json:"severity"`
	// StartTime is when the incident is considered to have started;
	// CorrelateAlerts derives it from the grouped alerts' timestamps.
	StartTime time.Time `json:"start_time"`
	// AffectedServices lists the services touched by the incident.
	AffectedServices []string `json:"affected_services"`
}

// NewSmartAlertEngine builds a SmartAlertEngine backed by engine. The new
// engine starts with an empty (non-nil) alert buffer and a five-minute
// correlation window.
func NewSmartAlertEngine(engine *AIEngine) *SmartAlertEngine {
	const defaultWindow = 5 * time.Minute

	s := new(SmartAlertEngine)
	s.engine = engine
	s.alertBuffer = []RawAlert{}
	s.correlationWindow = defaultWindow
	return s
}

// ProcessAlert enriches a raw alert with an AI-generated analysis.
//
// It prunes expired alerts from the buffer, captures the count and listing of
// the alerts already buffered for raw.Host, records raw in the buffer for
// future calls, and sends a prompt built from raw, systemContext and that
// host history to the engine's chat method. The reply is decoded as JSON into
// a SmartAlert whose ID, AffectedHosts and Timestamp are then overwritten
// with the values from raw.
//
// It returns a wrapped error if the chat call fails or the reply is not
// valid JSON for SmartAlert; raw stays buffered in both cases. The buffer is
// modified without any locking in this method.
func (s *SmartAlertEngine) ProcessAlert(ctx context.Context, raw RawAlert, systemContext AlertContext) (*SmartAlert, error) {
	// Snapshot the host history BEFORE buffering raw. Otherwise the alert
	// under analysis would be reported as its own "related"/"recent" alert:
	// the count would be off by one and the "no recent alerts" text could
	// never be produced for a fresh alert.
	s.cleanOldAlerts()
	relatedCount := s.countRecentAlerts(raw.Host)
	hostHistory := s.getRecentAlertsForHost(raw.Host)

	// Record the alert so later calls can use it as context. If it is
	// already outside the window, the next prune removes it.
	s.alertBuffer = append(s.alertBuffer, raw)

	prompt := fmt.Sprintf(`Analise este alerta e enriqueça com contexto inteligente:

ALERTA:
- Métrica: %s
- Host: %s
- Valor atual: %.2f
- Threshold: %.2f
- Severidade: %s
- Timestamp: %s
- Labels: %v

CONTEXTO DO SISTEMA:
- Deploys recentes: %s
- Alertas relacionados (últimos 5min): %d
- Incidentes abertos: %d
- Horário comercial: %v

ALERTAS RECENTES NO MESMO HOST:
%s

PADRÕES CONHECIDOS:
- Este tipo de alerta costuma ser: %s
- Falsos positivos históricos: %d%%

Analise e responda em JSON:
{
  "title": "título conciso e descritivo",
  "description": "descrição detalhada do que está acontecendo",
  "severity": "info|warning|error|critical",
  "category": "performance|availability|security|capacity|configuration",
  "root_cause": "provável causa raiz",
  "impact": "impacto no negócio/usuários",
  "suggestions": ["ação sugerida 1", "ação sugerida 2"],
  "is_noise": boolean (true se for falso positivo ou não acionável),
  "confidence": 0.0-1.0,
  "runbook": "link ou passos do runbook se existir"
}`,
		raw.Metric,
		raw.Host,
		raw.Value,
		raw.Threshold,
		raw.Severity,
		raw.Timestamp.Format(time.RFC3339),
		raw.Labels,
		systemContext.RecentDeploys,
		relatedCount,
		systemContext.OpenIncidents,
		systemContext.IsBusinessHours,
		hostHistory,
		systemContext.HistoricalPattern,
		systemContext.FalsePositiveRate,
	)

	response, err := s.engine.chat(ctx, prompt)
	if err != nil {
		return nil, fmt.Errorf("requesting analysis for alert %q: %w", raw.ID, err)
	}

	var smart SmartAlert
	if err := json.Unmarshal([]byte(response), &smart); err != nil {
		return nil, fmt.Errorf("decoding analysis for alert %q: %w", raw.ID, err)
	}

	// Identity fields always come from the raw alert, never from the model.
	smart.ID = raw.ID
	smart.AffectedHosts = []string{raw.Host}
	smart.Timestamp = raw.Timestamp

	return &smart, nil
}

// CorrelateAlerts asks the AI engine whether the given alerts belong to the
// same incident and, if so, returns them as one AlertCorrelation.
//
// It returns (nil, nil) when fewer than two alerts are supplied or when the
// model reports that the alerts are not correlated. It returns a wrapped
// error if the chat call fails or the reply cannot be decoded as JSON.
//
// The returned correlation references the caller's alerts slice (no copy is
// made). StartTime is the earliest non-zero timestamp among the alerts, so
// the input does not have to be sorted. The "timeline" and "recommendations"
// keys requested in the prompt are not decoded.
func (s *SmartAlertEngine) CorrelateAlerts(ctx context.Context, alerts []SmartAlert) (*AlertCorrelation, error) {
	if len(alerts) < 2 {
		return nil, nil
	}

	var (
		summary strings.Builder
		start   time.Time
	)
	for _, a := range alerts {
		// An alert may carry no hosts; indexing [0] unconditionally would panic.
		hosts := "unknown"
		if len(a.AffectedHosts) > 0 {
			hosts = strings.Join(a.AffectedHosts, ", ")
		}
		fmt.Fprintf(&summary, "- [%s] %s on %s: %s\n",
			a.Severity, a.Title, hosts, a.RootCause)

		// Track the earliest known timestamp rather than trusting input order.
		if !a.Timestamp.IsZero() && (start.IsZero() || a.Timestamp.Before(start)) {
			start = a.Timestamp
		}
	}

	prompt := fmt.Sprintf(`Analise estes alertas e identifique se estão relacionados:

ALERTAS:
%s

Determine:
1. Se os alertas estão correlacionados (mesmo incidente)
2. Qual é a causa raiz comum
3. Qual é o impacto real
4. Qual deve ser a severidade consolidada

Responda em JSON:
{
  "correlated": boolean,
  "title": "título do incidente consolidado",
  "root_cause": "causa raiz comum",
  "impact": "impacto consolidado",
  "severity": "severidade do grupo",
  "affected_services": ["serviço1", "serviço2"],
  "timeline": "sequência de eventos",
  "recommendations": ["recomendação 1", "recomendação 2"]
}`, summary.String())

	response, err := s.engine.chat(ctx, prompt)
	if err != nil {
		return nil, fmt.Errorf("requesting alert correlation: %w", err)
	}

	var result struct {
		Correlated       bool     `json:"correlated"`
		Title            string   `json:"title"`
		RootCause        string   `json:"root_cause"`
		Impact           string   `json:"impact"`
		Severity         string   `json:"severity"`
		AffectedServices []string `json:"affected_services"`
	}

	if err := json.Unmarshal([]byte(response), &result); err != nil {
		return nil, fmt.Errorf("decoding alert correlation: %w", err)
	}

	if !result.Correlated {
		return nil, nil
	}

	return &AlertCorrelation{
		ID:               fmt.Sprintf("corr_%d", time.Now().UnixNano()),
		Title:            result.Title,
		Alerts:           alerts,
		RootCause:        result.RootCause,
		Impact:           result.Impact,
		Severity:         result.Severity,
		StartTime:        start,
		AffectedServices: result.AffectedServices,
	}, nil
}

// SuppressNoise returns the alerts worth acting on: those not flagged as
// noise whose confidence is strictly greater than 0.5. The result is always
// a non-nil slice, in the same order as the input.
func (s *SmartAlertEngine) SuppressNoise(alerts []SmartAlert) []SmartAlert {
	kept := []SmartAlert{}
	for i := range alerts {
		candidate := alerts[i]
		// Drop anything marked as noise or lacking sufficient confidence.
		if candidate.IsNoise || !(candidate.Confidence > 0.5) {
			continue
		}
		kept = append(kept, candidate)
	}
	return kept
}

// cleanOldAlerts replaces alertBuffer with a fresh slice holding only the
// alerts whose timestamp is strictly after now minus correlationWindow.
func (s *SmartAlertEngine) cleanOldAlerts() {
	oldest := time.Now().Add(-s.correlationWindow)
	kept := []RawAlert{}
	for i := range s.alertBuffer {
		if alert := s.alertBuffer[i]; alert.Timestamp.After(oldest) {
			kept = append(kept, alert)
		}
	}
	s.alertBuffer = kept
}

// countRecentAlerts reports how many alerts currently in alertBuffer belong
// to host. It applies no time filter of its own; recency depends on the
// buffer having been pruned beforehand.
func (s *SmartAlertEngine) countRecentAlerts(host string) int {
	var n int
	for i := range s.alertBuffer {
		if s.alertBuffer[i].Host != host {
			continue
		}
		n++
	}
	return n
}

// getRecentAlertsForHost renders the buffered alerts for host as one line per
// alert ("- HH:MM:SS: metric = value"), in buffer order, for inclusion in a
// prompt. When the buffer holds no alert for host it returns the placeholder
// text "Nenhum alerta recente". It applies no time filter of its own.
func (s *SmartAlertEngine) getRecentAlertsForHost(host string) string {
	// A Builder avoids re-copying the accumulated text on every iteration.
	var lines strings.Builder
	for i := range s.alertBuffer {
		a := &s.alertBuffer[i]
		if a.Host != host {
			continue
		}
		fmt.Fprintf(&lines, "- %s: %s = %.2f\n", a.Timestamp.Format("15:04:05"), a.Metric, a.Value)
	}
	if lines.Len() == 0 {
		return "Nenhum alerta recente"
	}
	return lines.String()
}

// AlertContext carries caller-supplied information about the surrounding
// system that ProcessAlert interpolates into the prompt sent to the model.
type AlertContext struct {
	RecentDeploys     string // free-text description of recent deploys
	OpenIncidents     int    // number of incidents currently open
	IsBusinessHours   bool   // whether the alert fired during business hours
	HistoricalPattern string // what this kind of alert has usually turned out to be
	FalsePositiveRate int    // historical false-positive rate; rendered as a percentage ("%d%%")
}