feat: Add AI engine and complete documentation

- Add AI engine with OpenAI integration
- Anomaly detection
- Log analysis with pattern clustering
- Capacity prediction
- Complete installation manual (Portuguese)
- Docker monitoring guide
- APM integration guide (Node.js, Python, Go)
- Alert configuration guide
2026-02-05 22:45:05 -03:00
parent b64cb8e133
commit d58ac37e39
3 changed files with 1685 additions and 0 deletions
--- a/internal/ai/engine.go
+++ b/internal/ai/engine.go
@@ -0,0 +1,336 @@
+package ai
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"fmt"
+	"net/http"
+	"os"
+	"time"
+)
+
+// AIEngine is the artificial-intelligence engine of OPHION. It bundles the
+// credentials, model name and HTTP client used to call the chat completions
+// endpoint.
+type AIEngine struct {
+	// apiKey is sent as the Bearer token in the Authorization header.
+	apiKey string
+	// model is the model name placed in every chat request body.
+	model string
+	// baseURL is the API root; "/chat/completions" is appended to it.
+	baseURL string
+	// client performs the outgoing HTTP requests.
+	client *http.Client
+	// enabled gates the analysis methods: when false they return an error
+	// without contacting the API.
+	enabled bool
+}
+
+// Config carries the settings used to build an AIEngine.
+type Config struct {
+	// APIKey is the API key used to authenticate requests.
+	APIKey string
+	// Model is the name of the chat model to request.
+	Model string
+	// Enabled turns the engine's analysis methods on or off.
+	Enabled bool
+}
+
+// AnomalyResult is the outcome of an anomaly-detection request, decoded from
+// the JSON object the model is asked to return.
+type AnomalyResult struct {
+	// IsAnomaly reports whether an anomaly was detected.
+	IsAnomaly bool `json:"is_anomaly"`
+	// Confidence is requested from the model as a value between 0.0 and 1.0.
+	Confidence float64 `json:"confidence"`
+	// Severity is requested as one of "low", "medium", "high" or "critical".
+	Severity string `json:"severity"`
+	// Description is a free-text description of the anomaly.
+	Description string `json:"description"`
+	// Suggestions lists suggested actions.
+	Suggestions []string `json:"suggestions"`
+	// RelatedEvents lists events the model considers related.
+	RelatedEvents []string `json:"related_events"`
+}
+
+// LogAnalysisResult is the outcome of a log-analysis request, decoded from
+// the JSON object the model is asked to return.
+type LogAnalysisResult struct {
+	// Clusters groups similar log lines by pattern.
+	Clusters []LogCluster `json:"clusters"`
+	// RootCause is the model's guess at the underlying cause of the problems.
+	RootCause string `json:"root_cause"`
+	// Suggestions lists suggested fixes.
+	Suggestions []string `json:"suggestions"`
+}
+
+// LogCluster is a group of similar log lines.
+type LogCluster struct {
+	// Pattern is the pattern shared by the grouped lines.
+	Pattern string `json:"pattern"`
+	// Count is the number of lines in the cluster.
+	Count int `json:"count"`
+	// Severity is requested from the model as "error", "warning" or "info".
+	Severity string `json:"severity"`
+	// Examples holds sample log lines from the cluster.
+	Examples []string `json:"examples"`
+	// Description is a free-text description of the cluster.
+	Description string `json:"description"`
+}
+
+// PredictionResult is the outcome of a capacity-prediction request, decoded
+// from the JSON object the model is asked to return.
+type PredictionResult struct {
+	// Metric is the name of the metric the prediction refers to.
+	Metric string `json:"metric"`
+	// CurrentValue is the metric's current value.
+	CurrentValue float64 `json:"current_value"`
+	// PredictedValue is the value the model predicts.
+	PredictedValue float64 `json:"predicted_value"`
+	// TimeToThreshold is free text describing when the threshold will be
+	// reached (the prompt's example is "em 7 dias").
+	TimeToThreshold string `json:"time_to_threshold"`
+	// Confidence is the model's confidence (the prompt's example is 0.85).
+	Confidence float64 `json:"confidence"`
+	// Recommendation is the recommended action.
+	Recommendation string `json:"recommendation"`
+}
+
+// NewAIEngine creates a new AIEngine from config. The engine targets
+// https://api.openai.com/v1 and uses an HTTP client with a 30-second timeout.
+func NewAIEngine(config Config) *AIEngine {
+	httpClient := &http.Client{Timeout: 30 * time.Second}
+
+	engine := new(AIEngine)
+	engine.apiKey = config.APIKey
+	engine.model = config.Model
+	engine.baseURL = "https://api.openai.com/v1"
+	engine.client = httpClient
+	engine.enabled = config.Enabled
+	return engine
+}
+
+// NewFromEnv creates an AIEngine configured from environment variables:
+// OPENAI_API_KEY supplies the API key, AI_MODEL the model name (defaulting to
+// "gpt-4o-mini" when unset or empty), and AI_ENABLED enables the engine only
+// when it is exactly "true".
+func NewFromEnv() *AIEngine {
+	cfg := Config{
+		APIKey: os.Getenv("OPENAI_API_KEY"),
+		Model:  getEnvOrDefault("AI_MODEL", "gpt-4o-mini"),
+	}
+	cfg.Enabled = os.Getenv("AI_ENABLED") == "true"
+	return NewAIEngine(cfg)
+}
+
+// getEnvOrDefault returns the value of the environment variable key, or
+// defaultValue when the variable is unset or set to the empty string.
+func getEnvOrDefault(key, defaultValue string) string {
+	value := os.Getenv(key)
+	if value == "" {
+		return defaultValue
+	}
+	return value
+}
+
+// DetectAnomaly asks the model whether the supplied server metrics look
+// anomalous.
+//
+// It formats a Portuguese prompt from metrics (host, period, current and
+// baseline CPU/memory/disk/connection values, and the history text), sends it
+// through e.chat, and decodes the reply as JSON into an AnomalyResult.
+//
+// An error is returned when the engine is disabled, when the chat call fails,
+// or when the reply is not valid JSON for AnomalyResult.
+func (e *AIEngine) DetectAnomaly(ctx context.Context, metrics MetricsData) (*AnomalyResult, error) {
+	if !e.enabled {
+		return nil, fmt.Errorf("AI engine is disabled")
+	}
+
+	// NOTE(review): the history heading says "últimas 24h" regardless of
+	// metrics.PeriodHours.
+	const promptTemplate = `Analise as seguintes métricas de servidor e detecte anomalias:
+
+Host: %s
+Período: últimas %d horas
+Métricas atuais:
+- CPU: %.2f%% (baseline: %.2f%%)
+- Memória: %.2f%% (baseline: %.2f%%)
+- Disco: %.2f%% (baseline: %.2f%%)
+- Conexões de rede: %d (baseline: %d)
+
+Histórico recente (últimas 24h):
+%s
+
+Responda em JSON com o formato:
+{
+  "is_anomaly": boolean,
+  "confidence": 0.0-1.0,
+  "severity": "low|medium|high|critical",
+  "description": "descrição da anomalia",
+  "suggestions": ["sugestão 1", "sugestão 2"],
+  "related_events": ["evento relacionado 1"]
+}`
+
+	cpu, mem, disk, network := metrics.CPU, metrics.Memory, metrics.Disk, metrics.Network
+	prompt := fmt.Sprintf(promptTemplate,
+		metrics.Hostname,
+		metrics.PeriodHours,
+		cpu.Current, cpu.Baseline,
+		mem.Current, mem.Baseline,
+		disk.Current, disk.Baseline,
+		network.Connections, network.BaselineConnections,
+		metrics.History,
+	)
+
+	reply, err := e.chat(ctx, prompt)
+	if err != nil {
+		return nil, err
+	}
+
+	anomaly := new(AnomalyResult)
+	if parseErr := json.Unmarshal([]byte(reply), anomaly); parseErr != nil {
+		return nil, fmt.Errorf("failed to parse AI response: %w", parseErr)
+	}
+	return anomaly, nil
+}
+
+// AnalyzeLogs asks the model to cluster the supplied log entries, identify
+// patterns and errors, and suggest a possible root cause.
+//
+// Only the first 100 entries are sent. Each is rendered as
+// "[timestamp] level: message" on its own line; LogEntry.Source is not
+// included in the prompt. The reply is decoded as JSON into a
+// LogAnalysisResult.
+//
+// An error is returned when the engine is disabled, when the chat call fails,
+// or when the reply is not valid JSON for LogAnalysisResult.
+func (e *AIEngine) AnalyzeLogs(ctx context.Context, logs []LogEntry) (*LogAnalysisResult, error) {
+	if !e.enabled {
+		return nil, fmt.Errorf("AI engine is disabled")
+	}
+
+	// Cap the number of entries placed in the prompt.
+	const maxLogs = 100
+	if len(logs) > maxLogs {
+		logs = logs[:maxLogs]
+	}
+
+	// Build the log text in a buffer rather than with repeated string
+	// concatenation, which re-copies the accumulated text on every iteration.
+	var logText bytes.Buffer
+	for _, entry := range logs {
+		fmt.Fprintf(&logText, "[%s] %s: %s\n", entry.Timestamp, entry.Level, entry.Message)
+	}
+
+	prompt := fmt.Sprintf(`Analise os seguintes logs de aplicação e identifique padrões, erros e possíveis causas raiz:
+
+Logs:
+%s
+
+Responda em JSON com o formato:
+{
+  "clusters": [
+    {
+      "pattern": "padrão identificado",
+      "count": 10,
+      "severity": "error|warning|info",
+      "examples": ["exemplo 1", "exemplo 2"],
+      "description": "descrição do cluster"
+    }
+  ],
+  "root_cause": "possível causa raiz dos problemas",
+  "suggestions": ["sugestão de correção 1", "sugestão 2"]
+}`, logText.String())
+
+	response, err := e.chat(ctx, prompt)
+	if err != nil {
+		return nil, err
+	}
+
+	var result LogAnalysisResult
+	if err := json.Unmarshal([]byte(response), &result); err != nil {
+		return nil, fmt.Errorf("failed to parse AI response: %w", err)
+	}
+
+	return &result, nil
+}
+
+// PredictCapacity asks the model to predict when a resource will reach its
+// critical threshold.
+//
+// It formats a Portuguese prompt from data (metric name, current value,
+// threshold and the history text), sends it through e.chat, and decodes the
+// reply as JSON into a PredictionResult.
+//
+// An error is returned when the engine is disabled, when the chat call fails,
+// or when the reply is not valid JSON for PredictionResult.
+func (e *AIEngine) PredictCapacity(ctx context.Context, data CapacityData) (*PredictionResult, error) {
+	if !e.enabled {
+		return nil, fmt.Errorf("AI engine is disabled")
+	}
+
+	const promptTemplate = `Analise os dados históricos e preveja quando o recurso atingirá o limite:
+
+Recurso: %s
+Valor atual: %.2f%%
+Limite crítico: %.2f%%
+Dados históricos (últimos 30 dias):
+%s
+
+Responda em JSON com o formato:
+{
+  "metric": "nome da métrica",
+  "current_value": 75.5,
+  "predicted_value": 95.0,
+  "time_to_threshold": "em 7 dias",
+  "confidence": 0.85,
+  "recommendation": "recomendação de ação"
+}`
+
+	prompt := fmt.Sprintf(promptTemplate,
+		data.Metric,
+		data.CurrentValue,
+		data.Threshold,
+		data.History,
+	)
+
+	reply, err := e.chat(ctx, prompt)
+	if err != nil {
+		return nil, err
+	}
+
+	prediction := new(PredictionResult)
+	if parseErr := json.Unmarshal([]byte(reply), prediction); parseErr != nil {
+		return nil, fmt.Errorf("failed to parse AI response: %w", parseErr)
+	}
+	return prediction, nil
+}
+
+// chat sends prompt to the chat completions endpoint and returns the content
+// of the first choice in the reply.
+//
+// The request carries a fixed Portuguese system message instructing the model
+// to answer in valid JSON, followed by prompt as the user message, with
+// temperature 0.3 and max_tokens 2000. It is bound to ctx and authenticated
+// with e.apiKey as a Bearer token.
+//
+// An error is returned when the request cannot be built or sent, when the API
+// reports an error message, when the HTTP status is not 2xx, when the body is
+// not valid JSON, or when the reply contains no choices.
+func (e *AIEngine) chat(ctx context.Context, prompt string) (string, error) {
+	requestBody := map[string]interface{}{
+		"model": e.model,
+		"messages": []map[string]string{
+			{
+				"role":    "system",
+				"content": "Você é um especialista em observabilidade e monitoramento de infraestrutura. Analise os dados fornecidos e forneça insights acionáveis. Sempre responda em JSON válido.",
+			},
+			{
+				"role":    "user",
+				"content": prompt,
+			},
+		},
+		"temperature": 0.3,
+		"max_tokens":  2000,
+	}
+
+	jsonBody, err := json.Marshal(requestBody)
+	if err != nil {
+		return "", fmt.Errorf("encoding chat request: %w", err)
+	}
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodPost, e.baseURL+"/chat/completions", bytes.NewReader(jsonBody))
+	if err != nil {
+		return "", fmt.Errorf("building chat request: %w", err)
+	}
+
+	req.Header.Set("Content-Type", "application/json")
+	req.Header.Set("Authorization", "Bearer "+e.apiKey)
+
+	resp, err := e.client.Do(req)
+	if err != nil {
+		return "", fmt.Errorf("sending chat request: %w", err)
+	}
+	defer resp.Body.Close()
+
+	var result struct {
+		Choices []struct {
+			Message struct {
+				Content string `json:"content"`
+			} `json:"message"`
+		} `json:"choices"`
+		Error struct {
+			Message string `json:"message"`
+		} `json:"error"`
+	}
+
+	// Decode before looking at the status so that an API-supplied error
+	// message, when present, takes precedence over the bare status line.
+	decodeErr := json.NewDecoder(resp.Body).Decode(&result)
+
+	if decodeErr == nil && result.Error.Message != "" {
+		return "", fmt.Errorf("OpenAI error: %s", result.Error.Message)
+	}
+
+	// A non-2xx reply without a usable error message (for example a non-JSON
+	// body) is reported by status instead of as a JSON decode failure.
+	if resp.StatusCode < 200 || resp.StatusCode > 299 {
+		return "", fmt.Errorf("OpenAI request failed: unexpected status %s", resp.Status)
+	}
+
+	if decodeErr != nil {
+		return "", fmt.Errorf("decoding chat response: %w", decodeErr)
+	}
+
+	if len(result.Choices) == 0 {
+		return "", fmt.Errorf("no response from AI")
+	}
+
+	return result.Choices[0].Message.Content, nil
+}
+
+// IsEnabled reports whether the AI engine is enabled.
+func (e *AIEngine) IsEnabled() bool {
+	return e.enabled
+}
+
+// === Auxiliary data types ===
+
+// MetricsData is the set of host metrics submitted for anomaly analysis.
+type MetricsData struct {
+	// Hostname identifies the host the metrics belong to.
+	Hostname string
+	// PeriodHours is the length of the observed period, in hours.
+	PeriodHours int
+	// CPU, Memory and Disk are rendered as percentages in the prompt, each
+	// with its baseline.
+	CPU, Memory, Disk MetricValue
+	// Network holds connection counts.
+	Network NetworkMetric
+	// History is free text describing recent history; it is inserted into
+	// the prompt verbatim.
+	History string
+}
+
+// MetricValue is a metric reading paired with its baseline: Current is the
+// present reading and Baseline the reference value it is compared against.
+type MetricValue struct {
+	Current, Baseline float64
+}
+
+// NetworkMetric holds network metrics: Connections is the current number of
+// network connections and BaselineConnections the reference count it is
+// compared against.
+type NetworkMetric struct {
+	Connections, BaselineConnections int
+}
+
+// LogEntry is a single log record. Timestamp, Level and Message are rendered
+// into the log-analysis prompt; Source is carried alongside them.
+type LogEntry struct {
+	Timestamp, Level, Message, Source string
+}
+
+// CapacityData is the input for a capacity prediction.
+type CapacityData struct {
+	// Metric names the resource being analysed.
+	Metric string
+	// CurrentValue is the present reading and Threshold the critical limit;
+	// both are rendered as percentages in the prompt.
+	CurrentValue, Threshold float64
+	// History is free text with historical data; it is inserted into the
+	// prompt verbatim.
+	History string
+}