fix: add go.sum and fixes

This commit is contained in:
2026-02-06 14:26:15 -03:00
parent cf2b4f7b91
commit d6b08cb586
36 changed files with 3613 additions and 423 deletions

View File

@@ -7,118 +7,326 @@ import (
"log"
"net/http"
"os"
"os/exec"
"os/signal"
"runtime"
"strings"
"syscall"
"time"
"github.com/shirou/gopsutil/v3/cpu"
"github.com/shirou/gopsutil/v3/disk"
"github.com/shirou/gopsutil/v3/host"
"github.com/shirou/gopsutil/v3/load"
"github.com/shirou/gopsutil/v3/mem"
"github.com/shirou/gopsutil/v3/net"
psnet "github.com/shirou/gopsutil/v3/net"
)
type Metrics struct {
Hostname string `json:"hostname"`
Timestamp time.Time `json:"timestamp"`
CPU CPUMetric `json:"cpu"`
Memory MemMetric `json:"memory"`
Disk []DiskMetric `json:"disk"`
Network NetMetric `json:"network"`
// ═══════════════════════════════════════════════════════════
// 🐍 OPHION Agent - Observability Collector
// ═══════════════════════════════════════════════════════════
type Config struct {
ServerURL string
APIKey string
Hostname string
CollectInterval time.Duration
DockerEnabled bool
}
type CPUMetric struct {
UsagePercent float64 `json:"usage_percent"`
Cores int `json:"cores"`
type Metric struct {
Timestamp time.Time `json:"timestamp"`
Service string `json:"service"`
Host string `json:"host"`
Name string `json:"name"`
Value float64 `json:"value"`
MetricType string `json:"metric_type"`
Tags map[string]string `json:"tags,omitempty"`
}
type MemMetric struct {
Total uint64 `json:"total"`
Used uint64 `json:"used"`
UsedPercent float64 `json:"used_percent"`
}
type DiskMetric struct {
Path string `json:"path"`
Total uint64 `json:"total"`
Used uint64 `json:"used"`
UsedPercent float64 `json:"used_percent"`
}
type NetMetric struct {
BytesSent uint64 `json:"bytes_sent"`
BytesRecv uint64 `json:"bytes_recv"`
type ContainerStats struct {
ID string `json:"id"`
Name string `json:"name"`
CPUPercent float64 `json:"cpu_percent"`
MemoryUsage uint64 `json:"memory_usage"`
MemoryLimit uint64 `json:"memory_limit"`
MemoryPercent float64 `json:"memory_percent"`
NetRx uint64 `json:"net_rx"`
NetTx uint64 `json:"net_tx"`
State string `json:"state"`
}
func main() {
serverURL := os.Getenv("OPHION_SERVER")
if serverURL == "" {
serverURL = "http://localhost:8080"
}
config := loadConfig()
apiKey := os.Getenv("OPHION_API_KEY")
if apiKey == "" {
log.Fatal("OPHION_API_KEY is required")
log.Printf("🐍 OPHION Agent starting")
log.Printf(" Server: %s", config.ServerURL)
log.Printf(" Host: %s", config.Hostname)
log.Printf(" Interval: %s", config.CollectInterval)
log.Printf(" Docker: %v", config.DockerEnabled)
// Handle shutdown
sigCh := make(chan os.Signal, 1)
signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM)
ticker := time.NewTicker(config.CollectInterval)
defer ticker.Stop()
// Collect immediately
collect(config)
for {
select {
case <-sigCh:
log.Println("🛑 Shutting down agent...")
return
case <-ticker.C:
collect(config)
}
}
}
func loadConfig() *Config {
hostname, _ := os.Hostname()
interval := 30 * time.Second
log.Printf("🐍 OPHION Agent starting - reporting to %s every %s", serverURL, interval)
if v := os.Getenv("OPHION_INTERVAL"); v != "" {
if d, err := time.ParseDuration(v); err == nil {
interval = d
}
}
ticker := time.NewTicker(interval)
for range ticker.C {
metrics := collectMetrics()
sendMetrics(serverURL, apiKey, metrics)
dockerEnabled := true
if v := os.Getenv("OPHION_DOCKER"); v == "false" || v == "0" {
dockerEnabled = false
}
return &Config{
ServerURL: getEnv("OPHION_SERVER", "http://localhost:8080"),
APIKey: getEnv("OPHION_API_KEY", ""),
Hostname: getEnv("OPHION_HOSTNAME", hostname),
CollectInterval: interval,
DockerEnabled: dockerEnabled,
}
}
func collectMetrics() Metrics {
hostname, _ := os.Hostname()
cpuPercent, _ := cpu.Percent(time.Second, false)
cpuUsage := 0.0
if len(cpuPercent) > 0 {
cpuUsage = cpuPercent[0]
func getEnv(key, def string) string {
if v := os.Getenv(key); v != "" {
return v
}
return def
}
func collect(config *Config) {
var metrics []Metric
now := time.Now()
host := config.Hostname
// System metrics
metrics = append(metrics, collectSystemMetrics(now, host)...)
// Docker metrics
if config.DockerEnabled {
metrics = append(metrics, collectDockerMetrics(now, host)...)
}
memInfo, _ := mem.VirtualMemory()
diskInfo, _ := disk.Usage("/")
disks := []DiskMetric{{
Path: "/",
Total: diskInfo.Total,
Used: diskInfo.Used,
UsedPercent: diskInfo.UsedPercent,
}}
netIO, _ := net.IOCounters(false)
netMetric := NetMetric{}
if len(netIO) > 0 {
netMetric.BytesSent = netIO[0].BytesSent
netMetric.BytesRecv = netIO[0].BytesRecv
}
return Metrics{
Hostname: hostname,
Timestamp: time.Now(),
CPU: CPUMetric{
UsagePercent: cpuUsage,
Cores: runtime.NumCPU(),
},
Memory: MemMetric{
Total: memInfo.Total,
Used: memInfo.Used,
UsedPercent: memInfo.UsedPercent,
},
Disk: disks,
Network: netMetric,
// Send to server
if len(metrics) > 0 {
sendMetrics(config, metrics)
}
}
func sendMetrics(serverURL, apiKey string, metrics Metrics) {
data, _ := json.Marshal(metrics)
req, _ := http.NewRequest("POST", serverURL+"/api/v1/metrics", bytes.NewBuffer(data))
func collectSystemMetrics(now time.Time, hostname string) []Metric {
var metrics []Metric
svc := "system"
// CPU
cpuPercent, err := cpu.Percent(time.Second, false)
if err == nil && len(cpuPercent) > 0 {
metrics = append(metrics, Metric{
Timestamp: now, Service: svc, Host: hostname,
Name: "cpu.usage_percent", Value: cpuPercent[0], MetricType: "gauge",
})
}
// Load average
loadAvg, err := load.Avg()
if err == nil {
metrics = append(metrics, Metric{
Timestamp: now, Service: svc, Host: hostname,
Name: "cpu.load_avg_1", Value: loadAvg.Load1, MetricType: "gauge",
})
metrics = append(metrics, Metric{
Timestamp: now, Service: svc, Host: hostname,
Name: "cpu.load_avg_5", Value: loadAvg.Load5, MetricType: "gauge",
})
}
// Memory
memInfo, err := mem.VirtualMemory()
if err == nil {
metrics = append(metrics, Metric{
Timestamp: now, Service: svc, Host: hostname,
Name: "memory.used_percent", Value: memInfo.UsedPercent, MetricType: "gauge",
})
metrics = append(metrics, Metric{
Timestamp: now, Service: svc, Host: hostname,
Name: "memory.used_bytes", Value: float64(memInfo.Used), MetricType: "gauge",
})
metrics = append(metrics, Metric{
Timestamp: now, Service: svc, Host: hostname,
Name: "memory.available_bytes", Value: float64(memInfo.Available), MetricType: "gauge",
})
}
// Disk
partitions, _ := disk.Partitions(false)
for _, p := range partitions {
if strings.HasPrefix(p.Mountpoint, "/snap") ||
strings.HasPrefix(p.Mountpoint, "/sys") ||
strings.HasPrefix(p.Mountpoint, "/proc") ||
strings.HasPrefix(p.Mountpoint, "/dev") ||
strings.HasPrefix(p.Mountpoint, "/run") {
continue
}
usage, err := disk.Usage(p.Mountpoint)
if err != nil {
continue
}
tags := map[string]string{"path": p.Mountpoint, "device": p.Device}
metrics = append(metrics, Metric{
Timestamp: now, Service: svc, Host: hostname,
Name: "disk.used_percent", Value: usage.UsedPercent, MetricType: "gauge", Tags: tags,
})
metrics = append(metrics, Metric{
Timestamp: now, Service: svc, Host: hostname,
Name: "disk.used_bytes", Value: float64(usage.Used), MetricType: "gauge", Tags: tags,
})
}
// Network
netIO, err := psnet.IOCounters(false)
if err == nil && len(netIO) > 0 {
metrics = append(metrics, Metric{
Timestamp: now, Service: svc, Host: hostname,
Name: "network.bytes_sent", Value: float64(netIO[0].BytesSent), MetricType: "counter",
})
metrics = append(metrics, Metric{
Timestamp: now, Service: svc, Host: hostname,
Name: "network.bytes_recv", Value: float64(netIO[0].BytesRecv), MetricType: "counter",
})
}
// Uptime
hostInfo, err := host.Info()
if err == nil {
metrics = append(metrics, Metric{
Timestamp: now, Service: svc, Host: hostname,
Name: "host.uptime_seconds", Value: float64(hostInfo.Uptime), MetricType: "gauge",
})
}
// Cores
metrics = append(metrics, Metric{
Timestamp: now, Service: svc, Host: hostname,
Name: "host.cpu_cores", Value: float64(runtime.NumCPU()), MetricType: "gauge",
})
return metrics
}
func collectDockerMetrics(now time.Time, hostname string) []Metric {
var metrics []Metric
svc := "docker"
// Check if docker is available
if _, err := exec.LookPath("docker"); err != nil {
return metrics
}
// Get container stats using docker CLI (simpler, no SDK needed)
out, err := exec.Command("docker", "stats", "--no-stream", "--format",
"{{.ID}}\t{{.Name}}\t{{.CPUPerc}}\t{{.MemUsage}}\t{{.MemPerc}}\t{{.NetIO}}").Output()
if err != nil {
return metrics
}
lines := strings.Split(strings.TrimSpace(string(out)), "\n")
runningCount := 0
for _, line := range lines {
if line == "" {
continue
}
runningCount++
parts := strings.Split(line, "\t")
if len(parts) < 6 {
continue
}
id := parts[0]
name := parts[1]
cpuStr := strings.TrimSuffix(parts[2], "%")
memStr := strings.TrimSuffix(parts[4], "%")
var cpuPercent, memPercent float64
fmt.Sscanf(cpuStr, "%f", &cpuPercent)
fmt.Sscanf(memStr, "%f", &memPercent)
tags := map[string]string{"container": name, "container_id": id}
metrics = append(metrics, Metric{
Timestamp: now, Service: svc, Host: hostname,
Name: "container.cpu_percent", Value: cpuPercent, MetricType: "gauge", Tags: tags,
})
metrics = append(metrics, Metric{
Timestamp: now, Service: svc, Host: hostname,
Name: "container.memory_percent", Value: memPercent, MetricType: "gauge", Tags: tags,
})
}
// Total containers
metrics = append(metrics, Metric{
Timestamp: now, Service: svc, Host: hostname,
Name: "containers.running", Value: float64(runningCount), MetricType: "gauge",
})
// Get all containers count
out, err = exec.Command("docker", "ps", "-a", "-q").Output()
if err == nil {
total := len(strings.Split(strings.TrimSpace(string(out)), "\n"))
if strings.TrimSpace(string(out)) == "" {
total = 0
}
metrics = append(metrics, Metric{
Timestamp: now, Service: svc, Host: hostname,
Name: "containers.total", Value: float64(total), MetricType: "gauge",
})
}
return metrics
}
func sendMetrics(config *Config, metrics []Metric) {
data, err := json.Marshal(map[string]any{"metrics": metrics})
if err != nil {
log.Printf("Error marshaling metrics: %v", err)
return
}
req, err := http.NewRequest("POST", config.ServerURL+"/api/v1/metrics", bytes.NewBuffer(data))
if err != nil {
log.Printf("Error creating request: %v", err)
return
}
req.Header.Set("Content-Type", "application/json")
req.Header.Set("Authorization", "Bearer "+apiKey)
if config.APIKey != "" {
req.Header.Set("Authorization", "Bearer "+config.APIKey)
}
client := &http.Client{Timeout: 10 * time.Second}
resp, err := client.Do(req)
@@ -128,8 +336,10 @@ func sendMetrics(serverURL, apiKey string, metrics Metrics) {
}
defer resp.Body.Close()
if resp.StatusCode == 200 {
log.Printf("✓ Metrics sent: CPU=%.1f%% MEM=%.1f%%",
metrics.CPU.UsagePercent, metrics.Memory.UsedPercent)
if resp.StatusCode >= 400 {
log.Printf("Server returned error: %d", resp.StatusCode)
return
}
log.Printf("📤 Sent %d metrics", len(metrics))
}

View File

@@ -1,65 +1,737 @@
package main
import (
"context"
"database/sql"
"encoding/json"
"log"
"os"
"os/signal"
"strconv"
"syscall"
"time"
"github.com/gofiber/fiber/v2"
"github.com/gofiber/fiber/v2/middleware/cors"
"github.com/gofiber/fiber/v2/middleware/logger"
"github.com/gofiber/fiber/v2/middleware/recover"
"github.com/google/uuid"
_ "github.com/lib/pq"
)
// ═══════════════════════════════════════════════════════════
// 🐍 OPHION Server - Observability Platform API
// ═══════════════════════════════════════════════════════════
type Server struct {
app *fiber.App
db *sql.DB
}
type Metric struct {
Timestamp time.Time `json:"timestamp"`
Service string `json:"service"`
Host string `json:"host"`
Name string `json:"name"`
Value float64 `json:"value"`
MetricType string `json:"metric_type"`
Tags map[string]string `json:"tags,omitempty"`
}
type LogEntry struct {
Timestamp time.Time `json:"timestamp"`
Service string `json:"service"`
Host string `json:"host"`
Level string `json:"level"`
Message string `json:"message"`
TraceID string `json:"trace_id,omitempty"`
SpanID string `json:"span_id,omitempty"`
Source string `json:"source"`
ContainerID string `json:"container_id,omitempty"`
}
type Span struct {
TraceID string `json:"trace_id"`
SpanID string `json:"span_id"`
ParentSpanID string `json:"parent_span_id,omitempty"`
Service string `json:"service"`
Operation string `json:"operation"`
StartTime time.Time `json:"start_time"`
EndTime time.Time `json:"end_time"`
DurationNs int64 `json:"duration_ns"`
StatusCode string `json:"status_code"`
StatusMsg string `json:"status_message,omitempty"`
Kind string `json:"kind"`
Attributes map[string]any `json:"attributes,omitempty"`
}
type Alert struct {
ID string `json:"id"`
Name string `json:"name"`
Severity string `json:"severity"`
Service string `json:"service"`
Host string `json:"host"`
Message string `json:"message"`
Status string `json:"status"`
FiredAt time.Time `json:"fired_at"`
ResolvedAt *time.Time `json:"resolved_at,omitempty"`
}
type Agent struct {
ID string `json:"id"`
Hostname string `json:"hostname"`
IP string `json:"ip"`
Version string `json:"version"`
Status string `json:"status"`
LastSeen time.Time `json:"last_seen"`
CreatedAt time.Time `json:"created_at"`
}
func main() {
app := fiber.New(fiber.Config{
AppName: "OPHION Observability Platform",
})
// Initialize database
pgDSN := getEnv("DATABASE_URL", "postgres://ophion:ophion@localhost:5432/ophion?sslmode=disable")
db, err := sql.Open("postgres", pgDSN)
if err != nil {
log.Fatalf("Failed to connect to database: %v", err)
}
defer db.Close()
// Middleware
app.Use(logger.New())
app.Use(cors.New())
// Health check
app.Get("/health", func(c *fiber.Ctx) error {
return c.JSON(fiber.Map{
"status": "healthy",
"service": "ophion",
"version": "0.1.0",
})
})
// API routes
api := app.Group("/api/v1")
api.Get("/metrics", getMetrics)
api.Post("/metrics", ingestMetrics)
api.Get("/logs", getLogs)
api.Post("/logs", ingestLogs)
api.Get("/alerts", getAlerts)
port := os.Getenv("PORT")
if port == "" {
port = "8080"
// Test connection
if err := db.Ping(); err != nil {
log.Printf("⚠ Database not available: %v", err)
} else {
log.Println("✓ Connected to PostgreSQL")
initSchema(db)
}
log.Printf("🐍 OPHION starting on port %s", port)
log.Fatal(app.Listen(":" + port))
server := &Server{db: db}
// Create Fiber app
app := fiber.New(fiber.Config{
AppName: "OPHION Observability Platform",
BodyLimit: 50 * 1024 * 1024,
ReadTimeout: 30 * time.Second,
WriteTimeout: 30 * time.Second,
})
server.app = app
// Middleware
app.Use(recover.New())
app.Use(logger.New(logger.Config{
Format: "${time} ${status} ${method} ${path} ${latency}\n",
}))
app.Use(cors.New(cors.Config{
AllowOrigins: "*",
AllowHeaders: "Origin, Content-Type, Accept, Authorization",
}))
// Routes
server.setupRoutes()
// Graceful shutdown
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
go func() {
sigCh := make(chan os.Signal, 1)
signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM)
<-sigCh
log.Println("🛑 Shutting down server...")
cancel()
app.Shutdown()
}()
// Background jobs
go server.runBackgroundJobs(ctx)
port := getEnv("PORT", "8080")
log.Printf("🐍 OPHION server starting on port %s", port)
if err := app.Listen(":" + port); err != nil {
log.Fatal(err)
}
}
func getMetrics(c *fiber.Ctx) error {
return c.JSON(fiber.Map{"metrics": []string{}})
func initSchema(db *sql.DB) {
schema := `
CREATE TABLE IF NOT EXISTS metrics (
id SERIAL PRIMARY KEY,
timestamp TIMESTAMPTZ NOT NULL DEFAULT NOW(),
service VARCHAR(255) NOT NULL,
host VARCHAR(255) NOT NULL,
name VARCHAR(255) NOT NULL,
value DOUBLE PRECISION NOT NULL,
metric_type VARCHAR(50),
tags JSONB
);
CREATE TABLE IF NOT EXISTS logs (
id SERIAL PRIMARY KEY,
timestamp TIMESTAMPTZ NOT NULL DEFAULT NOW(),
service VARCHAR(255) NOT NULL,
host VARCHAR(255) NOT NULL,
level VARCHAR(20),
message TEXT,
trace_id VARCHAR(64),
span_id VARCHAR(32),
source VARCHAR(50),
container_id VARCHAR(64)
);
CREATE TABLE IF NOT EXISTS spans (
id SERIAL PRIMARY KEY,
trace_id VARCHAR(64) NOT NULL,
span_id VARCHAR(32) NOT NULL,
parent_span_id VARCHAR(32),
service VARCHAR(255) NOT NULL,
operation VARCHAR(255) NOT NULL,
start_time TIMESTAMPTZ NOT NULL,
end_time TIMESTAMPTZ NOT NULL,
duration_ns BIGINT,
status_code VARCHAR(20),
status_message TEXT,
kind VARCHAR(20),
attributes JSONB
);
CREATE TABLE IF NOT EXISTS agents (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
hostname VARCHAR(255) NOT NULL UNIQUE,
ip VARCHAR(45),
version VARCHAR(50),
status VARCHAR(20) DEFAULT 'active',
last_seen TIMESTAMPTZ DEFAULT NOW(),
created_at TIMESTAMPTZ DEFAULT NOW()
);
CREATE TABLE IF NOT EXISTS alerts (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
name VARCHAR(255) NOT NULL,
severity VARCHAR(20),
service VARCHAR(255),
host VARCHAR(255),
message TEXT,
status VARCHAR(20) DEFAULT 'firing',
fired_at TIMESTAMPTZ DEFAULT NOW(),
resolved_at TIMESTAMPTZ
);
CREATE INDEX IF NOT EXISTS idx_metrics_timestamp ON metrics(timestamp DESC);
CREATE INDEX IF NOT EXISTS idx_metrics_service_name ON metrics(service, name);
CREATE INDEX IF NOT EXISTS idx_logs_timestamp ON logs(timestamp DESC);
CREATE INDEX IF NOT EXISTS idx_logs_service ON logs(service);
CREATE INDEX IF NOT EXISTS idx_spans_trace_id ON spans(trace_id);
CREATE INDEX IF NOT EXISTS idx_spans_service ON spans(service);
`
if _, err := db.Exec(schema); err != nil {
log.Printf("Error creating schema: %v", err)
} else {
log.Println("✓ Database schema initialized")
}
}
func ingestMetrics(c *fiber.Ctx) error {
return c.JSON(fiber.Map{"status": "received"})
func (s *Server) setupRoutes() {
// Health check
s.app.Get("/health", s.healthCheck)
// API v1
api := s.app.Group("/api/v1")
// Ingest endpoints (for agents)
api.Post("/metrics", s.ingestMetrics)
api.Post("/logs", s.ingestLogs)
api.Post("/traces", s.ingestTraces)
// Query endpoints (for dashboard)
api.Get("/metrics", s.queryMetrics)
api.Get("/metrics/names", s.getMetricNames)
api.Get("/logs", s.queryLogs)
api.Get("/traces", s.queryTraces)
api.Get("/traces/:traceId", s.getTrace)
api.Get("/services", s.getServices)
// Agents
api.Get("/agents", s.getAgents)
api.Post("/agents/register", s.registerAgent)
// Alerts
api.Get("/alerts", s.getAlerts)
api.Post("/alerts", s.createAlert)
api.Put("/alerts/:id/resolve", s.resolveAlert)
// Dashboard
api.Get("/dashboard/overview", s.getDashboardOverview)
}
func getLogs(c *fiber.Ctx) error {
return c.JSON(fiber.Map{"logs": []string{}})
func (s *Server) healthCheck(c *fiber.Ctx) error {
return c.JSON(fiber.Map{
"status": "healthy",
"service": "ophion",
"version": "0.2.0",
"timestamp": time.Now(),
})
}
func ingestLogs(c *fiber.Ctx) error {
return c.JSON(fiber.Map{"status": "received"})
// ─────────────────────────────────────────────────────────────
// Ingest Endpoints
// ─────────────────────────────────────────────────────────────
func (s *Server) ingestMetrics(c *fiber.Ctx) error {
var req struct {
Metrics []Metric `json:"metrics"`
}
if err := c.BodyParser(&req); err != nil {
return c.Status(400).JSON(fiber.Map{"error": err.Error()})
}
for _, m := range req.Metrics {
tags, _ := json.Marshal(m.Tags)
_, err := s.db.Exec(`
INSERT INTO metrics (timestamp, service, host, name, value, metric_type, tags)
VALUES ($1, $2, $3, $4, $5, $6, $7)`,
m.Timestamp, m.Service, m.Host, m.Name, m.Value, m.MetricType, tags)
if err != nil {
log.Printf("Error inserting metric: %v", err)
}
}
// Update agent last_seen
if len(req.Metrics) > 0 {
host := req.Metrics[0].Host
s.db.Exec(`UPDATE agents SET last_seen = NOW(), status = 'active' WHERE hostname = $1`, host)
}
return c.JSON(fiber.Map{"status": "received", "count": len(req.Metrics)})
}
func getAlerts(c *fiber.Ctx) error {
return c.JSON(fiber.Map{"alerts": []string{}})
func (s *Server) ingestLogs(c *fiber.Ctx) error {
var req struct {
Logs []LogEntry `json:"logs"`
}
if err := c.BodyParser(&req); err != nil {
return c.Status(400).JSON(fiber.Map{"error": err.Error()})
}
for _, l := range req.Logs {
_, err := s.db.Exec(`
INSERT INTO logs (timestamp, service, host, level, message, trace_id, span_id, source, container_id)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)`,
l.Timestamp, l.Service, l.Host, l.Level, l.Message, l.TraceID, l.SpanID, l.Source, l.ContainerID)
if err != nil {
log.Printf("Error inserting log: %v", err)
}
}
return c.JSON(fiber.Map{"status": "received", "count": len(req.Logs)})
}
func (s *Server) ingestTraces(c *fiber.Ctx) error {
var req struct {
Spans []Span `json:"spans"`
Host string `json:"host"`
}
if err := c.BodyParser(&req); err != nil {
return c.Status(400).JSON(fiber.Map{"error": err.Error()})
}
for _, sp := range req.Spans {
attrs, _ := json.Marshal(sp.Attributes)
_, err := s.db.Exec(`
INSERT INTO spans (trace_id, span_id, parent_span_id, service, operation, start_time, end_time, duration_ns, status_code, status_message, kind, attributes)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12)`,
sp.TraceID, sp.SpanID, sp.ParentSpanID, sp.Service, sp.Operation, sp.StartTime, sp.EndTime, sp.DurationNs, sp.StatusCode, sp.StatusMsg, sp.Kind, attrs)
if err != nil {
log.Printf("Error inserting span: %v", err)
}
}
return c.JSON(fiber.Map{"status": "received", "count": len(req.Spans)})
}
// ─────────────────────────────────────────────────────────────
// Query Endpoints
// ─────────────────────────────────────────────────────────────
func (s *Server) queryMetrics(c *fiber.Ctx) error {
service := c.Query("service", "system")
name := c.Query("name", "cpu.usage_percent")
from := parseTime(c.Query("from"), time.Now().Add(-1*time.Hour))
to := parseTime(c.Query("to"), time.Now())
rows, err := s.db.Query(`
SELECT timestamp, value FROM metrics
WHERE service = $1 AND name = $2 AND timestamp >= $3 AND timestamp <= $4
ORDER BY timestamp ASC
LIMIT 1000`, service, name, from, to)
if err != nil {
return c.Status(500).JSON(fiber.Map{"error": err.Error()})
}
defer rows.Close()
var metrics []map[string]any
for rows.Next() {
var ts time.Time
var val float64
if err := rows.Scan(&ts, &val); err == nil {
metrics = append(metrics, map[string]any{"timestamp": ts, "value": val})
}
}
return c.JSON(fiber.Map{"metrics": metrics, "count": len(metrics)})
}
func (s *Server) getMetricNames(c *fiber.Ctx) error {
rows, err := s.db.Query(`SELECT DISTINCT name FROM metrics ORDER BY name`)
if err != nil {
return c.Status(500).JSON(fiber.Map{"error": err.Error()})
}
defer rows.Close()
var names []string
for rows.Next() {
var name string
if rows.Scan(&name) == nil {
names = append(names, name)
}
}
return c.JSON(fiber.Map{"names": names})
}
func (s *Server) queryLogs(c *fiber.Ctx) error {
service := c.Query("service")
level := c.Query("level")
query := c.Query("q")
from := parseTime(c.Query("from"), time.Now().Add(-1*time.Hour))
to := parseTime(c.Query("to"), time.Now())
limit := parseInt(c.Query("limit"), 100)
sql := `SELECT timestamp, service, host, level, message, source, container_id
FROM logs WHERE timestamp >= $1 AND timestamp <= $2`
args := []any{from, to}
argN := 3
if service != "" {
sql += ` AND service = $` + strconv.Itoa(argN)
args = append(args, service)
argN++
}
if level != "" {
sql += ` AND level = $` + strconv.Itoa(argN)
args = append(args, level)
argN++
}
if query != "" {
sql += ` AND message ILIKE $` + strconv.Itoa(argN)
args = append(args, "%"+query+"%")
argN++
}
sql += ` ORDER BY timestamp DESC LIMIT $` + strconv.Itoa(argN)
args = append(args, limit)
rows, err := s.db.Query(sql, args...)
if err != nil {
return c.Status(500).JSON(fiber.Map{"error": err.Error()})
}
defer rows.Close()
var logs []LogEntry
for rows.Next() {
var l LogEntry
if err := rows.Scan(&l.Timestamp, &l.Service, &l.Host, &l.Level, &l.Message, &l.Source, &l.ContainerID); err == nil {
logs = append(logs, l)
}
}
return c.JSON(fiber.Map{"logs": logs, "count": len(logs)})
}
func (s *Server) queryTraces(c *fiber.Ctx) error {
service := c.Query("service")
from := parseTime(c.Query("from"), time.Now().Add(-1*time.Hour))
to := parseTime(c.Query("to"), time.Now())
limit := parseInt(c.Query("limit"), 20)
sql := `SELECT DISTINCT trace_id, service, operation, MIN(start_time), MAX(duration_ns)
FROM spans WHERE start_time >= $1 AND start_time <= $2`
args := []any{from, to}
argN := 3
if service != "" {
sql += ` AND service = $` + strconv.Itoa(argN)
args = append(args, service)
argN++
}
sql += ` GROUP BY trace_id, service, operation ORDER BY MIN(start_time) DESC LIMIT $` + strconv.Itoa(argN)
args = append(args, limit)
rows, err := s.db.Query(sql, args...)
if err != nil {
return c.Status(500).JSON(fiber.Map{"error": err.Error()})
}
defer rows.Close()
var traces []map[string]any
for rows.Next() {
var traceID, service, operation string
var startTime time.Time
var durationNs int64
if err := rows.Scan(&traceID, &service, &operation, &startTime, &durationNs); err == nil {
traces = append(traces, map[string]any{
"trace_id": traceID,
"service": service,
"operation": operation,
"start_time": startTime,
"duration_ns": durationNs,
})
}
}
return c.JSON(fiber.Map{"traces": traces, "count": len(traces)})
}
func (s *Server) getTrace(c *fiber.Ctx) error {
traceID := c.Params("traceId")
rows, err := s.db.Query(`
SELECT trace_id, span_id, parent_span_id, service, operation, start_time, end_time, duration_ns, status_code, status_message, kind
FROM spans WHERE trace_id = $1 ORDER BY start_time`, traceID)
if err != nil {
return c.Status(500).JSON(fiber.Map{"error": err.Error()})
}
defer rows.Close()
var spans []Span
for rows.Next() {
var sp Span
var parentSpanID, statusMsg sql.NullString
if err := rows.Scan(&sp.TraceID, &sp.SpanID, &parentSpanID, &sp.Service, &sp.Operation, &sp.StartTime, &sp.EndTime, &sp.DurationNs, &sp.StatusCode, &statusMsg, &sp.Kind); err == nil {
sp.ParentSpanID = parentSpanID.String
sp.StatusMsg = statusMsg.String
spans = append(spans, sp)
}
}
if len(spans) == 0 {
return c.Status(404).JSON(fiber.Map{"error": "trace not found"})
}
return c.JSON(fiber.Map{
"trace_id": traceID,
"spans": spans,
"duration_ns": spans[len(spans)-1].EndTime.Sub(spans[0].StartTime).Nanoseconds(),
"start_time": spans[0].StartTime,
})
}
func (s *Server) getServices(c *fiber.Ctx) error {
rows, err := s.db.Query(`SELECT DISTINCT service FROM metrics UNION SELECT DISTINCT service FROM spans ORDER BY service`)
if err != nil {
return c.Status(500).JSON(fiber.Map{"error": err.Error()})
}
defer rows.Close()
var services []string
for rows.Next() {
var svc string
if rows.Scan(&svc) == nil {
services = append(services, svc)
}
}
return c.JSON(fiber.Map{"services": services})
}
// ─────────────────────────────────────────────────────────────
// Agents
// ─────────────────────────────────────────────────────────────
func (s *Server) getAgents(c *fiber.Ctx) error {
rows, err := s.db.Query(`SELECT id, hostname, ip, version, status, last_seen, created_at FROM agents ORDER BY last_seen DESC`)
if err != nil {
return c.Status(500).JSON(fiber.Map{"error": err.Error()})
}
defer rows.Close()
var agents []Agent
for rows.Next() {
var a Agent
if err := rows.Scan(&a.ID, &a.Hostname, &a.IP, &a.Version, &a.Status, &a.LastSeen, &a.CreatedAt); err == nil {
agents = append(agents, a)
}
}
return c.JSON(fiber.Map{"agents": agents})
}
func (s *Server) registerAgent(c *fiber.Ctx) error {
var agent Agent
if err := c.BodyParser(&agent); err != nil {
return c.Status(400).JSON(fiber.Map{"error": err.Error()})
}
agent.ID = uuid.New().String()
agent.Status = "active"
agent.LastSeen = time.Now()
agent.CreatedAt = time.Now()
_, err := s.db.Exec(`
INSERT INTO agents (id, hostname, ip, version, status, last_seen, created_at)
VALUES ($1, $2, $3, $4, $5, $6, $7)
ON CONFLICT (hostname) DO UPDATE SET ip = $3, version = $4, status = 'active', last_seen = NOW()`,
agent.ID, agent.Hostname, agent.IP, agent.Version, agent.Status, agent.LastSeen, agent.CreatedAt)
if err != nil {
return c.Status(500).JSON(fiber.Map{"error": err.Error()})
}
return c.JSON(fiber.Map{"status": "registered", "agent": agent})
}
// ─────────────────────────────────────────────────────────────
// Alerts
// ─────────────────────────────────────────────────────────────
func (s *Server) getAlerts(c *fiber.Ctx) error {
status := c.Query("status")
limit := parseInt(c.Query("limit"), 50)
sql := `SELECT id, name, severity, service, host, message, status, fired_at, resolved_at FROM alerts`
var args []any
if status != "" {
sql += ` WHERE status = $1`
args = append(args, status)
}
sql += ` ORDER BY fired_at DESC LIMIT ` + strconv.Itoa(limit)
rows, err := s.db.Query(sql, args...)
if err != nil {
return c.Status(500).JSON(fiber.Map{"error": err.Error()})
}
defer rows.Close()
var alerts []Alert
for rows.Next() {
var a Alert
var resolvedAt *time.Time
if err := rows.Scan(&a.ID, &a.Name, &a.Severity, &a.Service, &a.Host, &a.Message, &a.Status, &a.FiredAt, &resolvedAt); err == nil {
a.ResolvedAt = resolvedAt
alerts = append(alerts, a)
}
}
return c.JSON(fiber.Map{"alerts": alerts})
}
func (s *Server) createAlert(c *fiber.Ctx) error {
var alert Alert
if err := c.BodyParser(&alert); err != nil {
return c.Status(400).JSON(fiber.Map{"error": err.Error()})
}
alert.ID = uuid.New().String()
alert.Status = "firing"
alert.FiredAt = time.Now()
_, err := s.db.Exec(`
INSERT INTO alerts (id, name, severity, service, host, message, status, fired_at)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8)`,
alert.ID, alert.Name, alert.Severity, alert.Service, alert.Host, alert.Message, alert.Status, alert.FiredAt)
if err != nil {
return c.Status(500).JSON(fiber.Map{"error": err.Error()})
}
return c.JSON(fiber.Map{"status": "created", "alert": alert})
}
func (s *Server) resolveAlert(c *fiber.Ctx) error {
alertID := c.Params("id")
_, err := s.db.Exec(`UPDATE alerts SET status = 'resolved', resolved_at = NOW() WHERE id = $1`, alertID)
if err != nil {
return c.Status(500).JSON(fiber.Map{"error": err.Error()})
}
return c.JSON(fiber.Map{"status": "resolved"})
}
// ─────────────────────────────────────────────────────────────
// Dashboard
// ─────────────────────────────────────────────────────────────
func (s *Server) getDashboardOverview(c *fiber.Ctx) error {
overview := fiber.Map{"timestamp": time.Now()}
// Agents
var totalAgents, activeAgents int
s.db.QueryRow(`SELECT COUNT(*) FROM agents`).Scan(&totalAgents)
s.db.QueryRow(`SELECT COUNT(*) FROM agents WHERE status = 'active'`).Scan(&activeAgents)
overview["agents"] = fiber.Map{"total": totalAgents, "active": activeAgents}
// Alerts
var firingAlerts int
s.db.QueryRow(`SELECT COUNT(*) FROM alerts WHERE status = 'firing'`).Scan(&firingAlerts)
overview["alerts"] = fiber.Map{"firing": firingAlerts}
// Services
var serviceCount int
s.db.QueryRow(`SELECT COUNT(DISTINCT service) FROM metrics`).Scan(&serviceCount)
overview["services"] = fiber.Map{"count": serviceCount}
return c.JSON(overview)
}
// ─────────────────────────────────────────────────────────────
// Background Jobs
// ─────────────────────────────────────────────────────────────
func (s *Server) runBackgroundJobs(ctx context.Context) {
ticker := time.NewTicker(1 * time.Minute)
defer ticker.Stop()
for {
select {
case <-ctx.Done():
return
case <-ticker.C:
// Mark stale agents
s.db.Exec(`UPDATE agents SET status = 'inactive' WHERE last_seen < NOW() - INTERVAL '5 minutes' AND status = 'active'`)
}
}
}
// ─────────────────────────────────────────────────────────────
// Helpers
// ─────────────────────────────────────────────────────────────
func getEnv(key, def string) string {
if v := os.Getenv(key); v != "" {
return v
}
return def
}
func parseTime(s string, def time.Time) time.Time {
if s == "" {
return def
}
if t, err := time.Parse(time.RFC3339, s); err == nil {
return t
}
if ts, err := strconv.ParseInt(s, 10, 64); err == nil {
return time.Unix(ts, 0)
}
if d, err := time.ParseDuration(s); err == nil {
return time.Now().Add(-d)
}
return def
}
func parseInt(s string, def int) int {
if s == "" {
return def
}
if v, err := strconv.Atoi(s); err == nil {
return v
}
return def
}