Files
ophion/internal/aiapm/pricing.go
Rainbow c9e68c5048 feat: add AI APM module for AI/LLM call telemetry
- internal/aiapm/types.go: AICallRecord, filter, summary, and stats types
- internal/aiapm/pricing.go: vendor pricing tables (Anthropic, OpenAI, Google, Mistral, DeepSeek, Groq)
- internal/aiapm/store.go: PostgreSQL storage with batch insert, filtered queries, aggregations, timeseries
- internal/aiapm/collector.go: async collector with buffered channel and background batch writer
- internal/api/aiapm_handlers.go: Fiber route handlers for ingest, summary, models, vendors, costs, calls, pricing
- cmd/server/main.go: register AI APM routes and create ai_calls table at startup
2026-02-08 05:13:38 -03:00

69 lines
3.8 KiB
Go

package aiapm
// ModelPricing holds per-1M-token pricing for a model
type ModelPricing struct {
InputPer1M float64 `json:"input_per_1m"`
OutputPer1M float64 `json:"output_per_1m"`
CacheReadPer1M float64 `json:"cache_read_per_1m"`
CacheWritePer1M float64 `json:"cache_write_per_1m"`
}
// PricingTable maps "vendor/model" to pricing. Prices in USD per 1M tokens.
var PricingTable = map[string]ModelPricing{
// Anthropic
"anthropic/claude-opus-4": {InputPer1M: 15.0, OutputPer1M: 75.0, CacheReadPer1M: 1.5, CacheWritePer1M: 18.75},
"anthropic/claude-sonnet-4": {InputPer1M: 3.0, OutputPer1M: 15.0, CacheReadPer1M: 0.3, CacheWritePer1M: 3.75},
"anthropic/claude-3.5-sonnet": {InputPer1M: 3.0, OutputPer1M: 15.0, CacheReadPer1M: 0.3, CacheWritePer1M: 3.75},
"anthropic/claude-3.5-haiku": {InputPer1M: 0.8, OutputPer1M: 4.0, CacheReadPer1M: 0.08, CacheWritePer1M: 1.0},
"anthropic/claude-3-haiku": {InputPer1M: 0.25, OutputPer1M: 1.25, CacheReadPer1M: 0.03, CacheWritePer1M: 0.3},
// OpenAI
"openai/gpt-4o": {InputPer1M: 2.5, OutputPer1M: 10.0, CacheReadPer1M: 1.25, CacheWritePer1M: 2.5},
"openai/gpt-4o-mini": {InputPer1M: 0.15, OutputPer1M: 0.6, CacheReadPer1M: 0.075, CacheWritePer1M: 0.15},
"openai/o1": {InputPer1M: 15.0, OutputPer1M: 60.0, CacheReadPer1M: 7.5, CacheWritePer1M: 15.0},
"openai/o1-mini": {InputPer1M: 3.0, OutputPer1M: 12.0, CacheReadPer1M: 1.5, CacheWritePer1M: 3.0},
"openai/o3": {InputPer1M: 10.0, OutputPer1M: 40.0, CacheReadPer1M: 5.0, CacheWritePer1M: 10.0},
"openai/o3-mini": {InputPer1M: 1.1, OutputPer1M: 4.4, CacheReadPer1M: 0.55, CacheWritePer1M: 1.1},
// Google
"google/gemini-2.5-pro": {InputPer1M: 1.25, OutputPer1M: 10.0, CacheReadPer1M: 0.315, CacheWritePer1M: 1.25},
"google/gemini-2.5-flash": {InputPer1M: 0.15, OutputPer1M: 0.6, CacheReadPer1M: 0.0375, CacheWritePer1M: 0.15},
"google/gemini-2.0-flash": {InputPer1M: 0.1, OutputPer1M: 0.4, CacheReadPer1M: 0.025, CacheWritePer1M: 0.1},
// Mistral
"mistral/mistral-large": {InputPer1M: 2.0, OutputPer1M: 6.0, CacheReadPer1M: 2.0, CacheWritePer1M: 2.0},
"mistral/mistral-small": {InputPer1M: 0.1, OutputPer1M: 0.3, CacheReadPer1M: 0.1, CacheWritePer1M: 0.1},
"mistral/codestral": {InputPer1M: 0.3, OutputPer1M: 0.9, CacheReadPer1M: 0.3, CacheWritePer1M: 0.3},
// DeepSeek
"deepseek/deepseek-chat": {InputPer1M: 0.14, OutputPer1M: 0.28, CacheReadPer1M: 0.014, CacheWritePer1M: 0.14},
"deepseek/deepseek-reasoner": {InputPer1M: 0.55, OutputPer1M: 2.19, CacheReadPer1M: 0.055, CacheWritePer1M: 0.55},
// Groq (hosted models — pricing approximate)
"groq/llama-3.3-70b": {InputPer1M: 0.59, OutputPer1M: 0.79, CacheReadPer1M: 0.59, CacheWritePer1M: 0.59},
"groq/llama-3.1-8b": {InputPer1M: 0.05, OutputPer1M: 0.08, CacheReadPer1M: 0.05, CacheWritePer1M: 0.05},
"groq/gemma2-9b": {InputPer1M: 0.2, OutputPer1M: 0.2, CacheReadPer1M: 0.2, CacheWritePer1M: 0.2},
}
// EstimateCost calculates the estimated cost in USD for an AI call
func EstimateCost(vendor, model string, tokensIn, tokensOut, cacheRead, cacheWrite int) float64 {
key := vendor + "/" + model
pricing, ok := PricingTable[key]
if !ok {
// Fallback: try just the model name with vendor prefix variations
return 0
}
cost := float64(tokensIn) * pricing.InputPer1M / 1_000_000
cost += float64(tokensOut) * pricing.OutputPer1M / 1_000_000
cost += float64(cacheRead) * pricing.CacheReadPer1M / 1_000_000
cost += float64(cacheWrite) * pricing.CacheWritePer1M / 1_000_000
return cost
}
// GetPricingTable returns the full pricing table (for the API endpoint)
func GetPricingTable() map[string]ModelPricing {
return PricingTable
}