// - internal/aiapm/types.go: AICallRecord, filter, summary, and stats types
// - internal/aiapm/pricing.go: vendor pricing tables (Anthropic, OpenAI, Google, Mistral, DeepSeek, Groq)
// - internal/aiapm/store.go: PostgreSQL storage with batch insert, filtered queries, aggregations, timeseries
// - internal/aiapm/collector.go: async collector with buffered channel and background batch writer
// - internal/api/aiapm_handlers.go: Fiber route handlers for ingest, summary, models, vendors, costs, calls, pricing
// - cmd/server/main.go: register AI APM routes and create ai_calls table at startup
69 lines
3.8 KiB
Go
69 lines
3.8 KiB
Go
package aiapm
|
|
|
|
// ModelPricing holds the per-1M-token prices for a single model.
//
// EstimateCost multiplies each field by the matching token count and divides
// by one million, so every field is a price per one million tokens.
type ModelPricing struct {
	// InputPer1M is the price applied to input tokens.
	InputPer1M float64 `json:"input_per_1m"`
	// OutputPer1M is the price applied to output tokens.
	OutputPer1M float64 `json:"output_per_1m"`
	// CacheReadPer1M is the price applied to cache-read tokens.
	CacheReadPer1M float64 `json:"cache_read_per_1m"`
	// CacheWritePer1M is the price applied to cache-write tokens.
	CacheWritePer1M float64 `json:"cache_write_per_1m"`
}
|
|
|
|
// PricingTable maps "vendor/model" to pricing. Prices in USD per 1M tokens.
|
|
var PricingTable = map[string]ModelPricing{
|
|
// Anthropic
|
|
"anthropic/claude-opus-4": {InputPer1M: 15.0, OutputPer1M: 75.0, CacheReadPer1M: 1.5, CacheWritePer1M: 18.75},
|
|
"anthropic/claude-sonnet-4": {InputPer1M: 3.0, OutputPer1M: 15.0, CacheReadPer1M: 0.3, CacheWritePer1M: 3.75},
|
|
"anthropic/claude-3.5-sonnet": {InputPer1M: 3.0, OutputPer1M: 15.0, CacheReadPer1M: 0.3, CacheWritePer1M: 3.75},
|
|
"anthropic/claude-3.5-haiku": {InputPer1M: 0.8, OutputPer1M: 4.0, CacheReadPer1M: 0.08, CacheWritePer1M: 1.0},
|
|
"anthropic/claude-3-haiku": {InputPer1M: 0.25, OutputPer1M: 1.25, CacheReadPer1M: 0.03, CacheWritePer1M: 0.3},
|
|
|
|
// OpenAI
|
|
"openai/gpt-4o": {InputPer1M: 2.5, OutputPer1M: 10.0, CacheReadPer1M: 1.25, CacheWritePer1M: 2.5},
|
|
"openai/gpt-4o-mini": {InputPer1M: 0.15, OutputPer1M: 0.6, CacheReadPer1M: 0.075, CacheWritePer1M: 0.15},
|
|
"openai/o1": {InputPer1M: 15.0, OutputPer1M: 60.0, CacheReadPer1M: 7.5, CacheWritePer1M: 15.0},
|
|
"openai/o1-mini": {InputPer1M: 3.0, OutputPer1M: 12.0, CacheReadPer1M: 1.5, CacheWritePer1M: 3.0},
|
|
"openai/o3": {InputPer1M: 10.0, OutputPer1M: 40.0, CacheReadPer1M: 5.0, CacheWritePer1M: 10.0},
|
|
"openai/o3-mini": {InputPer1M: 1.1, OutputPer1M: 4.4, CacheReadPer1M: 0.55, CacheWritePer1M: 1.1},
|
|
|
|
// Google
|
|
"google/gemini-2.5-pro": {InputPer1M: 1.25, OutputPer1M: 10.0, CacheReadPer1M: 0.315, CacheWritePer1M: 1.25},
|
|
"google/gemini-2.5-flash": {InputPer1M: 0.15, OutputPer1M: 0.6, CacheReadPer1M: 0.0375, CacheWritePer1M: 0.15},
|
|
"google/gemini-2.0-flash": {InputPer1M: 0.1, OutputPer1M: 0.4, CacheReadPer1M: 0.025, CacheWritePer1M: 0.1},
|
|
|
|
// Mistral
|
|
"mistral/mistral-large": {InputPer1M: 2.0, OutputPer1M: 6.0, CacheReadPer1M: 2.0, CacheWritePer1M: 2.0},
|
|
"mistral/mistral-small": {InputPer1M: 0.1, OutputPer1M: 0.3, CacheReadPer1M: 0.1, CacheWritePer1M: 0.1},
|
|
"mistral/codestral": {InputPer1M: 0.3, OutputPer1M: 0.9, CacheReadPer1M: 0.3, CacheWritePer1M: 0.3},
|
|
|
|
// DeepSeek
|
|
"deepseek/deepseek-chat": {InputPer1M: 0.14, OutputPer1M: 0.28, CacheReadPer1M: 0.014, CacheWritePer1M: 0.14},
|
|
"deepseek/deepseek-reasoner": {InputPer1M: 0.55, OutputPer1M: 2.19, CacheReadPer1M: 0.055, CacheWritePer1M: 0.55},
|
|
|
|
// Groq (hosted models — pricing approximate)
|
|
"groq/llama-3.3-70b": {InputPer1M: 0.59, OutputPer1M: 0.79, CacheReadPer1M: 0.59, CacheWritePer1M: 0.59},
|
|
"groq/llama-3.1-8b": {InputPer1M: 0.05, OutputPer1M: 0.08, CacheReadPer1M: 0.05, CacheWritePer1M: 0.05},
|
|
"groq/gemma2-9b": {InputPer1M: 0.2, OutputPer1M: 0.2, CacheReadPer1M: 0.2, CacheWritePer1M: 0.2},
|
|
}
|
|
|
|
// EstimateCost calculates the estimated cost in USD for an AI call
|
|
func EstimateCost(vendor, model string, tokensIn, tokensOut, cacheRead, cacheWrite int) float64 {
|
|
key := vendor + "/" + model
|
|
pricing, ok := PricingTable[key]
|
|
if !ok {
|
|
// Fallback: try just the model name with vendor prefix variations
|
|
return 0
|
|
}
|
|
|
|
cost := float64(tokensIn) * pricing.InputPer1M / 1_000_000
|
|
cost += float64(tokensOut) * pricing.OutputPer1M / 1_000_000
|
|
cost += float64(cacheRead) * pricing.CacheReadPer1M / 1_000_000
|
|
cost += float64(cacheWrite) * pricing.CacheWritePer1M / 1_000_000
|
|
|
|
return cost
|
|
}
|
|
|
|
// GetPricingTable returns the full pricing table (for the API endpoint)
|
|
func GetPricingTable() map[string]ModelPricing {
|
|
return PricingTable
|
|
}
|