package aiapm // ModelPricing holds per-1M-token pricing for a model type ModelPricing struct { InputPer1M float64 `json:"input_per_1m"` OutputPer1M float64 `json:"output_per_1m"` CacheReadPer1M float64 `json:"cache_read_per_1m"` CacheWritePer1M float64 `json:"cache_write_per_1m"` } // PricingTable maps "vendor/model" to pricing. Prices in USD per 1M tokens. var PricingTable = map[string]ModelPricing{ // Anthropic "anthropic/claude-opus-4": {InputPer1M: 15.0, OutputPer1M: 75.0, CacheReadPer1M: 1.5, CacheWritePer1M: 18.75}, "anthropic/claude-sonnet-4": {InputPer1M: 3.0, OutputPer1M: 15.0, CacheReadPer1M: 0.3, CacheWritePer1M: 3.75}, "anthropic/claude-3.5-sonnet": {InputPer1M: 3.0, OutputPer1M: 15.0, CacheReadPer1M: 0.3, CacheWritePer1M: 3.75}, "anthropic/claude-3.5-haiku": {InputPer1M: 0.8, OutputPer1M: 4.0, CacheReadPer1M: 0.08, CacheWritePer1M: 1.0}, "anthropic/claude-3-haiku": {InputPer1M: 0.25, OutputPer1M: 1.25, CacheReadPer1M: 0.03, CacheWritePer1M: 0.3}, // OpenAI "openai/gpt-4o": {InputPer1M: 2.5, OutputPer1M: 10.0, CacheReadPer1M: 1.25, CacheWritePer1M: 2.5}, "openai/gpt-4o-mini": {InputPer1M: 0.15, OutputPer1M: 0.6, CacheReadPer1M: 0.075, CacheWritePer1M: 0.15}, "openai/o1": {InputPer1M: 15.0, OutputPer1M: 60.0, CacheReadPer1M: 7.5, CacheWritePer1M: 15.0}, "openai/o1-mini": {InputPer1M: 3.0, OutputPer1M: 12.0, CacheReadPer1M: 1.5, CacheWritePer1M: 3.0}, "openai/o3": {InputPer1M: 10.0, OutputPer1M: 40.0, CacheReadPer1M: 5.0, CacheWritePer1M: 10.0}, "openai/o3-mini": {InputPer1M: 1.1, OutputPer1M: 4.4, CacheReadPer1M: 0.55, CacheWritePer1M: 1.1}, // Google "google/gemini-2.5-pro": {InputPer1M: 1.25, OutputPer1M: 10.0, CacheReadPer1M: 0.315, CacheWritePer1M: 1.25}, "google/gemini-2.5-flash": {InputPer1M: 0.15, OutputPer1M: 0.6, CacheReadPer1M: 0.0375, CacheWritePer1M: 0.15}, "google/gemini-2.0-flash": {InputPer1M: 0.1, OutputPer1M: 0.4, CacheReadPer1M: 0.025, CacheWritePer1M: 0.1}, // Mistral "mistral/mistral-large": {InputPer1M: 2.0, OutputPer1M: 6.0, CacheReadPer1M: 2.0, CacheWritePer1M: 2.0}, "mistral/mistral-small": {InputPer1M: 0.1, OutputPer1M: 0.3, CacheReadPer1M: 0.1, CacheWritePer1M: 0.1}, "mistral/codestral": {InputPer1M: 0.3, OutputPer1M: 0.9, CacheReadPer1M: 0.3, CacheWritePer1M: 0.3}, // DeepSeek "deepseek/deepseek-chat": {InputPer1M: 0.14, OutputPer1M: 0.28, CacheReadPer1M: 0.014, CacheWritePer1M: 0.14}, "deepseek/deepseek-reasoner": {InputPer1M: 0.55, OutputPer1M: 2.19, CacheReadPer1M: 0.055, CacheWritePer1M: 0.55}, // Groq (hosted models — pricing approximate) "groq/llama-3.3-70b": {InputPer1M: 0.59, OutputPer1M: 0.79, CacheReadPer1M: 0.59, CacheWritePer1M: 0.59}, "groq/llama-3.1-8b": {InputPer1M: 0.05, OutputPer1M: 0.08, CacheReadPer1M: 0.05, CacheWritePer1M: 0.05}, "groq/gemma2-9b": {InputPer1M: 0.2, OutputPer1M: 0.2, CacheReadPer1M: 0.2, CacheWritePer1M: 0.2}, } // EstimateCost calculates the estimated cost in USD for an AI call func EstimateCost(vendor, model string, tokensIn, tokensOut, cacheRead, cacheWrite int) float64 { key := vendor + "/" + model pricing, ok := PricingTable[key] if !ok { // Fallback: try just the model name with vendor prefix variations return 0 } cost := float64(tokensIn) * pricing.InputPer1M / 1_000_000 cost += float64(tokensOut) * pricing.OutputPer1M / 1_000_000 cost += float64(cacheRead) * pricing.CacheReadPer1M / 1_000_000 cost += float64(cacheWrite) * pricing.CacheWritePer1M / 1_000_000 return cost } // GetPricingTable returns the full pricing table (for the API endpoint) func GetPricingTable() map[string]ModelPricing { return PricingTable }