// ophion/internal/aiapm/pricing.go

package aiapm

// ModelPricing holds the per-1M-token prices for a single model.
//
// Each field is the price charged for 1,000,000 tokens of one category.
// EstimateCost multiplies each price by the matching token count (input,
// output, cache read, cache write) and divides by 1,000,000. The entries in
// PricingTable express these prices in USD.
type ModelPricing struct {
	InputPer1M      float64 `json:"input_per_1m"`
	OutputPer1M     float64 `json:"output_per_1m"`
	CacheReadPer1M  float64 `json:"cache_read_per_1m"`
	CacheWritePer1M float64 `json:"cache_write_per_1m"`
}

// PricingTable maps "vendor/model" keys to that model's pricing. All prices
// are in USD per 1M tokens.
//
// EstimateCost looks entries up under vendor + "/" + model, so every key must
// use exactly that form. The values are hard-coded here; the Groq entries are
// marked as approximate.
var PricingTable = map[string]ModelPricing{
	// Anthropic
	"anthropic/claude-opus-4":          {InputPer1M: 15.0, OutputPer1M: 75.0, CacheReadPer1M: 1.5, CacheWritePer1M: 18.75},
	"anthropic/claude-sonnet-4":        {InputPer1M: 3.0, OutputPer1M: 15.0, CacheReadPer1M: 0.3, CacheWritePer1M: 3.75},
	"anthropic/claude-3.5-sonnet":      {InputPer1M: 3.0, OutputPer1M: 15.0, CacheReadPer1M: 0.3, CacheWritePer1M: 3.75},
	"anthropic/claude-3.5-haiku":       {InputPer1M: 0.8, OutputPer1M: 4.0, CacheReadPer1M: 0.08, CacheWritePer1M: 1.0},
	"anthropic/claude-3-haiku":         {InputPer1M: 0.25, OutputPer1M: 1.25, CacheReadPer1M: 0.03, CacheWritePer1M: 0.3},

	// OpenAI
	"openai/gpt-4o":                    {InputPer1M: 2.5, OutputPer1M: 10.0, CacheReadPer1M: 1.25, CacheWritePer1M: 2.5},
	"openai/gpt-4o-mini":               {InputPer1M: 0.15, OutputPer1M: 0.6, CacheReadPer1M: 0.075, CacheWritePer1M: 0.15},
	"openai/o1":                        {InputPer1M: 15.0, OutputPer1M: 60.0, CacheReadPer1M: 7.5, CacheWritePer1M: 15.0},
	"openai/o1-mini":                   {InputPer1M: 3.0, OutputPer1M: 12.0, CacheReadPer1M: 1.5, CacheWritePer1M: 3.0},
	"openai/o3":                        {InputPer1M: 10.0, OutputPer1M: 40.0, CacheReadPer1M: 5.0, CacheWritePer1M: 10.0},
	"openai/o3-mini":                   {InputPer1M: 1.1, OutputPer1M: 4.4, CacheReadPer1M: 0.55, CacheWritePer1M: 1.1},

	// Google
	"google/gemini-2.5-pro":            {InputPer1M: 1.25, OutputPer1M: 10.0, CacheReadPer1M: 0.315, CacheWritePer1M: 1.25},
	"google/gemini-2.5-flash":          {InputPer1M: 0.15, OutputPer1M: 0.6, CacheReadPer1M: 0.0375, CacheWritePer1M: 0.15},
	"google/gemini-2.0-flash":          {InputPer1M: 0.1, OutputPer1M: 0.4, CacheReadPer1M: 0.025, CacheWritePer1M: 0.1},

	// Mistral (cache read/write prices below are set equal to the input price)
	"mistral/mistral-large":            {InputPer1M: 2.0, OutputPer1M: 6.0, CacheReadPer1M: 2.0, CacheWritePer1M: 2.0},
	"mistral/mistral-small":            {InputPer1M: 0.1, OutputPer1M: 0.3, CacheReadPer1M: 0.1, CacheWritePer1M: 0.1},
	"mistral/codestral":                {InputPer1M: 0.3, OutputPer1M: 0.9, CacheReadPer1M: 0.3, CacheWritePer1M: 0.3},

	// DeepSeek
	"deepseek/deepseek-chat":           {InputPer1M: 0.14, OutputPer1M: 0.28, CacheReadPer1M: 0.014, CacheWritePer1M: 0.14},
	"deepseek/deepseek-reasoner":       {InputPer1M: 0.55, OutputPer1M: 2.19, CacheReadPer1M: 0.055, CacheWritePer1M: 0.55},

	// Groq (hosted models — pricing approximate; cache prices set equal to the input price)
	"groq/llama-3.3-70b":               {InputPer1M: 0.59, OutputPer1M: 0.79, CacheReadPer1M: 0.59, CacheWritePer1M: 0.59},
	"groq/llama-3.1-8b":                {InputPer1M: 0.05, OutputPer1M: 0.08, CacheReadPer1M: 0.05, CacheWritePer1M: 0.05},
	"groq/gemma2-9b":                   {InputPer1M: 0.2, OutputPer1M: 0.2, CacheReadPer1M: 0.2, CacheWritePer1M: 0.2},
}

// EstimateCost returns the estimated cost in USD of a single AI call.
//
// The price entry is looked up in PricingTable under vendor + "/" + model.
// If that key is absent, model on its own is tried as a key, which covers
// callers that pass an already-qualified name such as "openai/gpt-4o"
// (for example with an empty or repeated vendor). When neither key is
// present the model is treated as unpriced and 0 is returned.
//
// tokensIn, tokensOut, cacheRead and cacheWrite are token counts; each is
// charged at the matching per-1M-token price from the entry. Negative counts
// are treated as 0.
func EstimateCost(vendor, model string, tokensIn, tokensOut, cacheRead, cacheWrite int) float64 {
	pricing, ok := PricingTable[vendor+"/"+model]
	if !ok {
		// Fallback: the model name may already carry its vendor prefix.
		pricing, ok = PricingTable[model]
	}
	if !ok {
		return 0
	}

	// Prices are quoted per 1M tokens.
	const tokensPerUnit = 1_000_000

	// billable converts a token count to float64, clamping negatives to 0 so
	// a bogus count cannot subtract from the estimate.
	billable := func(n int) float64 {
		if n < 0 {
			return 0
		}
		return float64(n)
	}

	cost := billable(tokensIn) * pricing.InputPer1M / tokensPerUnit
	cost += billable(tokensOut) * pricing.OutputPer1M / tokensPerUnit
	cost += billable(cacheRead) * pricing.CacheReadPer1M / tokensPerUnit
	cost += billable(cacheWrite) * pricing.CacheWritePer1M / tokensPerUnit

	return cost
}

// GetPricingTable returns a copy of the full pricing table (for the API
// endpoint).
//
// A fresh map is built on every call, so callers may modify the result
// without altering the package-level PricingTable that EstimateCost reads.
func GetPricingTable() map[string]ModelPricing {
	table := make(map[string]ModelPricing, len(PricingTable))
	for key, pricing := range PricingTable {
		table[key] = pricing
	}
	return table
}