{
  "lastUpdated": "2026-04-27",
  "currency": "USD",
  "unit": "per_million_tokens",
  "note": "Prices are best-effort manual snapshots from each provider's pricing page. Verify before invoicing. See /pricing-changelog/ for change history.",
  "models": [
    {
      "slug": "claude-opus",
      "displayName": "Claude Opus 4.7",
      "provider": "anthropic",
      "family": "claude",
      "apiId": "claude-opus-4-7",
      "contextWindow": 200000,
      "input": 5.00,
      "output": 25.00,
      "deprecated": false,
      "deprecatedAt": null,
      "tokenizerStrategy": "api",
      "confidence": "exact",
      "notes": "Pricing from anthropic.com/pricing. Opus 4.7 uses a new tokenizer that may produce up to 35% more tokens than earlier Claude models for the same text."
    },
    {
      "slug": "claude-sonnet",
      "displayName": "Claude Sonnet 4.6",
      "provider": "anthropic",
      "family": "claude",
      "apiId": "claude-sonnet-4-6",
      "contextWindow": 200000,
      "input": 3.00,
      "output": 15.00,
      "deprecated": false,
      "deprecatedAt": null,
      "tokenizerStrategy": "api",
      "confidence": "exact",
      "notes": "Pricing from anthropic.com/pricing"
    },
    {
      "slug": "claude-haiku",
      "displayName": "Claude Haiku 4.5",
      "provider": "anthropic",
      "family": "claude",
      "apiId": "claude-haiku-4-5-20251001",
      "contextWindow": 200000,
      "input": 1.00,
      "output": 5.00,
      "deprecated": false,
      "deprecatedAt": null,
      "tokenizerStrategy": "api",
      "confidence": "exact",
      "notes": "Pricing from anthropic.com/pricing"
    },
    {
      "slug": "gpt-5-5",
      "displayName": "GPT-5.5",
      "provider": "openai",
      "family": "gpt",
      "apiId": "gpt-5.5",
      "contextWindow": 400000,
      "input": 5.00,
      "output": 30.00,
      "deprecated": false,
      "deprecatedAt": null,
      "tokenizerStrategy": "browser-tiktoken-o200k",
      "confidence": "exact",
      "notes": "Current OpenAI flagship. Cached input $0.50/M. Above 128K input: $10/$45 long-context tier."
    },
    {
      "slug": "gpt-5-5-pro",
      "displayName": "GPT-5.5 Pro",
      "provider": "openai",
      "family": "gpt",
      "apiId": "gpt-5.5-pro",
      "contextWindow": 400000,
      "input": 30.00,
      "output": 180.00,
      "deprecated": false,
      "deprecatedAt": null,
      "tokenizerStrategy": "browser-tiktoken-o200k",
      "confidence": "exact",
      "notes": "Highest-tier reasoning. Above 128K input: $60/$270 long-context tier."
    },
    {
      "slug": "gpt-5-4",
      "displayName": "GPT-5.4",
      "provider": "openai",
      "family": "gpt",
      "apiId": "gpt-5.4",
      "contextWindow": 400000,
      "input": 2.50,
      "output": 15.00,
      "deprecated": false,
      "deprecatedAt": null,
      "tokenizerStrategy": "browser-tiktoken-o200k",
      "confidence": "exact",
      "notes": "Cached input $0.25/M. Above 128K input: $5/$22.50 long-context tier."
    },
    {
      "slug": "gpt-5-4-mini",
      "displayName": "GPT-5.4 Mini",
      "provider": "openai",
      "family": "gpt",
      "apiId": "gpt-5.4-mini",
      "contextWindow": 400000,
      "input": 0.75,
      "output": 4.50,
      "deprecated": false,
      "deprecatedAt": null,
      "tokenizerStrategy": "browser-tiktoken-o200k",
      "confidence": "exact",
      "notes": "Cached input $0.075/M. No long-context tier."
    },
    {
      "slug": "gpt-5-4-nano",
      "displayName": "GPT-5.4 Nano",
      "provider": "openai",
      "family": "gpt",
      "apiId": "gpt-5.4-nano",
      "contextWindow": 400000,
      "input": 0.20,
      "output": 1.25,
      "deprecated": false,
      "deprecatedAt": null,
      "tokenizerStrategy": "browser-tiktoken-o200k",
      "confidence": "exact",
      "notes": "Cached input $0.02/M."
    },
    {
      "slug": "gpt-5-4-pro",
      "displayName": "GPT-5.4 Pro",
      "provider": "openai",
      "family": "gpt",
      "apiId": "gpt-5.4-pro",
      "contextWindow": 400000,
      "input": 30.00,
      "output": 180.00,
      "deprecated": false,
      "deprecatedAt": null,
      "tokenizerStrategy": "browser-tiktoken-o200k",
      "confidence": "exact",
      "notes": "Premium tier. Above 128K input: $60/$270 long-context tier."
    },
    {
      "slug": "gpt-5-3",
      "displayName": "GPT-5.3",
      "provider": "openai",
      "family": "gpt",
      "apiId": "gpt-5.3-chat-latest",
      "contextWindow": 400000,
      "input": 1.75,
      "output": 14.00,
      "deprecated": false,
      "deprecatedAt": null,
      "tokenizerStrategy": "browser-tiktoken-o200k",
      "confidence": "exact",
      "notes": "Same price tier as gpt-5.3-codex. Cached input $0.175/M."
    },
    {
      "slug": "gpt-5-2",
      "displayName": "GPT-5.2",
      "provider": "openai",
      "family": "gpt",
      "apiId": "gpt-5.2",
      "contextWindow": 400000,
      "input": 1.75,
      "output": 14.00,
      "deprecated": false,
      "deprecatedAt": null,
      "tokenizerStrategy": "browser-tiktoken-o200k",
      "confidence": "exact",
      "notes": "Cached input $0.175/M."
    },
    {
      "slug": "gpt-5-2-pro",
      "displayName": "GPT-5.2 Pro",
      "provider": "openai",
      "family": "gpt",
      "apiId": "gpt-5.2-pro",
      "contextWindow": 400000,
      "input": 21.00,
      "output": 168.00,
      "deprecated": false,
      "deprecatedAt": null,
      "tokenizerStrategy": "browser-tiktoken-o200k",
      "confidence": "exact",
      "notes": "Premium tier of GPT-5.2."
    },
    {
      "slug": "gpt-5-1",
      "displayName": "GPT-5.1",
      "provider": "openai",
      "family": "gpt",
      "apiId": "gpt-5.1",
      "contextWindow": 400000,
      "input": 1.25,
      "output": 10.00,
      "deprecated": false,
      "deprecatedAt": null,
      "tokenizerStrategy": "browser-tiktoken-o200k",
      "confidence": "exact",
      "notes": "Cached input $0.125/M."
    },
    {
      "slug": "gpt-5",
      "displayName": "GPT-5",
      "provider": "openai",
      "family": "gpt",
      "apiId": "gpt-5",
      "contextWindow": 400000,
      "input": 1.25,
      "output": 10.00,
      "deprecated": false,
      "deprecatedAt": null,
      "tokenizerStrategy": "browser-tiktoken-o200k",
      "confidence": "exact",
      "notes": "Original GPT-5 baseline. Cached input $0.125/M."
    },
    {
      "slug": "gpt-5-mini",
      "displayName": "GPT-5 Mini",
      "provider": "openai",
      "family": "gpt",
      "apiId": "gpt-5-mini",
      "contextWindow": 400000,
      "input": 0.25,
      "output": 2.00,
      "deprecated": false,
      "deprecatedAt": null,
      "tokenizerStrategy": "browser-tiktoken-o200k",
      "confidence": "exact",
      "notes": "Cached input $0.025/M."
    },
    {
      "slug": "gpt-5-nano",
      "displayName": "GPT-5 Nano",
      "provider": "openai",
      "family": "gpt",
      "apiId": "gpt-5-nano",
      "contextWindow": 400000,
      "input": 0.05,
      "output": 0.40,
      "deprecated": false,
      "deprecatedAt": null,
      "tokenizerStrategy": "browser-tiktoken-o200k",
      "confidence": "exact",
      "notes": "Cheapest input rate of any exact-tokenizer model. Cached input $0.005/M."
    },
    {
      "slug": "gpt-5-pro",
      "displayName": "GPT-5 Pro",
      "provider": "openai",
      "family": "gpt",
      "apiId": "gpt-5-pro",
      "contextWindow": 400000,
      "input": 15.00,
      "output": 120.00,
      "deprecated": false,
      "deprecatedAt": null,
      "tokenizerStrategy": "browser-tiktoken-o200k",
      "confidence": "exact",
      "notes": "Premium tier of original GPT-5."
    },
    {
      "slug": "gpt-4-1",
      "displayName": "GPT-4.1",
      "provider": "openai",
      "family": "gpt",
      "apiId": "gpt-4.1",
      "contextWindow": 1000000,
      "input": 2.00,
      "output": 8.00,
      "deprecated": false,
      "deprecatedAt": null,
      "tokenizerStrategy": "browser-tiktoken-o200k",
      "confidence": "exact",
      "notes": "1M-token context window. Cached input $0.50/M. Still callable via API after Feb 2026 ChatGPT retirement."
    },
    {
      "slug": "gpt-4-1-mini",
      "displayName": "GPT-4.1 Mini",
      "provider": "openai",
      "family": "gpt",
      "apiId": "gpt-4.1-mini",
      "contextWindow": 1000000,
      "input": 0.40,
      "output": 1.60,
      "deprecated": false,
      "deprecatedAt": null,
      "tokenizerStrategy": "browser-tiktoken-o200k",
      "confidence": "exact",
      "notes": "1M-token context window. Cached input $0.10/M. Still on API after Feb 2026 ChatGPT retirement."
    },
    {
      "slug": "gpt-4-1-nano",
      "displayName": "GPT-4.1 Nano",
      "provider": "openai",
      "family": "gpt",
      "apiId": "gpt-4.1-nano",
      "contextWindow": 1000000,
      "input": 0.10,
      "output": 0.40,
      "deprecated": false,
      "deprecatedAt": null,
      "tokenizerStrategy": "browser-tiktoken-o200k",
      "confidence": "exact",
      "notes": "1M-token context window. Cached input $0.025/M."
    },
    {
      "slug": "o3",
      "displayName": "o3",
      "provider": "openai",
      "family": "gpt",
      "apiId": "o3",
      "contextWindow": 200000,
      "input": 2.00,
      "output": 8.00,
      "deprecated": false,
      "deprecatedAt": null,
      "tokenizerStrategy": "browser-tiktoken-o200k",
      "confidence": "exact",
      "notes": "Reasoning-tier model. Cached input $0.50/M."
    },
    {
      "slug": "o3-mini",
      "displayName": "o3-mini",
      "provider": "openai",
      "family": "gpt",
      "apiId": "o3-mini",
      "contextWindow": 200000,
      "input": 1.10,
      "output": 4.40,
      "deprecated": false,
      "deprecatedAt": null,
      "tokenizerStrategy": "browser-tiktoken-o200k",
      "confidence": "exact",
      "notes": "Smaller reasoning model. Cached input $0.55/M."
    },
    {
      "slug": "o3-pro",
      "displayName": "o3-pro",
      "provider": "openai",
      "family": "gpt",
      "apiId": "o3-pro",
      "contextWindow": 200000,
      "input": 20.00,
      "output": 80.00,
      "deprecated": false,
      "deprecatedAt": null,
      "tokenizerStrategy": "browser-tiktoken-o200k",
      "confidence": "exact",
      "notes": "Premium reasoning tier."
    },
    {
      "slug": "o4-mini",
      "displayName": "o4-mini",
      "provider": "openai",
      "family": "gpt",
      "apiId": "o4-mini",
      "contextWindow": 200000,
      "input": 1.10,
      "output": 4.40,
      "deprecated": false,
      "deprecatedAt": null,
      "tokenizerStrategy": "browser-tiktoken-o200k",
      "confidence": "exact",
      "notes": "Reasoning model. Cached input $0.275/M. Still on API after Feb 2026 ChatGPT retirement."
    },
    {
      "slug": "gpt-4o",
      "displayName": "GPT-4o",
      "provider": "openai",
      "family": "gpt",
      "apiId": "gpt-4o-2024-08-06",
      "contextWindow": 128000,
      "input": 2.50,
      "output": 10.00,
      "deprecated": false,
      "deprecatedAt": null,
      "tokenizerStrategy": "browser-tiktoken-o200k",
      "confidence": "exact",
      "notes": "Cached input $1.25/M. Retired from ChatGPT Feb 13 2026 but still callable via API. Pricing from openai.com/api/pricing."
    },
    {
      "slug": "gpt-4o-mini",
      "displayName": "GPT-4o mini",
      "provider": "openai",
      "family": "gpt",
      "apiId": "gpt-4o-mini",
      "contextWindow": 128000,
      "input": 0.15,
      "output": 0.60,
      "deprecated": false,
      "deprecatedAt": null,
      "tokenizerStrategy": "browser-tiktoken-o200k",
      "confidence": "exact",
      "notes": "Pricing from openai.com/api/pricing"
    },
    {
      "slug": "gpt-4-turbo",
      "displayName": "GPT-4 Turbo",
      "provider": "openai",
      "family": "gpt",
      "apiId": "gpt-4-turbo-2024-04-09",
      "contextWindow": 128000,
      "input": 10.00,
      "output": 30.00,
      "deprecated": false,
      "deprecatedAt": null,
      "tokenizerStrategy": "browser-tiktoken-cl100k",
      "confidence": "exact",
      "notes": "Pricing from openai.com/api/pricing"
    },
    {
      "slug": "gemini-3-1-pro",
      "displayName": "Gemini 3.1 Pro",
      "provider": "google",
      "family": "gemini",
      "apiId": "gemini-3.1-pro-preview",
      "contextWindow": 1000000,
      "input": 2.00,
      "output": 12.00,
      "deprecated": false,
      "deprecatedAt": null,
      "tokenizerStrategy": "api",
      "confidence": "exact",
      "notes": "Preview tier. Input rate for ≤200k context; >200k charged at $4 input / $18 output. Pricing from ai.google.dev/pricing."
    },
    {
      "slug": "gemini-3-flash",
      "displayName": "Gemini 3 Flash",
      "provider": "google",
      "family": "gemini",
      "apiId": "gemini-3-flash-preview",
      "contextWindow": 1000000,
      "input": 0.50,
      "output": 3.00,
      "deprecated": false,
      "deprecatedAt": null,
      "tokenizerStrategy": "api",
      "confidence": "exact",
      "notes": "Preview tier. Pricing from ai.google.dev/pricing."
    },
    {
      "slug": "gemini-3-1-flash-lite",
      "displayName": "Gemini 3.1 Flash-Lite",
      "provider": "google",
      "family": "gemini",
      "apiId": "gemini-3.1-flash-lite-preview",
      "contextWindow": 1000000,
      "input": 0.25,
      "output": 1.50,
      "deprecated": false,
      "deprecatedAt": null,
      "tokenizerStrategy": "api",
      "confidence": "exact",
      "notes": "Preview tier. Pricing from ai.google.dev/pricing."
    },
    {
      "slug": "gemini-2-5-pro",
      "displayName": "Gemini 2.5 Pro",
      "provider": "google",
      "family": "gemini",
      "apiId": "gemini-2.5-pro",
      "contextWindow": 2000000,
      "input": 1.25,
      "output": 10.00,
      "deprecated": false,
      "deprecatedAt": null,
      "tokenizerStrategy": "api",
      "confidence": "exact",
      "notes": "Pricing from ai.google.dev/pricing — input rate for ≤200k context; >200k charged at $2.50 input / $15 output."
    },
    {
      "slug": "gemini-2-5-flash",
      "displayName": "Gemini 2.5 Flash",
      "provider": "google",
      "family": "gemini",
      "apiId": "gemini-2.5-flash",
      "contextWindow": 1000000,
      "input": 0.30,
      "output": 2.50,
      "deprecated": false,
      "deprecatedAt": null,
      "tokenizerStrategy": "api",
      "confidence": "exact",
      "notes": "Pricing from ai.google.dev/pricing. Cached input $0.03/M."
    },
    {
      "slug": "gemini-2-5-flash-lite",
      "displayName": "Gemini 2.5 Flash-Lite",
      "provider": "google",
      "family": "gemini",
      "apiId": "gemini-2.5-flash-lite",
      "contextWindow": 1000000,
      "input": 0.10,
      "output": 0.40,
      "deprecated": false,
      "deprecatedAt": null,
      "tokenizerStrategy": "api",
      "confidence": "exact",
      "notes": "Cheapest GA Gemini model. Pricing from ai.google.dev/pricing."
    },
    {
      "slug": "llama-3-3-70b",
      "displayName": "Llama 3.3 70B",
      "provider": "meta",
      "family": "llama",
      "apiId": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
      "contextWindow": 128000,
      "input": 0.88,
      "output": 0.88,
      "deprecated": false,
      "deprecatedAt": null,
      "tokenizerStrategy": "browser-bpe-llama",
      "confidence": "approx-3pct",
      "notes": "Together.ai's current Llama flagship as of April 2026. Same per-token rate for input and output. Tokenizer ≈±3% vs reference."
    },
    {
      "slug": "llama-3-1-405b",
      "displayName": "Llama 3.1 405B",
      "provider": "meta",
      "family": "llama",
      "apiId": "meta-llama/llama-3.1-405b-instruct",
      "contextWindow": 128000,
      "input": 3.50,
      "output": 3.50,
      "deprecated": false,
      "deprecatedAt": null,
      "tokenizerStrategy": "browser-bpe-llama",
      "confidence": "approx-3pct",
      "notes": "Indicative pricing — no longer on Together's main pricing page (April 2026); availability varies by provider (Replicate, Groq, Fireworks). Verify current rate before invoicing."
    },
    {
      "slug": "llama-3-1-70b",
      "displayName": "Llama 3.1 70B",
      "provider": "meta",
      "family": "llama",
      "apiId": "meta-llama/llama-3.1-70b-instruct",
      "contextWindow": 128000,
      "input": 0.59,
      "output": 0.79,
      "deprecated": false,
      "deprecatedAt": null,
      "tokenizerStrategy": "browser-bpe-llama",
      "confidence": "approx-3pct",
      "notes": "Predecessor to Llama 3.3 70B. Indicative pricing — no longer on Together's main pricing page (April 2026); see Llama 3.3 70B for current Together default."
    },
    {
      "slug": "llama-3-1-8b",
      "displayName": "Llama 3.1 8B",
      "provider": "meta",
      "family": "llama",
      "apiId": "meta-llama/llama-3.1-8b-instruct",
      "contextWindow": 128000,
      "input": 0.18,
      "output": 0.18,
      "deprecated": false,
      "deprecatedAt": null,
      "tokenizerStrategy": "browser-bpe-llama",
      "confidence": "approx-3pct",
      "notes": "Indicative pricing — Together's current Llama small model is Llama 3 8B Instruct Lite at $0.10/$0.10. Verify your provider."
    },
    {
      "slug": "mistral-large",
      "displayName": "Mistral Large",
      "provider": "mistral",
      "family": "mistral",
      "apiId": "mistral-large-latest",
      "contextWindow": 128000,
      "input": 2.00,
      "output": 6.00,
      "deprecated": false,
      "deprecatedAt": null,
      "tokenizerStrategy": "browser-bpe-mistral",
      "confidence": "approx-3pct",
      "notes": "Pricing from mistral.ai/pricing; tokenizer ≈±3% vs reference"
    },
    {
      "slug": "deepseek-v3",
      "displayName": "DeepSeek V3",
      "provider": "deepseek",
      "family": "deepseek",
      "apiId": "deepseek-chat",
      "contextWindow": 128000,
      "input": 0.27,
      "output": 1.10,
      "deprecated": false,
      "deprecatedAt": null,
      "tokenizerStrategy": "browser-bpe-deepseek",
      "confidence": "approx-3pct",
      "notes": "Pricing from api-docs.deepseek.com (DeepSeek's own API). Higher rates apply when accessed via Together ($0.60/$1.70 — see deepseek-v3-1). Tokenizer ≈±3% vs reference."
    },
    {
      "slug": "deepseek-v3-1",
      "displayName": "DeepSeek V3.1",
      "provider": "deepseek",
      "family": "deepseek",
      "apiId": "deepseek-ai/DeepSeek-V3.1",
      "contextWindow": 128000,
      "input": 0.60,
      "output": 1.70,
      "deprecated": false,
      "deprecatedAt": null,
      "tokenizerStrategy": "browser-bpe-deepseek",
      "confidence": "approx-3pct",
      "notes": "Together.ai listing as of April 2026. DeepSeek's direct API offers V3 at $0.27/$1.10 (cheaper). Tokenizer ≈±3%."
    },
    {
      "slug": "deepseek-r1",
      "displayName": "DeepSeek R1",
      "provider": "deepseek",
      "family": "deepseek",
      "apiId": "deepseek-ai/DeepSeek-R1",
      "contextWindow": 128000,
      "input": 3.00,
      "output": 7.00,
      "deprecated": false,
      "deprecatedAt": null,
      "tokenizerStrategy": "browser-bpe-deepseek",
      "confidence": "approx-3pct",
      "notes": "Reasoning-tier DeepSeek (chain-of-thought style). Pricing via Together.ai. Outputs include reasoning tokens — actual billed output is typically several times higher than visible reply length. Tokenizer ≈±3%."
    },
    {
      "slug": "qwen-2-5-72b",
      "displayName": "Qwen 2.5 72B",
      "provider": "alibaba",
      "family": "qwen",
      "apiId": "qwen2.5-72b-instruct",
      "contextWindow": 131072,
      "input": 0.90,
      "output": 0.90,
      "deprecated": false,
      "deprecatedAt": null,
      "tokenizerStrategy": "browser-bpe-qwen",
      "confidence": "approx-3pct",
      "notes": "Indicative pricing via Together.ai; tokenizer ≈±3% vs reference"
    },
    {
      "slug": "qwen-2-5-coder",
      "displayName": "Qwen 2.5 Coder 32B",
      "provider": "alibaba",
      "family": "qwen",
      "apiId": "qwen2.5-coder-32b-instruct",
      "contextWindow": 131072,
      "input": 0.80,
      "output": 0.80,
      "deprecated": false,
      "deprecatedAt": null,
      "tokenizerStrategy": "browser-bpe-qwen",
      "confidence": "approx-3pct",
      "notes": "Predecessor to Qwen3 Coder 480B. Indicative pricing — verify current provider rate. Tokenizer ≈±3% vs reference."
    },
    {
      "slug": "qwen-3-coder-480b",
      "displayName": "Qwen3 Coder 480B",
      "provider": "alibaba",
      "family": "qwen",
      "apiId": "Qwen/Qwen3-Coder-480B-A35B-Instruct",
      "contextWindow": 131072,
      "input": 2.00,
      "output": 2.00,
      "deprecated": false,
      "deprecatedAt": null,
      "tokenizerStrategy": "browser-bpe-qwen",
      "confidence": "approx-3pct",
      "notes": "Current Alibaba flagship coding model. 480B-parameter MoE with 35B active. Pricing via Together.ai. Tokenizer ≈±3%."
    },
    {
      "slug": "glm-5-1",
      "displayName": "GLM-5.1",
      "provider": "zhipu",
      "family": "glm",
      "apiId": "zai-org/GLM-5.1-Air",
      "contextWindow": 128000,
      "input": 1.40,
      "output": 4.40,
      "deprecated": false,
      "deprecatedAt": null,
      "tokenizerStrategy": "browser-bpe-llama",
      "confidence": "approx-10pct",
      "notes": "Zhipu AI's flagship. Pricing via Together.ai. Tokenizer is an ChatGLM-family BPE — we approximate using Llama-family BPE as a proxy (≈±10%); replace once we ship the real ChatGLM tokenizer."
    }
  ]
}
