{
  "id": "llm-coding",
  "title": "LLM 编程榜",
  "description": "查看面向编程任务的模型表现排行。",
  "category": "Benchmark",
  "sourceLabel": "公开 benchmark 数据",
  "sourceUrl": "https://openrouter.ai/rankings?benchmark=coding",
  "updatedAt": "2026-03-26T07:00:14.164Z",
  "methodology": "解析公开 benchmark 页面内嵌的数据块，提取 coding 榜单，并补充模型元数据。",
  "items": [
    {
      "rank": 1,
      "model": "GPT-5.4",
      "provider": "OpenAI",
      "score": "57.3",
      "change": 0,
      "summary": "GPT-5.4 is OpenAI’s latest frontier model, unifying the Codex and GPT lines into a single system. It features a 1M+ token context window…",
      "link": "https://openrouter.ai/openai/gpt-5.4",
      "metrics": {
        "benchmark": "coding",
        "score": "57.3",
        "openrouter_slug": "openai/gpt-5.4",
        "context": "1.1M"
      },
      "secondary_en": "OpenAI",
      "secondary_zh": "OpenAI",
      "summary_zh": "GPT-5.4是OpenAI最新的前沿模型，它将Codex和GPT系列统一整合为一个单一系统。该模型具备超过100万令牌的上下文窗口……",
      "summary_en": "GPT-5.4 is OpenAI’s latest frontier model, unifying the Codex and GPT lines into a single system. It features a 1M+ token context window…"
    },
    {
      "rank": 2,
      "model": "Gemini 3.1 Pro Preview",
      "provider": "Google",
      "score": "55.5",
      "change": 0,
      "summary": "Gemini 3.1 Pro Preview is Google’s frontier reasoning model, delivering enhanced software engineering performance, improved agentic relia…",
      "link": "https://openrouter.ai/google/gemini-3.1-pro-preview",
      "metrics": {
        "benchmark": "coding",
        "score": "55.5",
        "openrouter_slug": "google/gemini-3.1-pro-preview",
        "context": "1.0M"
      },
      "secondary_en": "Google",
      "secondary_zh": "Google",
      "summary_zh": "Gemini 3.1 Pro Preview是谷歌的前沿推理模型，在软件工程性能上实现显著提升，增强了代理可靠性……",
      "summary_en": "Gemini 3.1 Pro Preview is Google’s frontier reasoning model, delivering enhanced software engineering performance, improved agentic relia…"
    },
    {
      "rank": 3,
      "model": "GPT-5.3-Codex",
      "provider": "OpenAI",
      "score": "53.1",
      "change": 0,
      "summary": "GPT-5.3-Codex is OpenAI’s most advanced agentic coding model, combining the frontier software engineering performance of GPT-5.2-Codex wi…",
      "link": "https://openrouter.ai/openai/gpt-5.3-codex",
      "metrics": {
        "benchmark": "coding",
        "score": "53.1",
        "openrouter_slug": "openai/gpt-5.3-codex",
        "context": "400K"
      },
      "secondary_en": "OpenAI",
      "secondary_zh": "OpenAI",
      "summary_zh": "GPT-5.3-Codex是OpenAI最先进的代理式编码模型，它融合了GPT-5.2-Codex的前沿软件工程性能……",
      "summary_en": "GPT-5.3-Codex is OpenAI’s most advanced agentic coding model, combining the frontier software engineering performance of GPT-5.2-Codex wi…"
    },
    {
      "rank": 4,
      "model": "GPT-5.4 Mini",
      "provider": "OpenAI",
      "score": "51.5",
      "change": 0,
      "summary": "GPT-5.4 mini brings the core capabilities of GPT-5.4 to a faster, more efficient model optimized for high-throughput workloads. It suppor…",
      "link": "https://openrouter.ai/openai/gpt-5.4-mini",
      "metrics": {
        "benchmark": "coding",
        "score": "51.5",
        "openrouter_slug": "openai/gpt-5.4-mini",
        "context": "400K"
      },
      "secondary_en": "OpenAI",
      "secondary_zh": "OpenAI",
      "summary_zh": "GPT-5.4 mini 将 GPT-5.4 的核心能力融入一个更快速、更高效的模型，专为高吞吐量工作负载优化。它支持…",
      "summary_en": "GPT-5.4 mini brings the core capabilities of GPT-5.4 to a faster, more efficient model optimized for high-throughput workloads. It suppor…"
    },
    {
      "rank": 5,
      "model": "Claude Sonnet 4.6",
      "provider": "Anthropic",
      "score": "50.9",
      "change": 0,
      "summary": "Sonnet 4.6 is Anthropic's most capable Sonnet-class model yet, with frontier performance across coding, agents, and professional work. It…",
      "link": "https://openrouter.ai/anthropic/claude-sonnet-4.6",
      "metrics": {
        "benchmark": "coding",
        "score": "50.9",
        "openrouter_slug": "anthropic/claude-sonnet-4.6",
        "context": "1.0M"
      },
      "secondary_en": "Anthropic",
      "secondary_zh": "Anthropic",
      "summary_zh": "Sonnet 4.6是Anthropic迄今为止最强大的Sonnet系列模型，在编程、智能体应用和专业工作领域均展现出前沿性能。它……",
      "summary_en": "Sonnet 4.6 is Anthropic's most capable Sonnet-class model yet, with frontier performance across coding, agents, and professional work. It…"
    },
    {
      "rank": 6,
      "model": "GPT-5.2",
      "provider": "OpenAI",
      "score": "48.7",
      "change": 0,
      "summary": "GPT-5.2 is the latest frontier-grade model in the GPT-5 series, offering stronger agentic and long context performance compared to GPT-5.1…",
      "link": "https://openrouter.ai/openai/gpt-5.2",
      "metrics": {
        "benchmark": "coding",
        "score": "48.7",
        "openrouter_slug": "openai/gpt-5.2",
        "context": "400K"
      },
      "secondary_en": "OpenAI",
      "secondary_zh": "OpenAI",
      "summary_zh": "GPT-5.2是GPT-5系列中的最新前沿级模型，相比GPT-5.1，它在代理能力和长上下文处理性能上表现更加强大。",
      "summary_en": "GPT-5.2 is the latest frontier-grade model in the GPT-5 series, offering stronger agentic and long context performance compared to GPT-5.1…"
    },
    {
      "rank": 7,
      "model": "Claude Opus 4.6",
      "provider": "Anthropic",
      "score": "48.1",
      "change": 0,
      "summary": "Opus 4.6 is Anthropic’s strongest model for coding and long-running professional tasks. It is built for agents that operate across entire…",
      "link": "https://openrouter.ai/anthropic/claude-opus-4.6",
      "metrics": {
        "benchmark": "coding",
        "score": "48.1",
        "openrouter_slug": "anthropic/claude-opus-4.6",
        "context": "1.0M"
      },
      "secondary_en": "Anthropic",
      "secondary_zh": "Anthropic",
      "summary_zh": "Opus 4.6是Anthropic针对编程和长时间专业任务推出的最强模型。它专为能在整个……范围内运行的智能体而构建。",
      "summary_en": "Opus 4.6 is Anthropic’s strongest model for coding and long-running professional tasks. It is built for agents that operate across entire…"
    },
    {
      "rank": 8,
      "model": "Claude Opus 4.5",
      "provider": "Anthropic",
      "score": "47.8",
      "change": 0,
      "summary": "Claude Opus 4.5 is Anthropic’s frontier reasoning model optimized for complex software engineering, agentic workflows, and long-horizon c…",
      "link": "https://openrouter.ai/anthropic/claude-opus-4.5",
      "metrics": {
        "benchmark": "coding",
        "score": "47.8",
        "openrouter_slug": "anthropic/claude-opus-4.5",
        "context": "200K"
      },
      "secondary_en": "Anthropic",
      "secondary_zh": "Anthropic",
      "summary_zh": "Claude Opus 4.5是Anthropic公司推出的前沿推理模型，专为复杂软件工程、智能体工作流和长周期任务优化设计。",
      "summary_en": "Claude Opus 4.5 is Anthropic’s frontier reasoning model optimized for complex software engineering, agentic workflows, and long-horizon c…"
    },
    {
      "rank": 9,
      "model": "Gemini 2.5 Pro",
      "provider": "Google",
      "score": "46.7",
      "change": 0,
      "summary": "Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. It emplo…",
      "link": "https://openrouter.ai/google/gemini-2.5-pro-exp-03-25",
      "metrics": {
        "benchmark": "coding",
        "score": "46.7",
        "openrouter_slug": "google/gemini-2.5-pro-exp-03-25",
        "context": "1.0M"
      },
      "secondary_en": "Google",
      "secondary_zh": "Google",
      "summary_zh": "Gemini 2.5 Pro是谷歌推出的尖端人工智能模型，专为高级推理、编程、数学及科学任务而设计。它采用…",
      "summary_en": "Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. It emplo…"
    },
    {
      "rank": 10,
      "model": "Gemini 3 Pro Preview (high)",
      "provider": "Google",
      "score": "46.5",
      "change": 0,
      "summary": "Coding benchmark score.",
      "link": "https://openrouter.ai/google/gemini-3-pro-preview",
      "metrics": {
        "benchmark": "coding",
        "score": "46.5",
        "openrouter_slug": "google/gemini-3-pro-preview",
        "context": "-"
      },
      "secondary_en": "Google",
      "secondary_zh": "Google",
      "summary_zh": "编码基准分数。",
      "summary_en": "Coding benchmark score."
    }
  ]
}
