{
  "id": "llm-agentic",
  "title": "LLM 智能体榜",
  "description": "查看面向智能体场景的模型表现排行。",
  "category": "Benchmark",
  "sourceLabel": "公开 benchmark 数据",
  "sourceUrl": "https://openrouter.ai/rankings?benchmark=agentic",
  "updatedAt": "2026-03-26T07:00:14.164Z",
  "methodology": "解析公开 benchmark 页面内嵌的数据块，提取 agentic 榜单，并补充模型元数据。",
  "items": [
    {
      "rank": 1,
      "model": "GPT-5.4",
      "provider": "OpenAI",
      "score": "69.4",
      "change": 0,
      "summary": "GPT-5.4 is OpenAI’s latest frontier model, unifying the Codex and GPT lines into a single system. It features a 1M+ token context window…",
      "link": "https://openrouter.ai/openai/gpt-5.4",
      "metrics": {
        "benchmark": "agentic",
        "score": "69.4",
        "openrouter_slug": "openai/gpt-5.4",
        "context": "1.1M"
      },
      "secondary_en": "OpenAI",
      "secondary_zh": "OpenAI",
      "summary_zh": "GPT-5.4是OpenAI最新的前沿模型，它将Codex和GPT系列统一整合为一个单一系统。该模型具备超过100万令牌的上下文窗口……",
      "summary_en": "GPT-5.4 is OpenAI’s latest frontier model, unifying the Codex and GPT lines into a single system. It features a 1M+ token context window…"
    },
    {
      "rank": 2,
      "model": "Claude Opus 4.6",
      "provider": "Anthropic",
      "score": "67.6",
      "change": 0,
      "summary": "Opus 4.6 is Anthropic’s strongest model for coding and long-running professional tasks. It is built for agents that operate across entire…",
      "link": "https://openrouter.ai/anthropic/claude-opus-4.6",
      "metrics": {
        "benchmark": "agentic",
        "score": "67.6",
        "openrouter_slug": "anthropic/claude-opus-4.6",
        "context": "1.0M"
      },
      "secondary_en": "Anthropic",
      "secondary_zh": "Anthropic",
      "summary_zh": "Opus 4.6是Anthropic针对编程和长时间专业任务推出的最强模型。它专为能在整个...范围内运行的智能体而构建。",
      "summary_en": "Opus 4.6 is Anthropic’s strongest model for coding and long-running professional tasks. It is built for agents that operate across entire…"
    },
    {
      "rank": 3,
      "model": "GLM 5 Turbo",
      "provider": "Z.ai",
      "score": "66.1",
      "change": 0,
      "summary": "GLM-5 Turbo is a new model from Z.ai designed for fast inference and strong performance in agent-driven environments such as OpenClaw sce…",
      "link": "https://openrouter.ai/z-ai/glm-5-turbo",
      "metrics": {
        "benchmark": "agentic",
        "score": "66.1",
        "openrouter_slug": "z-ai/glm-5-turbo",
        "context": "203K"
      },
      "secondary_en": "Z.ai",
      "secondary_zh": "Z.ai",
      "summary_zh": "GLM-5 Turbo是Z.ai推出的新模型，专为快速推理和在OpenClaw等智能体驱动环境中实现强劲性能而设计…",
      "summary_en": "GLM-5 Turbo is a new model from Z.ai designed for fast inference and strong performance in agent-driven environments such as OpenClaw sce…"
    },
    {
      "rank": 4,
      "model": "GLM 5",
      "provider": "Z.ai",
      "score": "63.1",
      "change": 0,
      "summary": "GLM-5 is Z.ai’s flagship open-source foundation model engineered for complex systems design and long-horizon agent workflows. Built for e…",
      "link": "https://openrouter.ai/z-ai/glm-5",
      "metrics": {
        "benchmark": "agentic",
        "score": "63.1",
        "openrouter_slug": "z-ai/glm-5",
        "context": "80K"
      },
      "secondary_en": "Z.ai",
      "secondary_zh": "Z.ai",
      "summary_zh": "GLM-5是Z.ai推出的旗舰开源基础模型，专为复杂系统设计与长周期智能体工作流而打造。",
      "summary_en": "GLM-5 is Z.ai’s flagship open-source foundation model engineered for complex systems design and long-horizon agent workflows. Built for e…"
    },
    {
      "rank": 5,
      "model": "Claude Sonnet 4.6",
      "provider": "Anthropic",
      "score": "63",
      "change": 0,
      "summary": "Sonnet 4.6 is Anthropic's most capable Sonnet-class model yet, with frontier performance across coding, agents, and professional work. It…",
      "link": "https://openrouter.ai/anthropic/claude-sonnet-4.6",
      "metrics": {
        "benchmark": "agentic",
        "score": "63",
        "openrouter_slug": "anthropic/claude-sonnet-4.6",
        "context": "1.0M"
      },
      "secondary_en": "Anthropic",
      "secondary_zh": "Anthropic",
      "summary_zh": "Sonnet 4.6是Anthropic迄今为止最强大的Sonnet系列模型，在编程、智能体应用和专业工作领域均展现出前沿性能。它……",
      "summary_en": "Sonnet 4.6 is Anthropic's most capable Sonnet-class model yet, with frontier performance across coding, agents, and professional work. It…"
    },
    {
      "rank": 6,
      "model": "MiMo-V2-Pro",
      "provider": "Xiaomi",
      "score": "62.8",
      "change": 0,
      "summary": "MiMo-V2-Pro is Xiaomi's flagship foundation model, featuring over 1T total parameters and a 1M context length, deeply optimized for agent…",
      "link": "https://openrouter.ai/xiaomi/mimo-v2-pro",
      "metrics": {
        "benchmark": "agentic",
        "score": "62.8",
        "openrouter_slug": "xiaomi/mimo-v2-pro",
        "context": "1.0M"
      },
      "secondary_en": "Xiaomi",
      "secondary_zh": "Xiaomi",
      "summary_zh": "MiMo-V2-Pro是小米的旗舰基础模型，拥有超过1万亿总参数和100万上下文长度，专为智能体场景深度优化……",
      "summary_en": "MiMo-V2-Pro is Xiaomi's flagship foundation model, featuring over 1T total parameters and a 1M context length, deeply optimized for agent…"
    },
    {
      "rank": 7,
      "model": "GPT-5.3-Codex",
      "provider": "OpenAI",
      "score": "62.2",
      "change": 0,
      "summary": "GPT-5.3-Codex is OpenAI’s most advanced agentic coding model, combining the frontier software engineering performance of GPT-5.2-Codex wi…",
      "link": "https://openrouter.ai/openai/gpt-5.3-codex",
      "metrics": {
        "benchmark": "agentic",
        "score": "62.2",
        "openrouter_slug": "openai/gpt-5.3-codex",
        "context": "400K"
      },
      "secondary_en": "OpenAI",
      "secondary_zh": "OpenAI",
      "summary_zh": "GPT-5.3-Codex是OpenAI最先进的代理式编码模型，它融合了GPT-5.2-Codex的前沿软件工程性能……",
      "summary_en": "GPT-5.3-Codex is OpenAI’s most advanced agentic coding model, combining the frontier software engineering performance of GPT-5.2-Codex wi…"
    },
    {
      "rank": 8,
      "model": "MiniMax M2.7",
      "provider": "MiniMax",
      "score": "61.5",
      "change": 0,
      "summary": "MiniMax-M2.7 is a next-generation large language model designed for autonomous, real-world productivity and continuous improvement. Built…",
      "link": "https://openrouter.ai/minimax/minimax-m2.7",
      "metrics": {
        "benchmark": "agentic",
        "score": "61.5",
        "openrouter_slug": "minimax/minimax-m2.7",
        "context": "205K"
      },
      "secondary_en": "MiniMax",
      "secondary_zh": "MiniMax",
      "summary_zh": "MiniMax-M2.7是新一代大型语言模型，专为自主、现实世界生产力及持续改进而设计。构建……",
      "summary_en": "MiniMax-M2.7 is a next-generation large language model designed for autonomous, real-world productivity and continuous improvement. Built…"
    },
    {
      "rank": 9,
      "model": "GPT-5.2",
      "provider": "OpenAI",
      "score": "60.2",
      "change": 0,
      "summary": "GPT-5.2 is the latest frontier-grade model in the GPT-5 series, offering stronger agentic and long context perfomance compared to GPT-5.1…",
      "link": "https://openrouter.ai/openai/gpt-5.2",
      "metrics": {
        "benchmark": "agentic",
        "score": "60.2",
        "openrouter_slug": "openai/gpt-5.2",
        "context": "400K"
      },
      "secondary_en": "OpenAI",
      "secondary_zh": "OpenAI",
      "summary_zh": "GPT-5.2是GPT-5系列中的最新前沿级模型，相比GPT-5.1，它在代理能力和长上下文处理性能上表现更加强大。",
      "summary_en": "GPT-5.2 is the latest frontier-grade model in the GPT-5 series, offering stronger agentic and long context perfomance compared to GPT-5.1…"
    },
    {
      "rank": 10,
      "model": "Claude Opus 4.5",
      "provider": "Anthropic",
      "score": "59.6",
      "change": 0,
      "summary": "Claude Opus 4.5 is Anthropic’s frontier reasoning model optimized for complex software engineering, agentic workflows, and long-horizon c…",
      "link": "https://openrouter.ai/anthropic/claude-opus-4.5",
      "metrics": {
        "benchmark": "agentic",
        "score": "59.6",
        "openrouter_slug": "anthropic/claude-opus-4.5",
        "context": "200K"
      },
      "secondary_en": "Anthropic",
      "secondary_zh": "Anthropic",
      "summary_zh": "Claude Opus 4.5是Anthropic公司推出的前沿推理模型，专为复杂软件工程、智能体工作流和长周期任务优化设计。",
      "summary_en": "Claude Opus 4.5 is Anthropic’s frontier reasoning model optimized for complex software engineering, agentic workflows, and long-horizon c…"
    }
  ]
}
