{"data":[{"id":"sakana/fugu-ultra","canonical_slug":"sakana/fugu-ultra-20260615","hugging_face_id":null,"name":"Sakana: Fugu Ultra","created":1782276303,"description":"Fugu Ultra is the higher-performance model in Sakana AI's Fugu family. Rather than a single monolithic model, Fugu is a learned multi-agent orchestration system: a language model trained to route...","context_length":1000000,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000050000","completion":"0.0000300000","web_search":"0.0100000000","input_cache_read":"0.0000005000"},"top_provider":{"context_length":1000000,"max_completion_tokens":128000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["include_reasoning","reasoning","structured_outputs","tool_choice","tools","web_search_options"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/sakana/fugu-ultra-20260615/endpoints"},"reasoning":{"mandatory":true,"default_enabled":true,"supported_efforts":["max","xhigh","high"],"default_effort":"xhigh"},"aliases":["sakana/fugu-ultra-20260615"]},{"id":"google/gemini-3-pro-image","canonical_slug":"google/gemini-3-pro-image-20260528","hugging_face_id":null,"name":"Google: Nano Banana Pro (Gemini 3 Pro Image)","created":1781754054,"description":"Nano Banana Pro is Google’s most advanced image-generation and editing model, built on Gemini 3 Pro. It extends the original Nano Banana with significantly improved multimodal reasoning, real-world grounding, and...","context_length":65536,"architecture":{"modality":"text+image->text+image","input_modalities":["image","text"],"output_modalities":["image","text"],"tokenizer":"Gemini","instruct_type":null},"pricing":{"prompt":"0.0000020000","completion":"0.0000120000","image":"0.0000020000","audio":"0.0000020000","web_search":"0.0140000000","internal_reasoning":"0.000012","input_cache_read":"0.0000002000","input_cache_write":"0.0000003750"},"top_provider":{"context_length":65536,"max_completion_tokens":32768,"is_moderated":false},"per_request_limits":null,"supported_parameters":["include_reasoning","max_tokens","reasoning","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_p"],"default_parameters":{},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/google/gemini-3-pro-image-20260528/endpoints"},"reasoning":{"mandatory":true},"aliases":["google/gemini-3-pro-image-20260528"]},{"id":"z-ai/glm-5.2","canonical_slug":"z-ai/glm-5.2-20260616","hugging_face_id":"zai-org/GLM-5.2","name":"Z.ai: GLM 5.2","created":1781631930,"description":"GLM 5.2 is a large-scale reasoning model from Z.ai. It supports text input and output with a 1M-token context window, and is suited for long-horizon agent workflows, project-level software engineering,...","context_length":1048576,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000009500","completion":"0.0000030000","input_cache_read":"0.0000001800"},"top_provider":{"context_length":1048576,"max_completion_tokens":32768,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_tokens","min_p","parallel_tool_calls","presence_penalty","reasoning","reasoning_effort","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{"temperature":1,"top_p":0.95,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/z-ai/glm-5.2-20260616/endpoints"},"benchmarks":{"design_arena":[{"arena":"agents","category":"androidnative","elo":1229,"win_rate":54,"rank":10},{"arena":"agents","category":"fullstack","elo":1297,"win_rate":64,"rank":3},{"arena":"agents","category":"htmlslides","elo":1202,"win_rate":51.9,"rank":8},{"arena":"agents","category":"mobileapps","elo":1242,"win_rate":53.9,"rank":5},{"arena":"agents","category":"python-pptxslides","elo":1222,"win_rate":49.3,"rank":5},{"arena":"agents","category":"webapps","elo":1283,"win_rate":57.6,"rank":3},{"arena":"models","category":"3d","elo":1376,"win_rate":64,"rank":1},{"arena":"models","category":"codecategories","elo":1363,"win_rate":62.3,"rank":1},{"arena":"models","category":"dataviz","elo":1337,"win_rate":61.5,"rank":3},{"arena":"models","category":"gamedev","elo":1365,"win_rate":62.1,"rank":2},{"arena":"models","category":"uicomponent","elo":1339,"win_rate":59.7,"rank":5},{"arena":"models","category":"website","elo":1359,"win_rate":61.8,"rank":1}],"artificial_analysis":{"intelligence_index":51.1,"coding_index":68.8,"agentic_index":43.1}},"reasoning":{"mandatory":false,"default_enabled":true,"supported_efforts":["xhigh","high"],"default_effort":"high"},"aliases":["z-ai/glm-5.2-20260616"]},{"id":"moonshotai/kimi-k2.7-code","canonical_slug":"moonshotai/kimi-k2.7-code-20260612","hugging_face_id":"moonshotai/Kimi-K2.7-Code","name":"MoonshotAI: Kimi K2.7 Code","created":1781266361,"description":"MoonshotAI: Kimi K2.7 Code is a coding-focused model in Moonshot AI's Kimi K2 family, built to complete end-to-end programming tasks reliably over long contexts. It uses a native multimodal mixture-of-experts...","context_length":262144,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000007400","completion":"0.0000035000","input_cache_read":"0.0000001500"},"top_provider":{"context_length":262144,"max_completion_tokens":16384,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_tokens","min_p","parallel_tool_calls","presence_penalty","reasoning","reasoning_effort","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/moonshotai/kimi-k2.7-code-20260612/endpoints"},"benchmarks":{"design_arena":[{"arena":"agents","category":"androidnative","elo":1245,"win_rate":56.3,"rank":6},{"arena":"agents","category":"fullstack","elo":1230,"win_rate":53.4,"rank":8},{"arena":"agents","category":"htmlslides","elo":1207,"win_rate":52.6,"rank":7},{"arena":"agents","category":"mobileapps","elo":1222,"win_rate":49.8,"rank":10},{"arena":"agents","category":"webapps","elo":1247,"win_rate":50.8,"rank":8},{"arena":"models","category":"3d","elo":1318,"win_rate":57.8,"rank":13},{"arena":"models","category":"codecategories","elo":1308,"win_rate":55.7,"rank":11},{"arena":"models","category":"dataviz","elo":1276,"win_rate":54.2,"rank":18},{"arena":"models","category":"gamedev","elo":1283,"win_rate":51.7,"rank":21},{"arena":"models","category":"uicomponent","elo":1300,"win_rate":55,"rank":13},{"arena":"models","category":"website","elo":1319,"win_rate":57.2,"rank":8}],"artificial_analysis":{"intelligence_index":41.9,"coding_index":60.8,"agentic_index":29.6}},"reasoning":{"mandatory":true,"default_enabled":true},"aliases":["moonshotai/kimi-k2.7-code-20260612"]},{"id":"~anthropic/claude-fable-latest","canonical_slug":"~anthropic/claude-fable-latest","hugging_face_id":null,"name":"Anthropic: Claude Fable Latest","created":1781029944,"description":"This model always redirects to the latest model in the Claude Fable family.","context_length":1000000,"architecture":{"modality":"text+image+file->text","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"Router","instruct_type":null},"pricing":{"prompt":"0.0000100000","completion":"0.0000500000","web_search":"0.0100000000","input_cache_read":"0.0000010000","input_cache_write":"0.0000125000"},"top_provider":{"context_length":1000000,"max_completion_tokens":128000,"is_moderated":true},"per_request_limits":null,"supported_parameters":["include_reasoning","max_completion_tokens","max_tokens","reasoning","response_format","stop","structured_outputs","tool_choice","tools","verbosity"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/~anthropic/claude-fable-latest/endpoints"},"reasoning":{"mandatory":true,"supported_efforts":["max","xhigh","high","medium","low"],"default_effort":"medium"},"aliases":["~anthropic/claude-fable-latest"]},{"id":"anthropic/claude-fable-5","canonical_slug":"anthropic/claude-5-fable-20260609","hugging_face_id":null,"name":"Anthropic: Claude Fable 5","created":1781007515,"description":"Claude Fable 5 is a Mythos-class model from Anthropic, built for autonomous knowledge work and coding. It supports text, image, and file inputs with text output, with reasoning support and...","context_length":1000000,"architecture":{"modality":"text+image+file->text","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"Claude","instruct_type":null},"pricing":{"prompt":"0.0000100000","completion":"0.0000500000","web_search":"0.0100000000","input_cache_read":"0.0000010000","input_cache_write":"0.0000125000"},"top_provider":{"context_length":1000000,"max_completion_tokens":128000,"is_moderated":true},"per_request_limits":null,"supported_parameters":["include_reasoning","max_completion_tokens","max_tokens","reasoning","response_format","stop","structured_outputs","tool_choice","tools","verbosity"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/anthropic/claude-5-fable-20260609/endpoints"},"benchmarks":{"design_arena":[{"arena":"agents","category":"agentichtmlslides","elo":1254,"win_rate":59.4,"rank":1},{"arena":"agents","category":"agenticslides(html)","elo":1252,"win_rate":59.5,"rank":1},{"arena":"agents","category":"htmlslides","elo":1260,"win_rate":60.6,"rank":1},{"arena":"models","category":"3d","elo":1370,"win_rate":67.2,"rank":2},{"arena":"models","category":"asciiart","elo":1368,"win_rate":70.1,"rank":1},{"arena":"models","category":"codecategories","elo":1350,"win_rate":63.3,"rank":2},{"arena":"models","category":"dataviz","elo":1380,"win_rate":70.2,"rank":1},{"arena":"models","category":"gamedev","elo":1380,"win_rate":65.1,"rank":1},{"arena":"models","category":"svg","elo":1371,"win_rate":71.8,"rank":1},{"arena":"models","category":"uicomponent","elo":1415,"win_rate":71.3,"rank":1},{"arena":"models","category":"website","elo":1343,"win_rate":62.5,"rank":2}],"artificial_analysis":{"intelligence_index":59.9,"coding_index":76.5,"agentic_index":52.8}},"reasoning":{"mandatory":true,"supported_efforts":["max","xhigh","high","medium","low"],"default_effort":"medium"},"aliases":["anthropic/claude-5-fable-20260609"]},{"id":"nvidia/nemotron-3-ultra-550b-a55b","canonical_slug":"nvidia/nemotron-3-ultra-550b-a55b-20260604","hugging_face_id":"nvidia/NVIDIA-Nemotron-3-Ultra-550B-A55B-BF16","name":"NVIDIA: Nemotron 3 Ultra","created":1780551208,"description":"NVIDIA Nemotron 3 Ultra is an open frontier-reasoning and orchestration model from NVIDIA, with 55B active parameters out of 550B total (MoE). Built on a hybrid Transformer-Mamba mixture-of-experts architecture, it...","context_length":1000000,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000005000","completion":"0.0000022000","input_cache_read":"0.0000001000"},"top_provider":{"context_length":262144,"max_completion_tokens":16384,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","max_tokens","min_p","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_p"],"default_parameters":{"temperature":1,"top_p":0.95,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/nvidia/nemotron-3-ultra-550b-a55b-20260604/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"3d","elo":1210,"win_rate":43.1,"rank":41},{"arena":"models","category":"asciiart","elo":1094,"win_rate":35.6,"rank":44},{"arena":"models","category":"codecategories","elo":1172,"win_rate":36.6,"rank":60},{"arena":"models","category":"dataviz","elo":1169,"win_rate":38.8,"rank":60},{"arena":"models","category":"gamedev","elo":1194,"win_rate":39,"rank":54},{"arena":"models","category":"svg","elo":1137,"win_rate":38,"rank":44},{"arena":"models","category":"uicomponent","elo":1177,"win_rate":38.2,"rank":55},{"arena":"models","category":"website","elo":1137,"win_rate":32,"rank":76}],"artificial_analysis":{"intelligence_index":37.8,"coding_index":49.3,"agentic_index":27.4}},"reasoning":{"mandatory":false,"default_enabled":true,"supports_max_tokens":true,"supported_efforts":["high","medium"],"default_effort":"high"},"aliases":["nvidia/nemotron-3-ultra-550b-a55b-20260604"]},{"id":"qwen/qwen3.7-plus","canonical_slug":"qwen/qwen3.7-plus-20260602","hugging_face_id":null,"name":"Qwen: Qwen3.7 Plus","created":1780491783,"description":"Qwen3.7-Plus is a cost-effective model in Alibaba's Qwen3.7 series. It supports text and image input with text output, building on the series' text capabilities with a comprehensive upgrade to its...","context_length":1000000,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Qwen","instruct_type":null},"pricing":{"prompt":"0.0000003200","completion":"0.0000012800","input_cache_read":"0.0000000640","input_cache_write":"0.0000004000"},"top_provider":{"context_length":1000000,"max_completion_tokens":65536,"is_moderated":false},"per_request_limits":null,"supported_parameters":["include_reasoning","logprobs","max_tokens","presence_penalty","reasoning","response_format","seed","structured_outputs","temperature","tool_choice","tools","top_logprobs","top_p"],"default_parameters":{},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/qwen/qwen3.7-plus-20260602/endpoints"},"benchmarks":{"design_arena":[],"artificial_analysis":{"intelligence_index":39,"coding_index":55.9,"agentic_index":20.8}},"reasoning":{"mandatory":false,"default_enabled":true},"aliases":["qwen/qwen3.7-plus-20260602"]},{"id":"minimax/minimax-m3","canonical_slug":"minimax/minimax-m3-20260531","hugging_face_id":"MiniMaxAI/Minimax-M3","name":"MiniMax: MiniMax M3","created":1780245374,"description":"MiniMax-M3 is a multimodal foundation model from MiniMax. It supports text, image, and video inputs with text output, a 1M-token context window, and is suited for long-horizon agentic work, coding,...","context_length":1048576,"architecture":{"modality":"text+image+video->text","input_modalities":["text","image","video"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000003000","completion":"0.0000012000","input_cache_read":"0.0000000600"},"top_provider":{"context_length":524288,"max_completion_tokens":512000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_tokens","min_p","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{"temperature":1,"top_p":0.95,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/minimax/minimax-m3-20260531/endpoints"},"benchmarks":{"design_arena":[{"arena":"agents","category":"androidnative","elo":996,"win_rate":21.3,"rank":27},{"arena":"models","category":"3d","elo":1314,"win_rate":57.5,"rank":15},{"arena":"models","category":"asciiart","elo":1233,"win_rate":52,"rank":13},{"arena":"models","category":"codecategories","elo":1307,"win_rate":55.7,"rank":12},{"arena":"models","category":"dataviz","elo":1289,"win_rate":56.2,"rank":12},{"arena":"models","category":"gamedev","elo":1291,"win_rate":51.7,"rank":19},{"arena":"models","category":"svg","elo":1249,"win_rate":55.2,"rank":13},{"arena":"models","category":"uicomponent","elo":1288,"win_rate":53.1,"rank":20},{"arena":"models","category":"website","elo":1305,"win_rate":55.6,"rank":12}],"artificial_analysis":{"intelligence_index":44.4,"coding_index":58.6,"agentic_index":35.4}},"reasoning":{"mandatory":false},"aliases":["minimax/minimax-m3-20260531"]},{"id":"stepfun/step-3.7-flash","canonical_slug":"stepfun/step-3.7-flash-20260528","hugging_face_id":"stepfun-ai/Step-3.7-Flash","name":"StepFun: Step 3.7 Flash","created":1779985069,"description":"Step 3.7 Flash is StepFun's latest high-efficiency multimodal Mixture-of-Experts model. It pairs a 196B-parameter language backbone with a vision encoder for native image and video understanding, activating roughly 11B parameters...","context_length":256000,"architecture":{"modality":"text+image+video->text","input_modalities":["text","image","video"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000002000","completion":"0.0000011500","input_cache_read":"0.0000000400"},"top_provider":{"context_length":256000,"max_completion_tokens":256000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_tokens","min_p","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/stepfun/step-3.7-flash-20260528/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"3d","elo":1195,"win_rate":42.9,"rank":47},{"arena":"models","category":"asciiart","elo":1222,"win_rate":52.1,"rank":15},{"arena":"models","category":"codecategories","elo":1216,"win_rate":45.7,"rank":47},{"arena":"models","category":"dataviz","elo":1210,"win_rate":46.8,"rank":42},{"arena":"models","category":"gamedev","elo":1210,"win_rate":41.9,"rank":43},{"arena":"models","category":"svg","elo":1123,"win_rate":40.3,"rank":47},{"arena":"models","category":"uicomponent","elo":1212,"win_rate":44.7,"rank":44},{"arena":"models","category":"website","elo":1225,"win_rate":47.2,"rank":43}],"artificial_analysis":{"intelligence_index":29.7,"coding_index":37.3,"agentic_index":21.5}},"reasoning":{"mandatory":true,"supported_efforts":["high","medium","low"],"default_effort":"medium"},"aliases":["stepfun/step-3.7-flash-20260528"]},{"id":"anthropic/claude-opus-4.8-fast","canonical_slug":"anthropic/claude-4.8-opus-fast-20260528","hugging_face_id":null,"name":"Anthropic: Claude Opus 4.8 (Fast)","created":1779913703,"description":"Fast-mode variant of [Opus 4.8](/anthropic/claude-opus-4.8) - identical capabilities with higher output speed at 2x pricing relative to regular Opus 4.8.\n\nLearn more in Anthropic's docs: https://platform.claude.com/docs/en/build-with-claude/fast-mode","context_length":1000000,"architecture":{"modality":"text+image+file->text","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"Claude","instruct_type":null},"pricing":{"prompt":"0.0000100000","completion":"0.0000500000","web_search":"0.0100000000","input_cache_read":"0.0000010000","input_cache_write":"0.0000125000","input_cache_write_1h":"0.00002"},"top_provider":{"context_length":1000000,"max_completion_tokens":128000,"is_moderated":true},"per_request_limits":null,"supported_parameters":["include_reasoning","max_tokens","reasoning","response_format","stop","structured_outputs","tool_choice","tools","verbosity"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/anthropic/claude-4.8-opus-fast-20260528/endpoints"},"reasoning":{"mandatory":false,"supported_efforts":["max","xhigh","high","medium","low"],"default_effort":"medium"},"aliases":["anthropic/claude-4.8-opus-fast-20260528"]},{"id":"anthropic/claude-opus-4.8","canonical_slug":"anthropic/claude-4.8-opus-20260528","hugging_face_id":null,"name":"Anthropic: Claude Opus 4.8","created":1779905091,"description":"Claude Opus 4.8 is Anthropic's most capable generally available model in the Opus family. It supports text, image, and file inputs with text output, with reasoning support and a 1M-token...","context_length":1000000,"architecture":{"modality":"text+image+file->text","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"Claude","instruct_type":null},"pricing":{"prompt":"0.0000050000","completion":"0.0000250000","web_search":"0.0100000000","input_cache_read":"0.0000005000","input_cache_write":"0.0000062500","input_cache_write_1h":"0.00001"},"top_provider":{"context_length":1000000,"max_completion_tokens":128000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["include_reasoning","max_tokens","reasoning","response_format","stop","structured_outputs","tool_choice","tools","verbosity"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/anthropic/claude-4.8-opus-20260528/endpoints"},"benchmarks":{"design_arena":[{"arena":"agents","category":"agenticgamedev","elo":1269,"win_rate":63.8,"rank":2},{"arena":"agents","category":"agentichtmlslides","elo":1227,"win_rate":55.6,"rank":4},{"arena":"agents","category":"agenticslides","elo":1294,"win_rate":64.8,"rank":2},{"arena":"agents","category":"agenticslides(html)","elo":1230,"win_rate":56,"rank":4},{"arena":"agents","category":"agenticslides(python-pptx)","elo":1310,"win_rate":68.9,"rank":2},{"arena":"agents","category":"androidnative","elo":1337,"win_rate":68.1,"rank":1},{"arena":"agents","category":"fullstack","elo":1329,"win_rate":66.9,"rank":2},{"arena":"agents","category":"htmlslides","elo":1225,"win_rate":55.7,"rank":4},{"arena":"agents","category":"mobileapps","elo":1285,"win_rate":59.4,"rank":1},{"arena":"agents","category":"pptxslides","elo":1306,"win_rate":67.9,"rank":2},{"arena":"agents","category":"python-pptxslides","elo":1298,"win_rate":65.9,"rank":2},{"arena":"agents","category":"webapps","elo":1292,"win_rate":55.8,"rank":2},{"arena":"models","category":"3d","elo":1287,"win_rate":56.3,"rank":22},{"arena":"models","category":"asciiart","elo":1297,"win_rate":62,"rank":7},{"arena":"models","category":"codecategories","elo":1282,"win_rate":54.9,"rank":21},{"arena":"models","category":"dataviz","elo":1274,"win_rate":54.6,"rank":19},{"arena":"models","category":"gamedev","elo":1302,"win_rate":54.8,"rank":15},{"arena":"models","category":"svg","elo":1235,"win_rate":54.1,"rank":16},{"arena":"models","category":"uicomponent","elo":1282,"win_rate":55,"rank":22},{"arena":"models","category":"website","elo":1282,"win_rate":55.1,"rank":21}],"artificial_analysis":{"intelligence_index":55.7,"coding_index":74.3,"agentic_index":47.2}},"reasoning":{"mandatory":false,"supported_efforts":["max","xhigh","high","medium","low"],"default_effort":"medium"},"aliases":["anthropic/claude-4.8-opus-20260528"]},{"id":"qwen/qwen3.7-max","canonical_slug":"qwen/qwen3.7-max-20260520","hugging_face_id":null,"name":"Qwen: Qwen3.7 Max","created":1779376861,"description":"Qwen3.7-Max is the flagship model in Alibaba's Qwen3.7 series. It supports text input and output and is designed for agent-centric workloads, with particular strengths in coding, office and productivity tasks,...","context_length":1000000,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen","instruct_type":null},"pricing":{"prompt":"0.0000012500","completion":"0.0000037500","input_cache_read":"0.0000002500","input_cache_write":"0.0000015625"},"top_provider":{"context_length":1000000,"max_completion_tokens":65536,"is_moderated":false},"per_request_limits":null,"supported_parameters":["include_reasoning","logprobs","max_tokens","presence_penalty","reasoning","response_format","seed","structured_outputs","temperature","tool_choice","tools","top_logprobs","top_p"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/qwen/qwen3.7-max-20260520/endpoints"},"benchmarks":{"design_arena":[{"arena":"agents","category":"androidnative","elo":1170,"win_rate":44.6,"rank":16},{"arena":"agents","category":"fullstack","elo":1222,"win_rate":48.2,"rank":11},{"arena":"agents","category":"mobileapps","elo":1215,"win_rate":47.5,"rank":13},{"arena":"agents","category":"webapps","elo":1258,"win_rate":50.3,"rank":7},{"arena":"models","category":"3d","elo":1325,"win_rate":59.9,"rank":9},{"arena":"models","category":"asciiart","elo":1260,"win_rate":54.3,"rank":9},{"arena":"models","category":"codecategories","elo":1312,"win_rate":57.6,"rank":10},{"arena":"models","category":"dataviz","elo":1288,"win_rate":56,"rank":13},{"arena":"models","category":"gamedev","elo":1322,"win_rate":58.5,"rank":11},{"arena":"models","category":"svg","elo":1279,"win_rate":60.7,"rank":8},{"arena":"models","category":"uicomponent","elo":1327,"win_rate":59.6,"rank":6},{"arena":"models","category":"website","elo":1305,"win_rate":56.7,"rank":13}],"artificial_analysis":{"intelligence_index":46,"coding_index":66,"agentic_index":30.6}},"reasoning":{"mandatory":false,"default_enabled":true},"aliases":["qwen/qwen3.7-max-20260520"]},{"id":"x-ai/grok-build-0.1","canonical_slug":"x-ai/grok-build-0.1-20260520","hugging_face_id":null,"name":"xAI: Grok Build 0.1","created":1779298123,"description":"Grok Build 0.1 is xAI’s fast coding model trained specifically for agentic software engineering workflows. It supports text and image inputs with text output, and is optimized for interactive coding...","context_length":256000,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Grok","instruct_type":null},"pricing":{"prompt":"0.0000010000","completion":"0.0000020000","web_search":"0.0050000000","input_cache_read":"0.0000002000"},"top_provider":{"context_length":256000,"max_completion_tokens":null,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","logprobs","max_tokens","presence_penalty","reasoning","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_logprobs","top_p"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/x-ai/grok-build-0.1-20260520/endpoints"},"reasoning":{"mandatory":true},"aliases":["x-ai/grok-build-0.1-20260520"]},{"id":"google/gemini-embedding-2","canonical_slug":"google/gemini-embedding-2","hugging_face_id":null,"name":"Google: Gemini Embedding 2","created":1779290135,"description":"Gemini Embedding 2 is Google's first multimodal embedding model. We currently support mapping text and images into a unified vector space for semantic search and retrieval-augmented generation (RAG). It supports...","context_length":8192,"architecture":{"modality":"text+image+file+audio+video->embeddings","input_modalities":["text","image","file","audio","video"],"output_modalities":["embeddings"],"tokenizer":"Gemini","instruct_type":null},"pricing":{"prompt":"0.0000002000","completion":"0.0000000000","image":"0.0000004500","audio":"0.0000065000"},"top_provider":{"context_length":8192,"max_completion_tokens":null,"is_moderated":false},"per_request_limits":null,"supported_parameters":["max_tokens","response_format","seed","temperature","top_p"],"default_parameters":{},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/google/gemini-embedding-2/endpoints"},"aliases":["google/gemini-embedding-2"]},{"id":"google/gemini-3.5-flash","canonical_slug":"google/gemini-3.5-flash-20260519","hugging_face_id":null,"name":"Google: Gemini 3.5 Flash","created":1779193800,"description":"Gemini 3.5 Flash is Google's high-efficiency multimodal model, bringing near-Pro level coding and reasoning at Flash-tier cost and speed. It is highly optimized for coding proficiency and parallel agentic execution...","context_length":1048576,"architecture":{"modality":"text+image+file+audio+video->text","input_modalities":["text","image","video","file","audio"],"output_modalities":["text"],"tokenizer":"Gemini","instruct_type":null},"pricing":{"prompt":"0.0000015000","completion":"0.0000090000","image":"0.0000015000","audio":"0.0000030000","web_search":"0.0140000000","internal_reasoning":"0.000009","input_cache_read":"0.0000001500","input_cache_write":"0.0000000833"},"top_provider":{"context_length":1048576,"max_completion_tokens":65536,"is_moderated":false},"per_request_limits":null,"supported_parameters":["include_reasoning","max_tokens","reasoning","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_p"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":"2025-01-01","expiration_date":null,"links":{"details":"/api/v1/models/google/gemini-3.5-flash-20260519/endpoints"},"benchmarks":{"design_arena":[{"arena":"agents","category":"agenticgamedev","elo":1217,"win_rate":55.1,"rank":3},{"arena":"agents","category":"agentichtmlslides","elo":1162,"win_rate":45.8,"rank":7},{"arena":"agents","category":"agenticslides","elo":1244,"win_rate":57.5,"rank":4},{"arena":"agents","category":"agenticslides(html)","elo":1162,"win_rate":45.7,"rank":7},{"arena":"agents","category":"agenticslides(python-pptx)","elo":1242,"win_rate":57.8,"rank":3},{"arena":"agents","category":"androidnative","elo":1261,"win_rate":55,"rank":5},{"arena":"agents","category":"fullstack","elo":1269,"win_rate":58.4,"rank":6},{"arena":"agents","category":"htmlslides","elo":1184,"win_rate":49,"rank":9},{"arena":"agents","category":"mobileapps","elo":1262,"win_rate":57,"rank":2},{"arena":"agents","category":"pptxslides","elo":1244,"win_rate":57.7,"rank":3},{"arena":"agents","category":"python-pptxslides","elo":1247,"win_rate":57.4,"rank":4},{"arena":"agents","category":"webapps","elo":1263,"win_rate":54.4,"rank":5},{"arena":"models","category":"3d","elo":1314,"win_rate":60.7,"rank":14},{"arena":"models","category":"asciiart","elo":1313,"win_rate":62.7,"rank":3},{"arena":"models","category":"codecategories","elo":1306,"win_rate":58.6,"rank":13},{"arena":"models","category":"dataviz","elo":1268,"win_rate":55.4,"rank":24},{"arena":"models","category":"gamedev","elo":1332,"win_rate":59.4,"rank":8},{"arena":"models","category":"svg","elo":1311,"win_rate":63.6,"rank":3},{"arena":"models","category":"uicomponent","elo":1315,"win_rate":60.1,"rank":11},{"arena":"models","category":"website","elo":1299,"win_rate":57.5,"rank":14}],"artificial_analysis":{"intelligence_index":50.2,"coding_index":70.1,"agentic_index":37.4}},"reasoning":{"mandatory":true,"default_enabled":true,"supported_efforts":["high","medium","low","minimal"],"default_effort":"medium"},"aliases":["google/gemini-3.5-flash-20260519"]},{"id":"anthropic/claude-opus-4.7-fast","canonical_slug":"anthropic/claude-4.7-opus-fast-20260512","hugging_face_id":null,"name":"Anthropic: Claude Opus 4.7 (Fast)","created":1778613011,"description":"Fast-mode variant of [Opus 4.7](/anthropic/claude-opus-4.7) - identical capabilities with higher output speed at premium 6x pricing.\n\nLearn more in Anthropic's docs: https://platform.claude.com/docs/en/build-with-claude/fast-mode","context_length":1000000,"architecture":{"modality":"text+image+file->text","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"Claude","instruct_type":null},"pricing":{"prompt":"0.0000300000","completion":"0.0001500000","web_search":"0.0100000000","input_cache_read":"0.0000030000","input_cache_write":"0.0000375000","input_cache_write_1h":"0.00006"},"top_provider":{"context_length":1000000,"max_completion_tokens":128000,"is_moderated":true},"per_request_limits":null,"supported_parameters":["include_reasoning","max_tokens","reasoning","response_format","stop","structured_outputs","tool_choice","tools","verbosity"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/anthropic/claude-4.7-opus-fast-20260512/endpoints"},"reasoning":{"mandatory":false,"supported_efforts":["max","xhigh","high","medium","low"],"default_effort":"medium"},"aliases":["anthropic/claude-4.7-opus-fast-20260512"]},{"id":"inclusionai/ring-2.6-1t","canonical_slug":"inclusionai/ring-2.6-1t-20260508","hugging_face_id":null,"name":"inclusionAI: Ring-2.6-1T","created":1778247440,"description":"Ring-2.6-1T is a 1T-parameter-scale thinking model with 63B active parameters, built for real-world agent workflows that require both strong capability and operational efficiency. It is optimized for coding agents, tool...","context_length":262144,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000000750","completion":"0.0000006250","input_cache_read":"0.0000000150"},"top_provider":{"context_length":262144,"max_completion_tokens":65536,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","max_tokens","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","temperature","tool_choice","tools","top_k","top_p"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/inclusionai/ring-2.6-1t-20260508/endpoints"},"benchmarks":{"design_arena":[],"artificial_analysis":{"intelligence_index":30.6,"coding_index":42.8,"agentic_index":18.9}},"reasoning":{"mandatory":true,"supported_efforts":["xhigh","high"],"default_effort":"high"},"aliases":["inclusionai/ring-2.6-1t-20260508"]},{"id":"google/gemini-3.1-flash-lite","canonical_slug":"google/gemini-3.1-flash-lite-20260507","hugging_face_id":null,"name":"Google: Gemini 3.1 Flash Lite","created":1778168828,"description":"Gemini 3.1 Flash Lite is Google’s GA high-efficiency multimodal model optimized for low-latency, high-volume workloads. It supports text, image, video, audio, and PDF inputs, and is designed for lightweight agentic...","context_length":1048576,"architecture":{"modality":"text+image+file+audio+video->text","input_modalities":["text","image","video","file","audio"],"output_modalities":["text"],"tokenizer":"Gemini","instruct_type":null},"pricing":{"prompt":"0.0000002500","completion":"0.0000015000","image":"0.0000002500","audio":"0.0000005000","web_search":"0.0140000000","internal_reasoning":"0.0000015","input_cache_read":"0.0000000250","input_cache_write":"0.0000000833"},"top_provider":{"context_length":1048576,"max_completion_tokens":65536,"is_moderated":false},"per_request_limits":null,"supported_parameters":["include_reasoning","max_tokens","reasoning","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_p"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/google/gemini-3.1-flash-lite-20260507/endpoints"},"reasoning":{"mandatory":false,"default_enabled":true,"supported_efforts":["high","medium","low","minimal"],"default_effort":"minimal"},"aliases":["google/gemini-3.1-flash-lite-20260507"]},{"id":"openai/gpt-chat-latest","canonical_slug":"openai/gpt-chat-latest-20260505","hugging_face_id":null,"name":"OpenAI: GPT Chat Latest","created":1778000212,"description":"GPT Chat Latest points to OpenAI's stable API alias `chat-latest` that always resolves to the latest Instant chat model used in ChatGPT. As OpenAI rolls out new Instant model updates...","context_length":400000,"architecture":{"modality":"text+image+file->text","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":"0.0000050000","completion":"0.0000300000","web_search":"0.0100000000","input_cache_read":"0.0000005000"},"top_provider":{"context_length":400000,"max_completion_tokens":128000,"is_moderated":true},"per_request_limits":null,"supported_parameters":["frequency_penalty","logit_bias","logprobs","max_tokens","presence_penalty","response_format","seed","stop","structured_outputs","tool_choice","tools","top_logprobs"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/openai/gpt-chat-latest-20260505/endpoints"},"aliases":["openai/gpt-chat-latest-20260505"]},{"id":"x-ai/grok-4.3","canonical_slug":"x-ai/grok-4.3-20260430","hugging_face_id":null,"name":"xAI: Grok 4.3","created":1777591821,"description":"Grok 4.3 is a reasoning model from xAI. It accepts text and image inputs with text output, and is suited for agentic workflows, instruction-following tasks, and applications requiring high factual...","context_length":1000000,"architecture":{"modality":"text+image+file->text","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"Grok","instruct_type":null},"pricing":{"prompt":"0.0000012500","completion":"0.0000025000","web_search":"0.0050000000","input_cache_read":"0.0000002000"},"top_provider":{"context_length":1000000,"max_completion_tokens":null,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","logprobs","max_tokens","presence_penalty","reasoning","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_logprobs","top_p"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/x-ai/grok-4.3-20260430/endpoints"},"benchmarks":{"design_arena":[{"arena":"agents","category":"agenticgamedev","elo":1033,"win_rate":27.5,"rank":10},{"arena":"agents","category":"agentichtmlslides","elo":1066,"win_rate":31.8,"rank":10},{"arena":"agents","category":"agenticslides","elo":1072,"win_rate":31.9,"rank":10},{"arena":"agents","category":"agenticslides(html)","elo":1066,"win_rate":31.7,"rank":10},{"arena":"agents","category":"agenticslides(python-pptx)","elo":1068,"win_rate":32.4,"rank":10},{"arena":"agents","category":"androidnative","elo":1071,"win_rate":28.1,"rank":23},{"arena":"agents","category":"fullstack","elo":1071,"win_rate":31.4,"rank":26},{"arena":"agents","category":"godotgamedev","elo":1134,"win_rate":38.6,"rank":16},{"arena":"agents","category":"htmlslides","elo":1067,"win_rate":31.8,"rank":12},{"arena":"agents","category":"mobileapps","elo":1142,"win_rate":38.7,"rank":26},{"arena":"agents","category":"pptxslides","elo":1071,"win_rate":32.5,"rank":9},{"arena":"agents","category":"python-pptxslides","elo":1073,"win_rate":31.1,"rank":10},{"arena":"agents","category":"webapps","elo":1195,"win_rate":46.7,"rank":14},{"arena":"models","category":"3d","elo":1205,"win_rate":44.9,"rank":43},{"arena":"models","category":"asciiart","elo":1193,"win_rate":47.7,"rank":25},{"arena":"models","category":"codecategories","elo":1245,"win_rate":49.9,"rank":31},{"arena":"models","category":"dataviz","elo":1237,"win_rate":48.9,"rank":33},{"arena":"models","category":"gamedev","elo":1244,"win_rate":49.9,"rank":33},{"arena":"models","category":"svg","elo":1143,"win_rate":42.3,"rank":43},{"arena":"models","category":"uicomponent","elo":1250,"win_rate":49,"rank":30},{"arena":"models","category":"website","elo":1246,"win_rate":50.5,"rank":31}],"artificial_analysis":{"intelligence_index":37.6,"coding_index":42.2,"agentic_index":24.1}},"reasoning":{"mandatory":false,"default_enabled":true,"supported_efforts":["high","medium","low","none"],"default_effort":"low"},"aliases":["x-ai/grok-4.3-20260430"]},{"id":"ibm-granite/granite-4.1-8b","canonical_slug":"ibm-granite/granite-4.1-8b-20260429","hugging_face_id":"ibm-granite/granite-4.1-8b","name":"IBM: Granite 4.1 8B","created":1777577071,"description":"Granite 4.1 8B is a dense, decoder-only 8-billion-parameter language model from IBM, part of the Granite 4.1 family. It supports a 131K-token context window and is designed for enterprise tasks...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000000500","completion":"0.0000001000","input_cache_read":"0.0000000500"},"top_provider":{"context_length":131072,"max_completion_tokens":131072,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","logprobs","max_tokens","presence_penalty","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/ibm-granite/granite-4.1-8b-20260429/endpoints"},"benchmarks":{"design_arena":[],"artificial_analysis":{"intelligence_index":null,"coding_index":9.5,"agentic_index":null}},"aliases":["ibm-granite/granite-4.1-8b-20260429"]},{"id":"mistralai/mistral-medium-3-5","canonical_slug":"mistralai/mistral-medium-3.5-20260430","hugging_face_id":null,"name":"Mistral: Mistral Medium 3.5","created":1777570439,"description":"Mistral Medium 3.5 is a dense 128B instruction-following model from Mistral AI. It supports text and image inputs with text output, and is designed for agentic workflows, coding, and complex...","context_length":262144,"architecture":{"modality":"text+image+file->text","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"Mistral","instruct_type":null},"pricing":{"prompt":"0.0000015000","completion":"0.0000075000"},"top_provider":{"context_length":262144,"max_completion_tokens":null,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","max_tokens","presence_penalty","reasoning","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_p"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/mistralai/mistral-medium-3.5-20260430/endpoints"},"benchmarks":{"design_arena":[],"artificial_analysis":{"intelligence_index":29.9,"coding_index":46.9,"agentic_index":19}},"reasoning":{"mandatory":false,"supported_efforts":["high","none"],"default_effort":"high"},"aliases":["mistralai/mistral-medium-3.5-20260430"]},{"id":"openrouter/owl-alpha","canonical_slug":"openrouter/owl-alpha","hugging_face_id":null,"name":"Owl Alpha","created":1777398589,"description":"Owl Alpha is a high-performance foundation model designed for agentic workloads. Natively supports tool use, and long-context tasks, with strong performance in code generation, automated workflows, and complex instruction execution....","context_length":1048756,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000000000","completion":"0.0000000000"},"top_provider":{"context_length":1048756,"max_completion_tokens":262144,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","logit_bias","max_tokens","presence_penalty","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tools","top_k","top_p"],"default_parameters":null,"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/openrouter/owl-alpha/endpoints"},"aliases":["openrouter/owl-alpha"]},{"id":"poolside/laguna-xs.2","canonical_slug":"poolside/laguna-xs.2-20260421","hugging_face_id":"poolside/Laguna-XS.2","name":"Poolside: Laguna XS.2","created":1777389604,"description":"Laguna XS.2 is the second-generation model in the XS size class from [Poolside](https://poolside.ai/), their efficient coding agent series. It combines tool calling and reasoning capabilities with a compact footprint, offering...","context_length":262144,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000001000","completion":"0.0000002000","input_cache_read":"0.0000000500"},"top_provider":{"context_length":262144,"max_completion_tokens":32768,"is_moderated":false},"per_request_limits":null,"supported_parameters":["include_reasoning","max_tokens","reasoning","temperature","tool_choice","tools"],"default_parameters":{"temperature":0.7,"top_p":0.9,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/poolside/laguna-xs.2-20260421/endpoints"},"reasoning":{"mandatory":false,"default_enabled":true},"aliases":["poolside/laguna-xs.2-20260421"]},{"id":"poolside/laguna-m.1","canonical_slug":"poolside/laguna-m.1-20260312","hugging_face_id":"poolside/Laguna-M.1","name":"Poolside: Laguna M.1","created":1777388504,"description":"Laguna M.1 is the flagship coding agent model from [Poolside](https://poolside.ai/), optimized for complex software engineering tasks. Designed for agentic coding workflows, it supports tool calling and reasoning, with a 256K...","context_length":262144,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000002000","completion":"0.0000004000","input_cache_read":"0.0000001000"},"top_provider":{"context_length":262144,"max_completion_tokens":32768,"is_moderated":false},"per_request_limits":null,"supported_parameters":["include_reasoning","max_tokens","reasoning","temperature","tool_choice","tools"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/poolside/laguna-m.1-20260312/endpoints"},"reasoning":{"mandatory":false,"default_enabled":true},"aliases":["poolside/laguna-m.1-20260312"]},{"id":"~anthropic/claude-haiku-latest","canonical_slug":"~anthropic/claude-haiku-latest","hugging_face_id":null,"name":"Anthropic Claude Haiku Latest","created":1777318492,"description":"This model always redirects to the latest model in the Anthropic Claude Haiku family.","context_length":200000,"architecture":{"modality":"text+image+file->text","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"Router","instruct_type":null},"pricing":{"prompt":"0.0000010000","completion":"0.0000050000","web_search":"0.0100000000","input_cache_read":"0.0000001000","input_cache_write":"0.0000012500","input_cache_write_1h":"0.000002"},"top_provider":{"context_length":200000,"max_completion_tokens":64000,"is_moderated":true},"per_request_limits":null,"supported_parameters":["include_reasoning","max_tokens","reasoning","response_format","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_p"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/~anthropic/claude-haiku-latest/endpoints"},"reasoning":{"mandatory":false},"aliases":["~anthropic/claude-haiku-latest"]},{"id":"~openai/gpt-mini-latest","canonical_slug":"~openai/gpt-mini-latest","hugging_face_id":null,"name":"OpenAI GPT Mini Latest","created":1777318471,"description":"This model always redirects to the latest model in the OpenAI GPT Mini family.","context_length":400000,"architecture":{"modality":"text+image+file->text","input_modalities":["file","image","text"],"output_modalities":["text"],"tokenizer":"Router","instruct_type":null},"pricing":{"prompt":"0.0000007500","completion":"0.0000045000","web_search":"0.0100000000","input_cache_read":"0.0000000750"},"top_provider":{"context_length":400000,"max_completion_tokens":128000,"is_moderated":true},"per_request_limits":null,"supported_parameters":["include_reasoning","max_completion_tokens","max_tokens","reasoning","response_format","seed","structured_outputs","tool_choice","tools"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":"2025-08-31","expiration_date":null,"links":{"details":"/api/v1/models/~openai/gpt-mini-latest/endpoints"},"reasoning":{"mandatory":false,"default_enabled":false,"supported_efforts":["xhigh","high","medium","low","none"],"default_effort":"medium"},"aliases":["~openai/gpt-mini-latest"]},{"id":"~google/gemini-pro-latest","canonical_slug":"~google/gemini-pro-latest","hugging_face_id":null,"name":"Google Gemini Pro Latest","created":1777318451,"description":"This model always redirects to the latest model in the Google Gemini Pro family.","context_length":1048576,"architecture":{"modality":"text+image+file+audio+video->text","input_modalities":["audio","file","image","text","video"],"output_modalities":["text"],"tokenizer":"Router","instruct_type":null},"pricing":{"prompt":"0.0000020000","completion":"0.0000120000","image":"0.0000020000","audio":"0.0000020000","web_search":"0.0140000000","internal_reasoning":"0.000012","input_cache_read":"0.0000002000","input_cache_write":"0.0000003750"},"top_provider":{"context_length":1048576,"max_completion_tokens":65536,"is_moderated":false},"per_request_limits":null,"supported_parameters":["include_reasoning","max_tokens","reasoning","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_p"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/~google/gemini-pro-latest/endpoints"},"reasoning":{"mandatory":true,"supported_efforts":["high","medium","low"],"default_effort":"medium"},"aliases":["~google/gemini-pro-latest"]},{"id":"~moonshotai/kimi-latest","canonical_slug":"~moonshotai/kimi-latest","hugging_face_id":null,"name":"MoonshotAI Kimi Latest","created":1777318428,"description":"This model always redirects to the latest model in the MoonshotAI Kimi family.","context_length":262144,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Router","instruct_type":null},"pricing":{"prompt":"0.0000006600","completion":"0.0000034100","input_cache_read":"0.0000001440"},"top_provider":{"context_length":262144,"max_completion_tokens":262144,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_tokens","min_p","parallel_tool_calls","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/~moonshotai/kimi-latest/endpoints"},"reasoning":{"mandatory":false,"default_enabled":true},"aliases":["~moonshotai/kimi-latest"]},{"id":"~google/gemini-flash-latest","canonical_slug":"~google/gemini-flash-latest","hugging_face_id":null,"name":"Google Gemini Flash Latest","created":1777318398,"description":"This model always redirects to the latest model in the Google Gemini Flash family.","context_length":1048576,"architecture":{"modality":"text+image+file+audio+video->text","input_modalities":["text","image","video","file","audio"],"output_modalities":["text"],"tokenizer":"Router","instruct_type":null},"pricing":{"prompt":"0.0000015000","completion":"0.0000090000","image":"0.0000015000","audio":"0.0000030000","web_search":"0.0140000000","internal_reasoning":"0.000009","input_cache_read":"0.0000001500","input_cache_write":"0.0000000833"},"top_provider":{"context_length":1048576,"max_completion_tokens":65536,"is_moderated":false},"per_request_limits":null,"supported_parameters":["include_reasoning","max_tokens","reasoning","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_p"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":"2025-01-01","expiration_date":null,"links":{"details":"/api/v1/models/~google/gemini-flash-latest/endpoints"},"reasoning":{"mandatory":true,"default_enabled":true,"supported_efforts":["high","medium","low","minimal"],"default_effort":"medium"},"aliases":["~google/gemini-flash-latest"]},{"id":"~anthropic/claude-sonnet-latest","canonical_slug":"~anthropic/claude-sonnet-latest","hugging_face_id":null,"name":"Anthropic Claude Sonnet Latest","created":1777318368,"description":"This model always redirects to the latest model in the Anthropic Claude Sonnet family.","context_length":1000000,"architecture":{"modality":"text+image+file->text","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"Router","instruct_type":null},"pricing":{"prompt":"0.0000030000","completion":"0.0000150000","web_search":"0.0100000000","input_cache_read":"0.0000003000","input_cache_write":"0.0000037500","input_cache_write_1h":"0.000006"},"top_provider":{"context_length":1000000,"max_completion_tokens":128000,"is_moderated":true},"per_request_limits":null,"supported_parameters":["include_reasoning","max_completion_tokens","max_tokens","reasoning","response_format","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_p","verbosity"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/~anthropic/claude-sonnet-latest/endpoints"},"reasoning":{"mandatory":false,"supported_efforts":["max","high","medium","low"],"default_effort":"medium"},"aliases":["~anthropic/claude-sonnet-latest"]},{"id":"~openai/gpt-latest","canonical_slug":"~openai/gpt-latest","hugging_face_id":null,"name":"OpenAI GPT Latest","created":1777318334,"description":"This model always redirects to the latest model in the OpenAI GPT family.","context_length":1050000,"architecture":{"modality":"text+image+file->text","input_modalities":["file","image","text"],"output_modalities":["text"],"tokenizer":"Router","instruct_type":null},"pricing":{"prompt":"0.0000050000","completion":"0.0000300000","web_search":"0.0100000000","input_cache_read":"0.0000005000"},"top_provider":{"context_length":1050000,"max_completion_tokens":128000,"is_moderated":true},"per_request_limits":null,"supported_parameters":["include_reasoning","max_completion_tokens","max_tokens","reasoning","response_format","seed","structured_outputs","tool_choice","tools"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":"2025-12-01","expiration_date":null,"links":{"details":"/api/v1/models/~openai/gpt-latest/endpoints"},"reasoning":{"mandatory":false,"default_enabled":true,"supported_efforts":["xhigh","high","medium","low","none"],"default_effort":"medium"},"aliases":["~openai/gpt-latest"]},{"id":"qwen/qwen3.5-plus-20260420","canonical_slug":"qwen/qwen3.5-plus-20260420","hugging_face_id":null,"name":"Qwen: Qwen3.5 Plus 2026-04-20","created":1777261368,"description":"Qwen3.5 Plus (April 2026) is a large-scale multimodal language model from Alibaba. It accepts text, image, and video input and produces text output, with a 1M token context window. This...","context_length":1000000,"architecture":{"modality":"text+image+video->text","input_modalities":["text","image","video"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":null},"pricing":{"prompt":"0.0000003000","completion":"0.0000018000","input_cache_write":"0.0000003750"},"top_provider":{"context_length":1000000,"max_completion_tokens":65536,"is_moderated":false},"per_request_limits":null,"supported_parameters":["include_reasoning","logprobs","max_tokens","presence_penalty","reasoning","response_format","seed","structured_outputs","temperature","tool_choice","tools","top_logprobs","top_p"],"default_parameters":{},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/qwen/qwen3.5-plus-20260420/endpoints"},"reasoning":{"mandatory":false},"aliases":["qwen/qwen3.5-plus-20260420"]},{"id":"qwen/qwen3.6-flash","canonical_slug":"qwen/qwen3.6-flash","hugging_face_id":null,"name":"Qwen: Qwen3.6 Flash","created":1777261362,"description":"Qwen3.6 Flash is a fast, efficient language model from Alibaba's Qwen 3.6 series. It supports text, image, and video input with a 1M token context window. Tiered pricing kicks in...","context_length":1000000,"architecture":{"modality":"text+image+video->text","input_modalities":["text","image","video"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":null},"pricing":{"prompt":"0.0000001875","completion":"0.0000011250","input_cache_write":"0.0000002344"},"top_provider":{"context_length":1000000,"max_completion_tokens":65536,"is_moderated":false},"per_request_limits":null,"supported_parameters":["include_reasoning","logprobs","max_tokens","presence_penalty","reasoning","response_format","seed","structured_outputs","temperature","tool_choice","tools","top_logprobs","top_p"],"default_parameters":{},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/qwen/qwen3.6-flash/endpoints"},"reasoning":{"mandatory":false},"aliases":["qwen/qwen3.6-flash"]},{"id":"qwen/qwen3.6-35b-a3b","canonical_slug":"qwen/qwen3.6-35b-a3b-20260415","hugging_face_id":"Qwen/Qwen3.6-35B-A3B","name":"Qwen: Qwen3.6 35B A3B","created":1777260255,"description":"Qwen3.6-35B-A3B is an open-weight multimodal model from Alibaba Cloud with 35 billion total parameters and 3 billion active parameters per token. It uses a hybrid sparse mixture-of-experts architecture combining Gated...","context_length":262144,"architecture":{"modality":"text+image+video->text","input_modalities":["text","image","video"],"output_modalities":["text"],"tokenizer":"Qwen","instruct_type":null},"pricing":{"prompt":"0.0000001400","completion":"0.0000010000"},"top_provider":{"context_length":262144,"max_completion_tokens":262144,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_tokens","min_p","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{"temperature":1,"top_p":0.95,"top_k":20},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/qwen/qwen3.6-35b-a3b-20260415/endpoints"},"benchmarks":{"design_arena":[],"artificial_analysis":{"intelligence_index":31.6,"coding_index":41.9,"agentic_index":21.4}},"reasoning":{"mandatory":false,"default_enabled":true},"aliases":["qwen/qwen3.6-35b-a3b-20260415"]},{"id":"qwen/qwen3.6-max-preview","canonical_slug":"qwen/qwen3.6-max-preview-20260420","hugging_face_id":null,"name":"Qwen: Qwen3.6 Max Preview","created":1777260242,"description":"Qwen3.6-Max-Preview is a proprietary frontier model from Alibaba Cloud built on a sparse mixture-of-experts architecture with approximately 1 trillion total parameters. It is optimized for agentic coding, tool use, and...","context_length":262144,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen","instruct_type":null},"pricing":{"prompt":"0.0000010400","completion":"0.0000062400","input_cache_write":"0.0000013000"},"top_provider":{"context_length":262144,"max_completion_tokens":65536,"is_moderated":false},"per_request_limits":null,"supported_parameters":["include_reasoning","logprobs","max_tokens","presence_penalty","reasoning","response_format","seed","structured_outputs","temperature","tool_choice","tools","top_logprobs","top_p"],"default_parameters":{},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/qwen/qwen3.6-max-preview-20260420/endpoints"},"reasoning":{"mandatory":false,"default_enabled":true},"aliases":["qwen/qwen3.6-max-preview-20260420"]},{"id":"qwen/qwen3.6-27b","canonical_slug":"qwen/qwen3.6-27b-20260422","hugging_face_id":"Qwen/Qwen3.6-27B","name":"Qwen: Qwen3.6 27B","created":1777255064,"description":"Qwen3.6 27B is a dense 27-billion-parameter language model from the Qwen Team at Alibaba, released in April 2026. It features hybrid multimodal capabilities — accepting text, image, and video inputs...","context_length":262144,"architecture":{"modality":"text+image+video->text","input_modalities":["text","image","video"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":null},"pricing":{"prompt":"0.0000002596","completion":"0.0000023850"},"top_provider":{"context_length":262140,"max_completion_tokens":262140,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_tokens","min_p","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/qwen/qwen3.6-27b-20260422/endpoints"},"benchmarks":{"design_arena":[],"artificial_analysis":{"intelligence_index":37.1,"coding_index":53.7,"agentic_index":27}},"reasoning":{"mandatory":false,"default_enabled":true},"aliases":["qwen/qwen3.6-27b-20260422"]},{"id":"openai/gpt-5.5-pro","canonical_slug":"openai/gpt-5.5-pro-20260423","hugging_face_id":"","name":"OpenAI: GPT-5.5 Pro","created":1777051896,"description":"GPT-5.5 Pro is OpenAI’s high-capability model optimized for deep reasoning and accuracy on complex, high-stakes workloads. It features a 1M+ token context window (922K input, 128K output) with support for...","context_length":1050000,"architecture":{"modality":"text+image+file->text","input_modalities":["file","image","text"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":"0.0000300000","completion":"0.0001800000","web_search":"0.0100000000"},"top_provider":{"context_length":1050000,"max_completion_tokens":128000,"is_moderated":true},"per_request_limits":null,"supported_parameters":["include_reasoning","max_tokens","reasoning","response_format","seed","structured_outputs","tool_choice","tools"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":"2025-12-01","expiration_date":null,"links":{"details":"/api/v1/models/openai/gpt-5.5-pro-20260423/endpoints"},"reasoning":{"mandatory":true,"supported_efforts":["xhigh","high","medium"],"default_effort":"medium"},"aliases":["openai/gpt-5.5-pro-20260423"]},{"id":"openai/gpt-5.5","canonical_slug":"openai/gpt-5.5-20260423","hugging_face_id":"","name":"OpenAI: GPT-5.5","created":1777051893,"description":"GPT-5.5 is OpenAI’s frontier model designed for complex professional workloads, building on GPT-5.4 with stronger reasoning, higher reliability, and improved token efficiency on hard tasks. It features a 1M+ token...","context_length":1050000,"architecture":{"modality":"text+image+file->text","input_modalities":["file","image","text"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":"0.0000050000","completion":"0.0000300000","web_search":"0.0100000000","input_cache_read":"0.0000005000"},"top_provider":{"context_length":1050000,"max_completion_tokens":128000,"is_moderated":true},"per_request_limits":null,"supported_parameters":["include_reasoning","max_completion_tokens","max_tokens","reasoning","response_format","seed","structured_outputs","tool_choice","tools"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":"2025-12-01","expiration_date":null,"links":{"details":"/api/v1/models/openai/gpt-5.5-20260423/endpoints"},"benchmarks":{"design_arena":[{"arena":"agents","category":"agenticgamedev","elo":1211,"win_rate":53.2,"rank":5},{"arena":"agents","category":"agentichtmlslides","elo":1084,"win_rate":34.2,"rank":9},{"arena":"agents","category":"agenticslides","elo":1150,"win_rate":43.5,"rank":7},{"arena":"agents","category":"agenticslides(html)","elo":1077,"win_rate":33.2,"rank":9},{"arena":"agents","category":"agenticslides(python-pptx)","elo":1155,"win_rate":45.2,"rank":7},{"arena":"agents","category":"androidnative","elo":1264,"win_rate":55.8,"rank":4},{"arena":"agents","category":"fullstack","elo":1155,"win_rate":45,"rank":15},{"arena":"agents","category":"godotgamedev","elo":1217,"win_rate":53.2,"rank":9},{"arena":"agents","category":"htmlslides","elo":1084,"win_rate":34.2,"rank":11},{"arena":"agents","category":"mobileapps","elo":1225,"win_rate":51.4,"rank":9},{"arena":"agents","category":"pptxslides","elo":1157,"win_rate":45.3,"rank":7},{"arena":"agents","category":"python-pptxslides","elo":1152,"win_rate":43.3,"rank":8},{"arena":"agents","category":"webapps","elo":1181,"win_rate":45.5,"rank":18},{"arena":"models","category":"3d","elo":1262,"win_rate":53.3,"rank":29},{"arena":"models","category":"asciiart","elo":1321,"win_rate":64.8,"rank":2},{"arena":"models","category":"codecategories","elo":1297,"win_rate":57,"rank":15},{"arena":"models","category":"dataviz","elo":1302,"win_rate":58.7,"rank":9},{"arena":"models","category":"gamedev","elo":1349,"win_rate":62.9,"rank":3},{"arena":"models","category":"svg","elo":1285,"win_rate":60.3,"rank":6},{"arena":"models","category":"uicomponent","elo":1296,"win_rate":57.3,"rank":16},{"arena":"models","category":"website","elo":1292,"win_rate":56.2,"rank":16}],"artificial_analysis":{"intelligence_index":54.8,"coding_index":74.9,"agentic_index":44.9}},"reasoning":{"mandatory":false,"default_enabled":true,"supported_efforts":["xhigh","high","medium","low","none"],"default_effort":"medium"},"aliases":["openai/gpt-5.5-20260423"]},{"id":"deepseek/deepseek-v4-pro","canonical_slug":"deepseek/deepseek-v4-pro-20260423","hugging_face_id":"deepseek-ai/DeepSeek-V4-Pro","name":"DeepSeek: DeepSeek V4 Pro","created":1777000679,"description":"DeepSeek V4 Pro is a large-scale Mixture-of-Experts model from DeepSeek with 1.6T total parameters and 49B activated parameters, supporting a 1M-token context window. It is designed for advanced reasoning, coding,...","context_length":1048576,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"DeepSeek","instruct_type":null},"pricing":{"prompt":"0.0000016000","completion":"0.0000032000","input_cache_read":"0.0000001400"},"top_provider":{"context_length":1048576,"max_completion_tokens":384000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_tokens","min_p","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{"temperature":1,"top_p":1,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/deepseek/deepseek-v4-pro-20260423/endpoints"},"benchmarks":{"design_arena":[{"arena":"agents","category":"fullstack","elo":948,"win_rate":22.1,"rank":29},{"arena":"agents","category":"godotgamedev","elo":1097,"win_rate":34,"rank":20},{"arena":"agents","category":"webapps","elo":1017,"win_rate":26.6,"rank":25},{"arena":"models","category":"3d","elo":1334,"win_rate":61.9,"rank":7},{"arena":"models","category":"asciiart","elo":1200,"win_rate":47.5,"rank":21},{"arena":"models","category":"codecategories","elo":1289,"win_rate":55.2,"rank":19},{"arena":"models","category":"dataviz","elo":1220,"win_rate":48.9,"rank":37},{"arena":"models","category":"gamedev","elo":1299,"win_rate":56.6,"rank":17},{"arena":"models","category":"svg","elo":1191,"win_rate":46.8,"rank":32},{"arena":"models","category":"uicomponent","elo":1274,"win_rate":52.2,"rank":24},{"arena":"models","category":"website","elo":1280,"win_rate":54,"rank":22}],"artificial_analysis":{"intelligence_index":44.3,"coding_index":59.4,"agentic_index":36.4}},"reasoning":{"mandatory":false,"supported_efforts":["xhigh","high"],"default_effort":"high"},"aliases":["deepseek/deepseek-v4-pro-20260423"]},{"id":"deepseek/deepseek-v4-flash","canonical_slug":"deepseek/deepseek-v4-flash-20260423","hugging_face_id":"deepseek-ai/DeepSeek-V4-Flash","name":"DeepSeek: DeepSeek V4 Flash","created":1777000666,"description":"DeepSeek V4 Flash is an efficiency-optimized Mixture-of-Experts model from DeepSeek with 284B total parameters and 13B activated parameters, supporting a 1M-token context window. It is designed for fast inference and...","context_length":1048576,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"DeepSeek","instruct_type":null},"pricing":{"prompt":"0.0000000900","completion":"0.0000001800","input_cache_read":"0.0000000200"},"top_provider":{"context_length":1000000,"max_completion_tokens":65536,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_tokens","min_p","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/deepseek/deepseek-v4-flash-20260423/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"3d","elo":1272,"win_rate":50.2,"rank":24},{"arena":"models","category":"asciiart","elo":1175,"win_rate":46,"rank":31},{"arena":"models","category":"codecategories","elo":1258,"win_rate":50.5,"rank":29},{"arena":"models","category":"dataviz","elo":1170,"win_rate":42.3,"rank":58},{"arena":"models","category":"gamedev","elo":1264,"win_rate":50.7,"rank":27},{"arena":"models","category":"svg","elo":1214,"win_rate":49.2,"rank":21},{"arena":"models","category":"uicomponent","elo":1225,"win_rate":47.2,"rank":38},{"arena":"models","category":"website","elo":1254,"win_rate":51.2,"rank":30}],"artificial_analysis":{"intelligence_index":40.3,"coding_index":56.2,"agentic_index":31.1}},"reasoning":{"mandatory":false,"supported_efforts":["xhigh","high"],"default_effort":"high"},"aliases":["deepseek/deepseek-v4-flash-20260423"]},{"id":"inclusionai/ling-2.6-1t","canonical_slug":"inclusionai/ling-2.6-1t-20260423","hugging_face_id":null,"name":"inclusionAI: Ling-2.6-1T","created":1776948238,"description":"Ling-2.6-1T is an instant (instruct) model from inclusionAI and the company’s trillion-parameter flagship, designed for real-world agents that require fast execution and high efficiency at scale. It uses a “fast...","context_length":262144,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000000750","completion":"0.0000006250","input_cache_read":"0.0000000150"},"top_provider":{"context_length":262144,"max_completion_tokens":32768,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","logprobs","max_tokens","presence_penalty","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/inclusionai/ling-2.6-1t-20260423/endpoints"},"aliases":["inclusionai/ling-2.6-1t-20260423"]},{"id":"tencent/hy3-preview","canonical_slug":"tencent/hy3-preview-20260421","hugging_face_id":"tencent/Hy3-preview","name":"Tencent: Hy3 preview","created":1776878150,"description":"Hy3 preview is a high-efficiency Mixture-of-Experts model from Tencent designed for agentic workflows and production use. It supports configurable reasoning levels across disabled, low, and high modes, allowing it to...","context_length":262144,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000000630","completion":"0.0000002100","input_cache_read":"0.0000000210"},"top_provider":{"context_length":262144,"max_completion_tokens":null,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","max_tokens","presence_penalty","reasoning","seed","stop","temperature","tool_choice","tools","top_k","top_p"],"default_parameters":{"temperature":0.9,"top_p":1,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/tencent/hy3-preview-20260421/endpoints"},"reasoning":{"mandatory":false,"default_enabled":true,"supported_efforts":["high","low","none"],"default_effort":"high"},"aliases":["tencent/hy3-preview-20260421"]},{"id":"xiaomi/mimo-v2.5-pro","canonical_slug":"xiaomi/mimo-v2.5-pro-20260422","hugging_face_id":"XiaomiMiMo/MiMo-V2.5-Pro","name":"Xiaomi: MiMo-V2.5-Pro","created":1776874273,"description":"MiMo-V2.5-Pro is Xiaomi’s flagship model, delivering strong performance in general agentic capabilities, complex software engineering, and long-horizon tasks, with top rankings on benchmarks such as ClawEval, GDPVal, and SWE-bench Pro....","context_length":1048576,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000004350","completion":"0.0000008700","input_cache_read":"0.0000000036"},"top_provider":{"context_length":1048576,"max_completion_tokens":131072,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_tokens","min_p","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{"temperature":1,"top_p":0.95,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/xiaomi/mimo-v2.5-pro-20260422/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"3d","elo":1330,"win_rate":60.2,"rank":8},{"arena":"models","category":"asciiart","elo":1216,"win_rate":52.5,"rank":16},{"arena":"models","category":"codecategories","elo":1320,"win_rate":58.3,"rank":9},{"arena":"models","category":"dataviz","elo":1295,"win_rate":58.6,"rank":10},{"arena":"models","category":"gamedev","elo":1336,"win_rate":61,"rank":7},{"arena":"models","category":"uicomponent","elo":1291,"win_rate":55,"rank":18},{"arena":"models","category":"website","elo":1311,"win_rate":56.7,"rank":10}],"artificial_analysis":{"intelligence_index":42.2,"coding_index":60.2,"agentic_index":29.1}},"reasoning":{"mandatory":false},"aliases":["xiaomi/mimo-v2.5-pro-20260422"]},{"id":"xiaomi/mimo-v2.5","canonical_slug":"xiaomi/mimo-v2.5-20260422","hugging_face_id":"XiaomiMiMo/MiMo-V2.5","name":"Xiaomi: MiMo-V2.5","created":1776874269,"description":"MiMo-V2.5 is a native omnimodal model by Xiaomi. It delivers Pro-level agentic performance at roughly half the inference cost, while surpassing MiMo-V2-Omni in multimodal perception across image and video understanding...","context_length":1048576,"architecture":{"modality":"text+image+audio+video->text","input_modalities":["text","audio","image","video"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000001050","completion":"0.0000002800"},"top_provider":{"context_length":32000,"max_completion_tokens":null,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_tokens","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{"temperature":1,"top_p":0.95,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/xiaomi/mimo-v2.5-20260422/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"3d","elo":1301,"win_rate":54.1,"rank":18},{"arena":"models","category":"asciiart","elo":1191,"win_rate":47.9,"rank":26},{"arena":"models","category":"codecategories","elo":1304,"win_rate":55,"rank":14},{"arena":"models","category":"dataviz","elo":1281,"win_rate":53.9,"rank":16},{"arena":"models","category":"gamedev","elo":1303,"win_rate":56.3,"rank":14},{"arena":"models","category":"svg","elo":1214,"win_rate":51.9,"rank":22},{"arena":"models","category":"uicomponent","elo":1314,"win_rate":56.3,"rank":12},{"arena":"models","category":"website","elo":1305,"win_rate":55.2,"rank":11}]},"reasoning":{"mandatory":false},"aliases":["xiaomi/mimo-v2.5-20260422"]},{"id":"inclusionai/ling-2.6-flash","canonical_slug":"inclusionai/ling-2.6-flash-20260421","hugging_face_id":"","name":"inclusionAI: Ling-2.6-flash","created":1776795886,"description":"Ling-2.6-flash is an instant (instruct) model from inclusionAI with 104B total parameters and 7.4B active parameters, designed for real-world agents that require fast responses, strong execution, and high token efficiency....","context_length":262144,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000000100","completion":"0.0000000300","input_cache_read":"0.0000000020"},"top_provider":{"context_length":262144,"max_completion_tokens":32768,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","logprobs","max_tokens","presence_penalty","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/inclusionai/ling-2.6-flash-20260421/endpoints"},"aliases":["inclusionai/ling-2.6-flash-20260421"]},{"id":"~anthropic/claude-opus-latest","canonical_slug":"~anthropic/claude-opus-latest","hugging_face_id":"","name":"Anthropic: Claude Opus Latest","created":1776795361,"description":"This model always redirects to the latest model in the Claude Opus family.","context_length":1000000,"architecture":{"modality":"text+image+file->text","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"Router","instruct_type":null},"pricing":{"prompt":"0.0000050000","completion":"0.0000250000","web_search":"0.0100000000","input_cache_read":"0.0000005000","input_cache_write":"0.0000062500","input_cache_write_1h":"0.00001"},"top_provider":{"context_length":1000000,"max_completion_tokens":128000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["include_reasoning","max_tokens","reasoning","response_format","stop","structured_outputs","tool_choice","tools","verbosity"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/~anthropic/claude-opus-latest/endpoints"},"reasoning":{"mandatory":false,"supported_efforts":["max","xhigh","high","medium","low"],"default_effort":"medium"},"aliases":["~anthropic/claude-opus-latest"]},{"id":"moonshotai/kimi-k2.6","canonical_slug":"moonshotai/kimi-k2.6-20260420","hugging_face_id":"moonshotai/Kimi-K2.6","name":"MoonshotAI: Kimi K2.6","created":1776699402,"description":"Kimi K2.6 is Moonshot AI's next-generation multimodal model, designed for long-horizon coding, coding-driven UI/UX generation, and multi-agent orchestration. It handles complex end-to-end coding tasks across Python, Rust, and Go, and...","context_length":262144,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000006600","completion":"0.0000034100","input_cache_read":"0.0000001440"},"top_provider":{"context_length":262144,"max_completion_tokens":262144,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_tokens","min_p","parallel_tool_calls","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/moonshotai/kimi-k2.6-20260420/endpoints"},"benchmarks":{"design_arena":[{"arena":"agents","category":"agenticgamedev","elo":1184,"win_rate":50.3,"rank":7},{"arena":"agents","category":"agentichtmlslides","elo":1248,"win_rate":59,"rank":2},{"arena":"agents","category":"agenticslides","elo":1187,"win_rate":45.8,"rank":5},{"arena":"agents","category":"agenticslides(html)","elo":1252,"win_rate":59.2,"rank":2},{"arena":"agents","category":"agenticslides(python-pptx)","elo":1186,"win_rate":45.5,"rank":5},{"arena":"agents","category":"androidnative","elo":1210,"win_rate":50.6,"rank":12},{"arena":"agents","category":"fullstack","elo":1223,"win_rate":56.3,"rank":10},{"arena":"agents","category":"godotgamedev","elo":1214,"win_rate":53,"rank":10},{"arena":"agents","category":"htmlslides","elo":1251,"win_rate":59,"rank":2},{"arena":"agents","category":"mobileapps","elo":1227,"win_rate":53.2,"rank":8},{"arena":"agents","category":"pptxslides","elo":1181,"win_rate":44.3,"rank":5},{"arena":"agents","category":"python-pptxslides","elo":1180,"win_rate":42.1,"rank":6},{"arena":"agents","category":"webapps","elo":1259,"win_rate":58.2,"rank":6},{"arena":"models","category":"3d","elo":1357,"win_rate":62.9,"rank":3},{"arena":"models","category":"codecategories","elo":1328,"win_rate":58.1,"rank":6},{"arena":"models","category":"dataviz","elo":1303,"win_rate":56.7,"rank":8},{"arena":"models","category":"gamedev","elo":1317,"win_rate":58.1,"rank":12},{"arena":"models","category":"svg","elo":1234,"win_rate":52.5,"rank":17},{"arena":"models","category":"uicomponent","elo":1323,"win_rate":57.4,"rank":7},{"arena":"models","category":"website","elo":1319,"win_rate":56.3,"rank":7}],"artificial_analysis":{"intelligence_index":42.8,"coding_index":56,"agentic_index":30.3}},"reasoning":{"mandatory":false,"default_enabled":true},"aliases":["moonshotai/kimi-k2.6-20260420"]},{"id":"google/gemini-embedding-2-preview","canonical_slug":"google/gemini-embedding-2-preview","hugging_face_id":null,"name":"Google: Gemini Embedding 2 Preview","created":1776436465,"description":"Gemini Embedding 2 Preview is Google's first multimodal embedding model. We currently support mapping text and images into a unified vector space for semantic search and retrieval-augmented generation (RAG). It...","context_length":8192,"architecture":{"modality":"text+image+file+audio+video->embeddings","input_modalities":["text","image","file","audio","video"],"output_modalities":["embeddings"],"tokenizer":"Gemini","instruct_type":null},"pricing":{"prompt":"0.0000002000","completion":"0.0000000000","image":"0.0000004500","audio":"0.0000065000"},"top_provider":{"context_length":8192,"max_completion_tokens":null,"is_moderated":false},"per_request_limits":null,"supported_parameters":["max_tokens","response_format","seed","temperature","top_p"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/google/gemini-embedding-2-preview/endpoints"},"aliases":["google/gemini-embedding-2-preview"]},{"id":"anthropic/claude-opus-4.7","canonical_slug":"anthropic/claude-4.7-opus-20260416","hugging_face_id":null,"name":"Anthropic: Claude Opus 4.7","created":1776351100,"description":"Opus 4.7 is the next generation of Anthropic's Opus family, built for long-running, asynchronous agents. Building on the coding and agentic strengths of Opus 4.6, it delivers stronger performance on...","context_length":1000000,"architecture":{"modality":"text+image+file->text","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"Claude","instruct_type":null},"pricing":{"prompt":"0.0000050000","completion":"0.0000250000","web_search":"0.0100000000","input_cache_read":"0.0000005000","input_cache_write":"0.0000062500","input_cache_write_1h":"0.00001"},"top_provider":{"context_length":1000000,"max_completion_tokens":128000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["include_reasoning","max_tokens","reasoning","response_format","stop","structured_outputs","tool_choice","tools","verbosity"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/anthropic/claude-4.7-opus-20260416/endpoints"},"benchmarks":{"design_arena":[{"arena":"agents","category":"agenticgamedev","elo":1289,"win_rate":64.2,"rank":1},{"arena":"agents","category":"agentichtmlslides","elo":1243,"win_rate":58,"rank":3},{"arena":"agents","category":"agenticslides","elo":1334,"win_rate":64.7,"rank":1},{"arena":"agents","category":"agenticslides(html)","elo":1242,"win_rate":57.8,"rank":3},{"arena":"agents","category":"agenticslides(python-pptx)","elo":1335,"win_rate":66.5,"rank":1},{"arena":"agents","category":"androidnative","elo":1329,"win_rate":61.7,"rank":2},{"arena":"agents","category":"fullstack","elo":1503,"win_rate":80.1,"rank":1},{"arena":"agents","category":"godotgamedev","elo":1210,"win_rate":50.8,"rank":11},{"arena":"agents","category":"htmlslides","elo":1240,"win_rate":57.6,"rank":3},{"arena":"agents","category":"mobileapps","elo":1197,"win_rate":50.9,"rank":15},{"arena":"agents","category":"pptxslides","elo":1344,"win_rate":67.3,"rank":1},{"arena":"agents","category":"python-pptxslides","elo":1351,"win_rate":66.3,"rank":1},{"arena":"agents","category":"webapps","elo":1329,"win_rate":65.5,"rank":1},{"arena":"models","category":"3d","elo":1324,"win_rate":60.6,"rank":10},{"arena":"models","category":"asciiart","elo":1309,"win_rate":64.7,"rank":5},{"arena":"models","category":"codecategories","elo":1337,"win_rate":61.1,"rank":4},{"arena":"models","category":"dataviz","elo":1323,"win_rate":61.6,"rank":4},{"arena":"models","category":"gamedev","elo":1342,"win_rate":62.9,"rank":5},{"arena":"models","category":"svg","elo":1283,"win_rate":61.8,"rank":7},{"arena":"models","category":"uicomponent","elo":1359,"win_rate":64.1,"rank":2},{"arena":"models","category":"website","elo":1337,"win_rate":60.6,"rank":4}],"artificial_analysis":{"intelligence_index":53.5,"coding_index":73.6,"agentic_index":44.4}},"reasoning":{"mandatory":false,"supported_efforts":["max","xhigh","high","medium","low"],"default_effort":"medium"},"aliases":["anthropic/claude-4.7-opus-20260416"]},{"id":"anthropic/claude-opus-4.6-fast","canonical_slug":"anthropic/claude-4.6-opus-fast-20260407","hugging_face_id":null,"name":"Anthropic: Claude Opus 4.6 (Fast)","created":1775592472,"description":"Fast-mode variant of [Opus 4.6](/anthropic/claude-opus-4.6) - identical capabilities with higher output speed at premium 6x pricing.\n\nLearn more in Anthropic's docs: https://platform.claude.com/docs/en/build-with-claude/fast-mode","context_length":1000000,"architecture":{"modality":"text+image+file->text","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"Claude","instruct_type":null},"pricing":{"prompt":"0.0000300000","completion":"0.0001500000","web_search":"0.0100000000","input_cache_read":"0.0000030000","input_cache_write":"0.0000375000","input_cache_write_1h":"0.00006"},"top_provider":{"context_length":1000000,"max_completion_tokens":128000,"is_moderated":true},"per_request_limits":null,"supported_parameters":["include_reasoning","max_tokens","reasoning","response_format","stop","structured_outputs","temperature","tool_choice","tools","top_p","verbosity"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":"2026-06-29","links":{"details":"/api/v1/models/anthropic/claude-4.6-opus-fast-20260407/endpoints"},"reasoning":{"mandatory":false,"supports_max_tokens":true,"supported_efforts":["max","high","medium","low"],"default_effort":"medium"},"aliases":["anthropic/claude-4.6-opus-fast-20260407"]},{"id":"z-ai/glm-5.1","canonical_slug":"z-ai/glm-5.1-20260406","hugging_face_id":"zai-org/GLM-5.1","name":"Z.ai: GLM 5.1","created":1775578025,"description":"GLM-5.1 delivers a major leap in coding capability, with particularly significant gains in handling long-horizon tasks. Unlike previous models built around minute-level interactions, GLM-5.1 can work independently and continuously on...","context_length":202752,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000012600","completion":"0.0000039600","input_cache_read":"0.0000002300"},"top_provider":{"context_length":202752,"max_completion_tokens":null,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_tokens","min_p","parallel_tool_calls","presence_penalty","reasoning","reasoning_effort","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{"temperature":1,"top_p":0.95,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/z-ai/glm-5.1-20260406/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"3d","elo":1344,"win_rate":64,"rank":5},{"arena":"models","category":"asciiart","elo":1158,"win_rate":46.3,"rank":36},{"arena":"models","category":"codecategories","elo":1334,"win_rate":60.6,"rank":5},{"arena":"models","category":"dataviz","elo":1366,"win_rate":67,"rank":2},{"arena":"models","category":"gamedev","elo":1339,"win_rate":62.1,"rank":6},{"arena":"models","category":"svg","elo":1285,"win_rate":62.3,"rank":5},{"arena":"models","category":"uicomponent","elo":1342,"win_rate":62.9,"rank":4},{"arena":"models","category":"website","elo":1321,"win_rate":57.6,"rank":6},{"arena":"agents","category":"agenticgamedev","elo":1216,"win_rate":54.1,"rank":4},{"arena":"agents","category":"agentichtmlslides","elo":1205,"win_rate":52,"rank":6},{"arena":"agents","category":"agenticslides","elo":1245,"win_rate":54.4,"rank":3},{"arena":"agents","category":"agenticslides(html)","elo":1204,"win_rate":51.8,"rank":6},{"arena":"agents","category":"agenticslides(python-pptx)","elo":1240,"win_rate":53.3,"rank":4},{"arena":"agents","category":"androidnative","elo":1241,"win_rate":53.6,"rank":8},{"arena":"agents","category":"fullstack","elo":1234,"win_rate":57,"rank":7},{"arena":"agents","category":"godotgamedev","elo":1236,"win_rate":55.9,"rank":4},{"arena":"agents","category":"htmlslides","elo":1207,"win_rate":52.7,"rank":6},{"arena":"agents","category":"mobileapps","elo":1236,"win_rate":55.4,"rank":7},{"arena":"agents","category":"pptxslides","elo":1241,"win_rate":53.5,"rank":4},{"arena":"agents","category":"python-pptxslides","elo":1258,"win_rate":54.2,"rank":3},{"arena":"agents","category":"webapps","elo":1243,"win_rate":55.2,"rank":10}],"artificial_analysis":{"intelligence_index":40.2,"coding_index":55.8,"agentic_index":29.9}},"reasoning":{"mandatory":false,"default_enabled":true},"aliases":["z-ai/glm-5.1-20260406"]},{"id":"google/gemma-4-26b-a4b-it","canonical_slug":"google/gemma-4-26b-a4b-it-20260403","hugging_face_id":"google/gemma-4-26B-A4B-it","name":"Google: Gemma 4 26B A4B ","created":1775227989,"description":"Gemma 4 26B A4B IT is an instruction-tuned Mixture-of-Experts (MoE) model from Google DeepMind. Despite 25.2B total parameters, only 3.8B activate per token during inference — delivering near-31B quality at...","context_length":262144,"architecture":{"modality":"text+image+video->text","input_modalities":["image","text","video"],"output_modalities":["text"],"tokenizer":"Gemma","instruct_type":null},"pricing":{"prompt":"0.0000000600","completion":"0.0000003300"},"top_provider":{"context_length":262144,"max_completion_tokens":null,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_tokens","min_p","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{"temperature":1,"top_p":0.95,"top_k":64},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/google/gemma-4-26b-a4b-it-20260403/endpoints"},"benchmarks":{"design_arena":[],"artificial_analysis":{"intelligence_index":25.7,"coding_index":39.3,"agentic_index":11}},"reasoning":{"mandatory":false,"default_enabled":false},"aliases":["google/gemma-4-26b-a4b-it-20260403"]},{"id":"google/gemma-4-31b-it","canonical_slug":"google/gemma-4-31b-it-20260402","hugging_face_id":"google/gemma-4-31B-it","name":"Google: Gemma 4 31B","created":1775148486,"description":"Gemma 4 31B Instruct is Google DeepMind's 30.7B dense multimodal model supporting text and image input with text output. Features a 256K token context window, configurable thinking/reasoning mode, native function...","context_length":262144,"architecture":{"modality":"text+image+video->text","input_modalities":["image","text","video"],"output_modalities":["text"],"tokenizer":"Gemma","instruct_type":null},"pricing":{"prompt":"0.0000001200","completion":"0.0000003500","input_cache_read":"0.0000000900"},"top_provider":{"context_length":262144,"max_completion_tokens":262144,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_tokens","min_p","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{"temperature":1,"top_p":0.95,"top_k":64,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/google/gemma-4-31b-it-20260402/endpoints"},"benchmarks":{"design_arena":[],"artificial_analysis":{"intelligence_index":29.4,"coding_index":43.4,"agentic_index":14.4}},"reasoning":{"mandatory":false,"default_enabled":false},"aliases":["google/gemma-4-31b-it-20260402"]},{"id":"qwen/qwen3.6-plus","canonical_slug":"qwen/qwen3.6-plus-04-02","hugging_face_id":"","name":"Qwen: Qwen3.6 Plus","created":1775133557,"description":"Qwen 3.6 Plus builds on a hybrid architecture that combines efficient linear attention with sparse mixture-of-experts routing, enabling strong scalability and high-performance inference. Compared to the 3.5 series, it delivers...","context_length":1000000,"architecture":{"modality":"text+image+video->text","input_modalities":["text","image","video"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":null},"pricing":{"prompt":"0.0000003250","completion":"0.0000019500","input_cache_write":"0.0000004062"},"top_provider":{"context_length":1000000,"max_completion_tokens":65536,"is_moderated":false},"per_request_limits":null,"supported_parameters":["include_reasoning","logprobs","max_tokens","presence_penalty","reasoning","response_format","seed","structured_outputs","temperature","tool_choice","tools","top_logprobs","top_p"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/qwen/qwen3.6-plus-04-02/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"3d","elo":1276,"win_rate":51.5,"rank":23},{"arena":"models","category":"asciiart","elo":1168,"win_rate":46.2,"rank":33},{"arena":"models","category":"codecategories","elo":1275,"win_rate":51.8,"rank":25},{"arena":"models","category":"dataviz","elo":1261,"win_rate":50.6,"rank":26},{"arena":"models","category":"gamedev","elo":1272,"win_rate":52.2,"rank":25},{"arena":"models","category":"svg","elo":1218,"win_rate":52,"rank":20},{"arena":"models","category":"uicomponent","elo":1286,"win_rate":52.5,"rank":21},{"arena":"models","category":"website","elo":1263,"win_rate":51.7,"rank":28}],"artificial_analysis":{"intelligence_index":39.6,"coding_index":54.5,"agentic_index":27.6}},"reasoning":{"mandatory":false},"aliases":["qwen/qwen3.6-plus-04-02"]},{"id":"z-ai/glm-5v-turbo","canonical_slug":"z-ai/glm-5v-turbo-20260401","hugging_face_id":"","name":"Z.ai: GLM 5V Turbo","created":1775061458,"description":"GLM-5V-Turbo is Z.ai’s first native multimodal agent foundation model, built for vision-based coding and agent-driven tasks. It natively handles image, video, and text inputs, excels at long-horizon planning, complex coding,...","context_length":202752,"architecture":{"modality":"text+image+video->text","input_modalities":["image","text","video"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000012000","completion":"0.0000040000","input_cache_read":"0.0000002400"},"top_provider":{"context_length":202752,"max_completion_tokens":131072,"is_moderated":false},"per_request_limits":null,"supported_parameters":["include_reasoning","max_tokens","reasoning","response_format","temperature","tool_choice","tools","top_k","top_p"],"default_parameters":{"temperature":1,"top_p":0.95,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":"2098-12-31","links":{"details":"/api/v1/models/z-ai/glm-5v-turbo-20260401/endpoints"},"benchmarks":{"design_arena":[{"arena":"agents","category":"agenticgamedev","elo":1128,"win_rate":40.6,"rank":9},{"arena":"agents","category":"agentichtmlslides","elo":1134,"win_rate":41.7,"rank":8},{"arena":"agents","category":"agenticslides","elo":1171,"win_rate":51.6,"rank":6},{"arena":"agents","category":"agenticslides(html)","elo":1137,"win_rate":41.8,"rank":8},{"arena":"agents","category":"agenticslides(python-pptx)","elo":1183,"win_rate":53.9,"rank":6},{"arena":"agents","category":"androidnative","elo":1267,"win_rate":54.8,"rank":3},{"arena":"agents","category":"fullstack","elo":1213,"win_rate":54.7,"rank":12},{"arena":"agents","category":"godotgamedev","elo":1221,"win_rate":53.6,"rank":5},{"arena":"agents","category":"htmlslides","elo":1137,"win_rate":41.8,"rank":10},{"arena":"agents","category":"mobileapps","elo":1221,"win_rate":52.4,"rank":12},{"arena":"agents","category":"pptxslides","elo":1164,"win_rate":52.1,"rank":6},{"arena":"agents","category":"python-pptxslides","elo":1168,"win_rate":52.2,"rank":7},{"arena":"agents","category":"webapps","elo":1187,"win_rate":46.1,"rank":16},{"arena":"models","category":"3d","elo":1291,"win_rate":55.3,"rank":20},{"arena":"models","category":"asciiart","elo":1138,"win_rate":42.7,"rank":40},{"arena":"models","category":"codecategories","elo":1279,"win_rate":52.6,"rank":23},{"arena":"models","category":"dataviz","elo":1248,"win_rate":49.8,"rank":29},{"arena":"models","category":"gamedev","elo":1290,"win_rate":55,"rank":20},{"arena":"models","category":"svg","elo":1204,"win_rate":51.1,"rank":27},{"arena":"models","category":"uicomponent","elo":1269,"win_rate":51.9,"rank":25},{"arena":"models","category":"website","elo":1271,"win_rate":51,"rank":23}]},"reasoning":{"mandatory":false,"default_enabled":true},"aliases":["z-ai/glm-5v-turbo-20260401"]},{"id":"arcee-ai/trinity-large-thinking","canonical_slug":"arcee-ai/trinity-large-thinking","hugging_face_id":"arcee-ai/Trinity-Large-Thinking","name":"Arcee AI: Trinity Large Thinking","created":1775058318,"description":"Trinity Large Thinking is a powerful open source reasoning model from the team at Arcee AI. It shows strong performance in PinchBench, agentic workloads, and reasoning tasks. Launch video: https://youtu.be/Gc82AXLa0Rg?si=4RLn6WBz33qT--B7...","context_length":262144,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000002500","completion":"0.0000008000","input_cache_read":"0.0000000600"},"top_provider":{"context_length":262144,"max_completion_tokens":80000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["include_reasoning","max_tokens","reasoning","temperature","tool_choice","tools","top_k","top_p"],"default_parameters":{"temperature":0.3,"top_p":0.8,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/arcee-ai/trinity-large-thinking/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"3d","elo":1160,"win_rate":41.3,"rank":59},{"arena":"models","category":"asciiart","elo":1085,"win_rate":37.1,"rank":45},{"arena":"models","category":"codecategories","elo":1166,"win_rate":40.1,"rank":62},{"arena":"models","category":"dataviz","elo":1142,"win_rate":39.3,"rank":68},{"arena":"models","category":"gamedev","elo":1143,"win_rate":38.1,"rank":68},{"arena":"models","category":"svg","elo":1073,"win_rate":35.2,"rank":58},{"arena":"models","category":"uicomponent","elo":1100,"win_rate":32.5,"rank":73},{"arena":"models","category":"website","elo":1179,"win_rate":41.3,"rank":60}]},"reasoning":{"mandatory":true},"aliases":["arcee-ai/trinity-large-thinking"]},{"id":"x-ai/grok-4.20","canonical_slug":"x-ai/grok-4.20-20260309","hugging_face_id":"","name":"xAI: Grok 4.20","created":1774979019,"description":"Grok 4.20 is a reasoning model from xAI with industry-leading speed and agentic tool calling capabilities. It combines the lowest hallucination rate on the market with strict prompt adherance, delivering...","context_length":2000000,"architecture":{"modality":"text+image+file->text","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"Grok","instruct_type":null},"pricing":{"prompt":"0.0000012500","completion":"0.0000025000","web_search":"0.0050000000","input_cache_read":"0.0000002000"},"top_provider":{"context_length":2000000,"max_completion_tokens":null,"is_moderated":false},"per_request_limits":null,"supported_parameters":["include_reasoning","logprobs","max_tokens","reasoning","response_format","seed","structured_outputs","temperature","tool_choice","tools","top_logprobs","top_p"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":"2025-09-01","expiration_date":null,"links":{"details":"/api/v1/models/x-ai/grok-4.20-20260309/endpoints"},"reasoning":{"mandatory":false,"default_enabled":false},"aliases":["x-ai/grok-4.20-20260309"]},{"id":"kwaipilot/kat-coder-pro-v2","canonical_slug":"kwaipilot/kat-coder-pro-v2-20260327","hugging_face_id":"","name":"Kwaipilot: KAT-Coder-Pro V2","created":1774649310,"description":"KAT-Coder-Pro V2 is the latest high-performance model in KwaiKAT’s KAT-Coder series, designed for complex enterprise-grade software engineering and SaaS integration. It builds on the agentic coding strengths of earlier versions,...","context_length":256000,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000003000","completion":"0.0000012000","input_cache_read":"0.0000000600"},"top_provider":{"context_length":256000,"max_completion_tokens":80000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","logit_bias","logprobs","max_tokens","min_p","presence_penalty","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/kwaipilot/kat-coder-pro-v2-20260327/endpoints"},"aliases":["kwaipilot/kat-coder-pro-v2-20260327"]},{"id":"rekaai/reka-edge","canonical_slug":"rekaai/reka-edge-2603","hugging_face_id":"RekaAI/reka-edge-2603","name":"Reka Edge","created":1774026965,"description":"Reka Edge is an extremely efficient 7B multimodal vision-language model that accepts image/video+text inputs and generates text outputs. This model is optimized specifically to deliver industry-leading performance in image understanding,...","context_length":16384,"architecture":{"modality":"text+image+video->text","input_modalities":["image","text","video"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000001000","completion":"0.0000001000"},"top_provider":{"context_length":16384,"max_completion_tokens":16384,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","logprobs","max_tokens","presence_penalty","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/rekaai/reka-edge-2603/endpoints"},"reasoning":{"mandatory":false},"aliases":["rekaai/reka-edge-2603"]},{"id":"minimax/minimax-m2.7","canonical_slug":"minimax/minimax-m2.7-20260318","hugging_face_id":"MiniMaxAI/MiniMax-M2.7","name":"MiniMax: MiniMax M2.7","created":1773836697,"description":"MiniMax-M2.7 is a next-generation large language model designed for autonomous, real-world productivity and continuous improvement. Built to actively participate in its own evolution, M2.7 integrates advanced agentic capabilities through multi-agent...","context_length":204800,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000001800","completion":"0.0000007200"},"top_provider":{"context_length":196608,"max_completion_tokens":196608,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_tokens","min_p","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{"temperature":1,"top_p":0.95,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/minimax/minimax-m2.7-20260318/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"3d","elo":1270,"win_rate":51.1,"rank":27},{"arena":"models","category":"asciiart","elo":1186,"win_rate":48.4,"rank":28},{"arena":"models","category":"codecategories","elo":1282,"win_rate":53.3,"rank":22},{"arena":"models","category":"dataviz","elo":1272,"win_rate":53.2,"rank":20},{"arena":"models","category":"gamedev","elo":1274,"win_rate":53.4,"rank":22},{"arena":"models","category":"svg","elo":1196,"win_rate":50.7,"rank":31},{"arena":"models","category":"uicomponent","elo":1262,"win_rate":49.9,"rank":27},{"arena":"models","category":"website","elo":1287,"win_rate":54.2,"rank":20}],"artificial_analysis":{"intelligence_index":38.1,"coding_index":52.6,"agentic_index":25.6}},"reasoning":{"mandatory":true},"aliases":["minimax/minimax-m2.7-20260318"]},{"id":"openai/gpt-5.4-nano","canonical_slug":"openai/gpt-5.4-nano-20260317","hugging_face_id":"","name":"OpenAI: GPT-5.4 Nano","created":1773748187,"description":"GPT-5.4 nano is the most lightweight and cost-efficient variant of the GPT-5.4 family, optimized for speed-critical and high-volume tasks. It supports text and image inputs and is designed for low-latency...","context_length":400000,"architecture":{"modality":"text+image+file->text","input_modalities":["file","image","text"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":"0.0000002000","completion":"0.0000012500","web_search":"0.0100000000","input_cache_read":"0.0000000200"},"top_provider":{"context_length":400000,"max_completion_tokens":128000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["include_reasoning","max_completion_tokens","max_tokens","reasoning","response_format","seed","structured_outputs","tool_choice","tools"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":"2025-08-31","expiration_date":null,"links":{"details":"/api/v1/models/openai/gpt-5.4-nano-20260317/endpoints"},"benchmarks":{"design_arena":[],"artificial_analysis":{"intelligence_index":38.2,"coding_index":56.1,"agentic_index":27.5}},"reasoning":{"mandatory":false,"default_enabled":false,"supported_efforts":["xhigh","high","medium","low","none"],"default_effort":"medium"},"aliases":["openai/gpt-5.4-nano-20260317"]},{"id":"openai/gpt-5.4-mini","canonical_slug":"openai/gpt-5.4-mini-20260317","hugging_face_id":"","name":"OpenAI: GPT-5.4 Mini","created":1773748178,"description":"GPT-5.4 mini brings the core capabilities of GPT-5.4 to a faster, more efficient model optimized for high-throughput workloads. It supports text and image inputs with strong performance across reasoning, coding,...","context_length":400000,"architecture":{"modality":"text+image+file->text","input_modalities":["file","image","text"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":"0.0000007500","completion":"0.0000045000","web_search":"0.0100000000","input_cache_read":"0.0000000750"},"top_provider":{"context_length":400000,"max_completion_tokens":128000,"is_moderated":true},"per_request_limits":null,"supported_parameters":["include_reasoning","max_completion_tokens","max_tokens","reasoning","response_format","seed","structured_outputs","tool_choice","tools"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":"2025-08-31","expiration_date":null,"links":{"details":"/api/v1/models/openai/gpt-5.4-mini-20260317/endpoints"},"benchmarks":{"design_arena":[],"artificial_analysis":{"intelligence_index":40,"coding_index":56.1,"agentic_index":30.2}},"reasoning":{"mandatory":false,"default_enabled":false,"supported_efforts":["xhigh","high","medium","low","none"],"default_effort":"medium"},"aliases":["openai/gpt-5.4-mini-20260317"]},{"id":"mistralai/mistral-small-2603","canonical_slug":"mistralai/mistral-small-2603","hugging_face_id":"mistralai/Mistral-Small-4-119B-2603","name":"Mistral: Mistral Small 4","created":1773695685,"description":"Mistral Small 4 is the next major release in the Mistral Small family, unifying the capabilities of several flagship Mistral models into a single system. It combines strong reasoning from...","context_length":262144,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Mistral","instruct_type":null},"pricing":{"prompt":"0.0000001500","completion":"0.0000006000","input_cache_read":"0.0000000150"},"top_provider":{"context_length":262144,"max_completion_tokens":null,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","max_tokens","presence_penalty","reasoning","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_p"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/mistralai/mistral-small-2603/endpoints"},"reasoning":{"mandatory":false,"default_enabled":false,"supported_efforts":["high","none"],"default_effort":"high"},"aliases":["mistralai/mistral-small-2603"]},{"id":"perplexity/pplx-embed-v1-4b","canonical_slug":"perplexity/pplx-embed-v1-4B","hugging_face_id":"","name":"Perplexity: Embed V1 4B","created":1773625372,"description":"pplx-embed-v1 -4B is one of Perplexity's state-of-the-art text embedding models built for real-world, web-scale retrieval. pplx-embed-v1 is optimized for standard dense text retrieval with the 4B parameter model maximizing retrieval...","context_length":32000,"architecture":{"modality":"text->embeddings","input_modalities":["text"],"output_modalities":["embeddings"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000000300","completion":"0.0000000000"},"top_provider":{"context_length":32000,"max_completion_tokens":null,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","max_tokens","presence_penalty","temperature","top_k","top_p","web_search_options"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/perplexity/pplx-embed-v1-4B/endpoints"},"aliases":["perplexity/pplx-embed-v1-4B"]},{"id":"perplexity/pplx-embed-v1-0.6b","canonical_slug":"perplexity/pplx-embed-v1-0.6B","hugging_face_id":"","name":"Perplexity: Embed V1 0.6B","created":1773624868,"description":"pplx-embed-v1-0.6B is one of Perplexity's state-of-the-art text embedding models built for real-world, web-scale retrieval. pplx-embed-v1 is optimized for standard dense text retrieval with the 0.6B parameter model targeting lightweight, low-latency...","context_length":32000,"architecture":{"modality":"text->embeddings","input_modalities":["text"],"output_modalities":["embeddings"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000000040","completion":"0.0000000000"},"top_provider":{"context_length":32000,"max_completion_tokens":null,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","max_tokens","presence_penalty","temperature","top_k","top_p","web_search_options"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/perplexity/pplx-embed-v1-0.6B/endpoints"},"aliases":["perplexity/pplx-embed-v1-0.6B"]},{"id":"z-ai/glm-5-turbo","canonical_slug":"z-ai/glm-5-turbo-20260315","hugging_face_id":"","name":"Z.ai: GLM 5 Turbo","created":1773583573,"description":"GLM-5 Turbo is a new model from Z.ai designed for fast inference and strong performance in agent-driven environments such as OpenClaw scenarios. It is deeply optimized for real-world agent workflows...","context_length":262144,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000012000","completion":"0.0000040000","input_cache_read":"0.0000002400"},"top_provider":{"context_length":262144,"max_completion_tokens":131072,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","max_tokens","min_p","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","temperature","tool_choice","tools","top_k","top_p"],"default_parameters":{"temperature":1,"top_p":0.95,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/z-ai/glm-5-turbo-20260315/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"3d","elo":1335,"win_rate":60.9,"rank":6},{"arena":"models","category":"asciiart","elo":1200,"win_rate":51.3,"rank":22},{"arena":"models","category":"codecategories","elo":1322,"win_rate":57.7,"rank":8},{"arena":"models","category":"dataviz","elo":1308,"win_rate":58.3,"rank":7},{"arena":"models","category":"gamedev","elo":1327,"win_rate":59.8,"rank":9},{"arena":"models","category":"svg","elo":1270,"win_rate":59.6,"rank":10},{"arena":"models","category":"uicomponent","elo":1320,"win_rate":57.9,"rank":10},{"arena":"models","category":"website","elo":1317,"win_rate":56.7,"rank":9}]},"reasoning":{"mandatory":false,"default_enabled":true},"aliases":["z-ai/glm-5-turbo-20260315"]},{"id":"nvidia/nemotron-3-super-120b-a12b","canonical_slug":"nvidia/nemotron-3-super-120b-a12b-20230311","hugging_face_id":"nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-FP8","name":"NVIDIA: Nemotron 3 Super","created":1773245239,"description":"NVIDIA Nemotron 3 Super is a 120B-parameter open hybrid MoE model, activating just 12B parameters for maximum compute efficiency and accuracy in complex multi-agent applications. Built on a hybrid Mamba-Transformer...","context_length":1000000,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000000850","completion":"0.0000004000"},"top_provider":{"context_length":262144,"max_completion_tokens":16384,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_tokens","min_p","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{"temperature":1,"top_p":0.95,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/nvidia/nemotron-3-super-120b-a12b-20230311/endpoints"},"benchmarks":{"design_arena":[],"artificial_analysis":{"intelligence_index":25.4,"coding_index":37.7,"agentic_index":8.7}},"reasoning":{"mandatory":false,"default_enabled":true,"supports_max_tokens":true,"supported_efforts":["medium","low"],"default_effort":"medium"},"aliases":["nvidia/nemotron-3-super-120b-a12b-20230311"]},{"id":"bytedance-seed/seed-2.0-lite","canonical_slug":"bytedance-seed/seed-2.0-lite-20260309","hugging_face_id":null,"name":"ByteDance Seed: Seed-2.0-Lite","created":1773157231,"description":"Seed-2.0-Lite is a versatile, cost‑efficient enterprise workhorse that delivers strong multimodal and agent capabilities while offering noticeably lower latency, making it a practical default choice for most production workloads across...","context_length":262144,"architecture":{"modality":"text+image+video->text","input_modalities":["text","image","video"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000002500","completion":"0.0000020000"},"top_provider":{"context_length":262144,"max_completion_tokens":131072,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","max_tokens","reasoning","response_format","stop","structured_outputs","temperature","tool_choice","tools","top_p"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/bytedance-seed/seed-2.0-lite-20260309/endpoints"},"reasoning":{"mandatory":false,"supported_efforts":["high","medium","low","minimal"],"default_effort":"medium"},"aliases":["bytedance-seed/seed-2.0-lite-20260309"]},{"id":"qwen/qwen3.5-9b","canonical_slug":"qwen/qwen3.5-9b-20260310","hugging_face_id":"Qwen/Qwen3.5-9B","name":"Qwen: Qwen3.5-9B","created":1773152396,"description":"Qwen3.5-9B is a multimodal foundation model from the Qwen3.5 family, designed to deliver strong reasoning, coding, and visual understanding in an efficient 9B-parameter architecture. It uses a unified vision-language design...","context_length":262144,"architecture":{"modality":"text+image+video->text","input_modalities":["text","image","video"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":null},"pricing":{"prompt":"0.0000001000","completion":"0.0000001500"},"top_provider":{"context_length":262144,"max_completion_tokens":262144,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_tokens","min_p","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/qwen/qwen3.5-9b-20260310/endpoints"},"benchmarks":{"design_arena":[],"artificial_analysis":{"intelligence_index":null,"coding_index":28.7,"agentic_index":null}},"reasoning":{"mandatory":false},"aliases":["qwen/qwen3.5-9b-20260310"]},{"id":"openai/gpt-5.4-pro","canonical_slug":"openai/gpt-5.4-pro-20260305","hugging_face_id":"","name":"OpenAI: GPT-5.4 Pro","created":1772734366,"description":"GPT-5.4 Pro is OpenAI's most advanced model, building on GPT-5.4's unified architecture with enhanced reasoning capabilities for complex, high-stakes tasks. It features a 1M+ token context window (922K input, 128K...","context_length":1050000,"architecture":{"modality":"text+image+file->text","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":"0.0000300000","completion":"0.0001800000","web_search":"0.0100000000"},"top_provider":{"context_length":1050000,"max_completion_tokens":128000,"is_moderated":true},"per_request_limits":null,"supported_parameters":["include_reasoning","max_completion_tokens","max_tokens","reasoning","response_format","seed","structured_outputs","tool_choice","tools"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/openai/gpt-5.4-pro-20260305/endpoints"},"reasoning":{"mandatory":true,"supported_efforts":["xhigh","high","medium"],"default_effort":"medium"},"aliases":["gpt-5.4-pro","openai/gpt-5.4-pro-20260305"]},{"id":"openai/gpt-5.4","canonical_slug":"openai/gpt-5.4-20260305","hugging_face_id":"","name":"OpenAI: GPT-5.4","created":1772734352,"description":"GPT-5.4 is OpenAI’s latest frontier model, unifying the Codex and GPT lines into a single system. It features a 1M+ token context window (922K input, 128K output) with support for...","context_length":1050000,"architecture":{"modality":"text+image+file->text","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":"0.0000025000","completion":"0.0000150000","web_search":"0.0100000000","input_cache_read":"0.0000002500"},"top_provider":{"context_length":1050000,"max_completion_tokens":128000,"is_moderated":true},"per_request_limits":null,"supported_parameters":["include_reasoning","max_completion_tokens","max_tokens","reasoning","response_format","seed","structured_outputs","tool_choice","tools"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/openai/gpt-5.4-20260305/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"3d","elo":1181,"win_rate":43.4,"rank":50},{"arena":"models","category":"asciiart","elo":1235,"win_rate":54.7,"rank":11},{"arena":"models","category":"codecategories","elo":1265,"win_rate":53.6,"rank":28},{"arena":"models","category":"dataviz","elo":1276,"win_rate":57,"rank":17},{"arena":"models","category":"gamedev","elo":1307,"win_rate":58.2,"rank":13},{"arena":"models","category":"svg","elo":1255,"win_rate":58.3,"rank":12},{"arena":"models","category":"uicomponent","elo":1299,"win_rate":58.8,"rank":14},{"arena":"models","category":"website","elo":1266,"win_rate":53.6,"rank":27},{"arena":"agents","category":"androidnative","elo":1043,"win_rate":47.5,"rank":26},{"arena":"agents","category":"fullstack","elo":1086,"win_rate":40.8,"rank":24},{"arena":"agents","category":"godotgamedev","elo":1172,"win_rate":46.9,"rank":14},{"arena":"agents","category":"mobileapps","elo":1152,"win_rate":44.4,"rank":25},{"arena":"agents","category":"webapps","elo":1138,"win_rate":41.8,"rank":20}],"artificial_analysis":{"intelligence_index":51.4,"coding_index":71.1,"agentic_index":41.1}},"reasoning":{"mandatory":false,"default_enabled":false,"supported_efforts":["xhigh","high","medium","low","none"],"default_effort":"medium"},"aliases":["gpt-5.4","openai/gpt-5.4-20260305"]},{"id":"inception/mercury-2","canonical_slug":"inception/mercury-2-20260304","hugging_face_id":null,"name":"Inception: Mercury 2","created":1772636275,"description":"Mercury 2 is an extremely fast reasoning LLM, and the first reasoning diffusion LLM (dLLM). Instead of generating tokens sequentially, Mercury 2 produces and refines multiple tokens in parallel, achieving...","context_length":128000,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000002500","completion":"0.0000007500","input_cache_read":"0.0000000250"},"top_provider":{"context_length":128000,"max_completion_tokens":50000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["include_reasoning","max_tokens","reasoning","response_format","stop","structured_outputs","temperature","tool_choice","tools"],"default_parameters":{"temperature":0.75,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/inception/mercury-2-20260304/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"3d","elo":1036,"win_rate":22.7,"rank":87},{"arena":"models","category":"codecategories","elo":1036,"win_rate":21.2,"rank":93},{"arena":"models","category":"dataviz","elo":994,"win_rate":19.3,"rank":89},{"arena":"models","category":"gamedev","elo":1042,"win_rate":21.1,"rank":88},{"arena":"models","category":"uicomponent","elo":1026,"win_rate":20.9,"rank":85},{"arena":"models","category":"website","elo":1028,"win_rate":20.7,"rank":96}]},"reasoning":{"mandatory":false,"default_enabled":true,"supported_efforts":["high","medium","low","none"],"default_effort":"medium"},"aliases":["inception/mercury-2-20260304"]},{"id":"openai/gpt-5.3-chat","canonical_slug":"openai/gpt-5.3-chat-20260303","hugging_face_id":"","name":"OpenAI: GPT-5.3 Chat","created":1772564061,"description":"GPT-5.3 Chat is an update to ChatGPT's most-used model that makes everyday conversations smoother, more useful, and more directly helpful. It delivers more accurate answers with better contextualization and significantly...","context_length":128000,"architecture":{"modality":"text+image+file->text","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":"0.0000017500","completion":"0.0000140000","web_search":"0.0100000000","input_cache_read":"0.0000001750"},"top_provider":{"context_length":128000,"max_completion_tokens":16384,"is_moderated":false},"per_request_limits":null,"supported_parameters":["max_completion_tokens","max_tokens","response_format","seed","structured_outputs","tool_choice","tools"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/openai/gpt-5.3-chat-20260303/endpoints"},"aliases":["openai/gpt-5.3-chat-20260303"]},{"id":"google/gemini-3.1-flash-lite-preview","canonical_slug":"google/gemini-3.1-flash-lite-preview-20260303","hugging_face_id":"","name":"Google: Gemini 3.1 Flash Lite Preview","created":1772512673,"description":"Gemini 3.1 Flash Lite Preview is Google's high-efficiency model optimized for high-volume use cases. It outperforms Gemini 2.5 Flash Lite on overall quality and approaches Gemini 2.5 Flash performance across...","context_length":1048576,"architecture":{"modality":"text+image+file+audio+video->text","input_modalities":["text","image","video","file","audio"],"output_modalities":["text"],"tokenizer":"Gemini","instruct_type":null},"pricing":{"prompt":"0.0000002500","completion":"0.0000015000","image":"0.0000002500","audio":"0.0000005000","web_search":"0.0140000000","internal_reasoning":"0.0000015","input_cache_read":"0.0000000250","input_cache_write":"0.0000000833"},"top_provider":{"context_length":1048576,"max_completion_tokens":65536,"is_moderated":false},"per_request_limits":null,"supported_parameters":["include_reasoning","max_tokens","reasoning","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_p"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/google/gemini-3.1-flash-lite-preview-20260303/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"3d","elo":1127,"win_rate":38.8,"rank":72},{"arena":"models","category":"asciiart","elo":1212,"win_rate":50.7,"rank":18},{"arena":"models","category":"codecategories","elo":1122,"win_rate":36.4,"rank":76},{"arena":"models","category":"dataviz","elo":1089,"win_rate":33.3,"rank":78},{"arena":"models","category":"gamedev","elo":1098,"win_rate":34,"rank":79},{"arena":"models","category":"svg","elo":1109,"win_rate":42.5,"rank":49},{"arena":"models","category":"uicomponent","elo":1126,"win_rate":37.7,"rank":69},{"arena":"models","category":"website","elo":1125,"win_rate":36.5,"rank":78}],"artificial_analysis":{"intelligence_index":25,"coding_index":34.7,"agentic_index":6.2}},"reasoning":{"mandatory":false,"default_enabled":true,"supported_efforts":["high","medium","low","minimal"],"default_effort":"minimal"},"aliases":["google/gemini-3.1-flash-lite-preview-20260303"]},{"id":"bytedance-seed/seed-2.0-mini","canonical_slug":"bytedance-seed/seed-2.0-mini-20260224","hugging_face_id":"","name":"ByteDance Seed: Seed-2.0-Mini","created":1772131107,"description":"Seed-2.0-mini targets latency-sensitive, high-concurrency, and cost-sensitive scenarios, emphasizing fast response and flexible inference deployment. It delivers performance comparable to ByteDance-Seed-1.6, supports 256k context, four reasoning effort modes (minimal/low/medium/high), multimodal understanding,...","context_length":262144,"architecture":{"modality":"text+image+video->text","input_modalities":["text","image","video"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000001000","completion":"0.0000004000"},"top_provider":{"context_length":262144,"max_completion_tokens":131072,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","max_tokens","reasoning","response_format","stop","structured_outputs","temperature","tool_choice","tools","top_p"],"default_parameters":{"temperature":null,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/bytedance-seed/seed-2.0-mini-20260224/endpoints"},"reasoning":{"mandatory":false,"supported_efforts":["high","medium","low","minimal"],"default_effort":"medium"},"aliases":["bytedance-seed/seed-2.0-mini-20260224"]},{"id":"qwen/qwen3.5-35b-a3b","canonical_slug":"qwen/qwen3.5-35b-a3b-20260224","hugging_face_id":"Qwen/Qwen3.5-35B-A3B","name":"Qwen: Qwen3.5-35B-A3B","created":1772053822,"description":"The Qwen3.5 Series 35B-A3B is a native vision-language model designed with a hybrid architecture that integrates linear attention mechanisms and a sparse mixture-of-experts model, achieving higher inference efficiency. Its overall...","context_length":262144,"architecture":{"modality":"text+image+video->text","input_modalities":["text","image","video"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":null},"pricing":{"prompt":"0.0000001400","completion":"0.0000010000","input_cache_read":"0.0000000500"},"top_provider":{"context_length":262144,"max_completion_tokens":81920,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_tokens","min_p","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{"temperature":1,"top_p":0.95,"top_k":20,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/qwen/qwen3.5-35b-a3b-20260224/endpoints"},"reasoning":{"mandatory":false},"aliases":["qwen/qwen3.5-35b-a3b-20260224"]},{"id":"qwen/qwen3.5-27b","canonical_slug":"qwen/qwen3.5-27b-20260224","hugging_face_id":"Qwen/Qwen3.5-27B","name":"Qwen: Qwen3.5-27B","created":1772053810,"description":"The Qwen3.5 27B native vision-language Dense model incorporates a linear attention mechanism, delivering fast response times while balancing inference speed and performance. Its overall capabilities are comparable to those of...","context_length":262144,"architecture":{"modality":"text+image+video->text","input_modalities":["text","image","video"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":null},"pricing":{"prompt":"0.0000001950","completion":"0.0000015600"},"top_provider":{"context_length":262144,"max_completion_tokens":65536,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_tokens","min_p","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{"temperature":0.6,"top_p":0.95,"top_k":20,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/qwen/qwen3.5-27b-20260224/endpoints"},"reasoning":{"mandatory":false},"aliases":["qwen/qwen3.5-27b-20260224"]},{"id":"qwen/qwen3.5-122b-a10b","canonical_slug":"qwen/qwen3.5-122b-a10b-20260224","hugging_face_id":"Qwen/Qwen3.5-122B-A10B","name":"Qwen: Qwen3.5-122B-A10B","created":1772053789,"description":"The Qwen3.5 122B-A10B native vision-language model is built on a hybrid architecture that integrates a linear attention mechanism with a sparse mixture-of-experts model, achieving higher inference efficiency. In terms of...","context_length":262144,"architecture":{"modality":"text+image+video->text","input_modalities":["text","image","video"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":null},"pricing":{"prompt":"0.0000002600","completion":"0.0000020800"},"top_provider":{"context_length":262144,"max_completion_tokens":262144,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_tokens","min_p","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{"temperature":0.6,"top_p":0.95,"top_k":20,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/qwen/qwen3.5-122b-a10b-20260224/endpoints"},"benchmarks":{"design_arena":[],"artificial_analysis":{"intelligence_index":32.3,"coding_index":45.7,"agentic_index":20.7}},"reasoning":{"mandatory":false},"aliases":["qwen/qwen3.5-122b-a10b-20260224"]},{"id":"qwen/qwen3.5-flash-02-23","canonical_slug":"qwen/qwen3.5-flash-20260224","hugging_face_id":null,"name":"Qwen: Qwen3.5-Flash","created":1772053776,"description":"The Qwen3.5 native vision-language Flash models are built on a hybrid architecture that integrates a linear attention mechanism with a sparse mixture-of-experts model, achieving higher inference efficiency. Compared to the...","context_length":1000000,"architecture":{"modality":"text+image+video->text","input_modalities":["text","image","video"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":null},"pricing":{"prompt":"0.0000000650","completion":"0.0000002600"},"top_provider":{"context_length":1000000,"max_completion_tokens":65536,"is_moderated":false},"per_request_limits":null,"supported_parameters":["include_reasoning","max_tokens","presence_penalty","reasoning","response_format","seed","structured_outputs","temperature","tool_choice","tools","top_p"],"default_parameters":{"temperature":null,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/qwen/qwen3.5-flash-20260224/endpoints"},"reasoning":{"mandatory":false},"aliases":["qwen/qwen3.5-flash-20260224"]},{"id":"google/gemini-3.1-pro-preview-customtools","canonical_slug":"google/gemini-3.1-pro-preview-customtools-20260219","hugging_face_id":null,"name":"Google: Gemini 3.1 Pro Preview Custom Tools","created":1772045923,"description":"Gemini 3.1 Pro Preview Custom Tools is a variant of Gemini 3.1 Pro that improves tool selection behavior by preventing overuse of a general bash tool when more efficient third-party...","context_length":1048756,"architecture":{"modality":"text+image+file+audio+video->text","input_modalities":["text","audio","image","video","file"],"output_modalities":["text"],"tokenizer":"Gemini","instruct_type":null},"pricing":{"prompt":"0.0000020000","completion":"0.0000120000","image":"0.0000020000","audio":"0.0000020000","web_search":"0.0140000000","internal_reasoning":"0.000012","input_cache_read":"0.0000002000","input_cache_write":"0.0000003750"},"top_provider":{"context_length":1048576,"max_completion_tokens":65536,"is_moderated":false},"per_request_limits":null,"supported_parameters":["include_reasoning","max_tokens","reasoning","response_format","seed","structured_outputs","temperature","tool_choice","tools","top_p"],"default_parameters":{"temperature":null,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/google/gemini-3.1-pro-preview-customtools-20260219/endpoints"},"reasoning":{"mandatory":true,"supported_efforts":["high","medium","low"],"default_effort":"medium"},"aliases":["google/gemini-3.1-pro-preview-customtools-20260219"]},{"id":"openai/gpt-5.3-codex","canonical_slug":"openai/gpt-5.3-codex-20260224","hugging_face_id":"","name":"OpenAI: GPT-5.3-Codex","created":1771959164,"description":"GPT-5.3-Codex is OpenAI’s most advanced agentic coding model, combining the frontier software engineering performance of GPT-5.2-Codex with the broader reasoning and professional knowledge capabilities of GPT-5.2. It achieves state-of-the-art results...","context_length":400000,"architecture":{"modality":"text+image+file->text","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":"0.0000017500","completion":"0.0000140000","web_search":"0.0100000000","input_cache_read":"0.0000001750"},"top_provider":{"context_length":400000,"max_completion_tokens":128000,"is_moderated":true},"per_request_limits":null,"supported_parameters":["include_reasoning","max_completion_tokens","max_tokens","reasoning","response_format","seed","structured_outputs","tool_choice","tools"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/openai/gpt-5.3-codex-20260224/endpoints"},"benchmarks":{"design_arena":[{"arena":"agents","category":"androidnative","elo":1084,"win_rate":35.2,"rank":21},{"arena":"agents","category":"fullstack","elo":1051,"win_rate":36.4,"rank":28},{"arena":"agents","category":"godotgamedev","elo":1163,"win_rate":45.1,"rank":15},{"arena":"agents","category":"mobileapps","elo":1133,"win_rate":41.6,"rank":27},{"arena":"agents","category":"webapps","elo":1118,"win_rate":39.1,"rank":23},{"arena":"models","category":"3d","elo":1087,"win_rate":35.3,"rank":77},{"arena":"models","category":"asciiart","elo":1195,"win_rate":51.2,"rank":24},{"arena":"models","category":"codecategories","elo":1195,"win_rate":47.3,"rank":53},{"arena":"models","category":"dataviz","elo":1206,"win_rate":50.4,"rank":46},{"arena":"models","category":"gamedev","elo":1228,"win_rate":51.2,"rank":39},{"arena":"models","category":"svg","elo":1187,"win_rate":54,"rank":34},{"arena":"models","category":"uicomponent","elo":1193,"win_rate":47.3,"rank":51},{"arena":"models","category":"website","elo":1207,"win_rate":48.7,"rank":52}]},"reasoning":{"mandatory":false,"supported_efforts":["xhigh","high","medium","low","none"],"default_effort":"medium"},"aliases":["gpt-5.3-codex","openai/gpt-5.3-codex-20260224"]},{"id":"google/gemini-3.1-pro-preview","canonical_slug":"google/gemini-3.1-pro-preview-20260219","hugging_face_id":"","name":"Google: Gemini 3.1 Pro Preview","created":1771509627,"description":"Gemini 3.1 Pro Preview is Google’s frontier reasoning model, delivering enhanced software engineering performance, improved agentic reliability, and more efficient token usage across complex workflows. Building on the multimodal foundation...","context_length":1048576,"architecture":{"modality":"text+image+file+audio+video->text","input_modalities":["audio","file","image","text","video"],"output_modalities":["text"],"tokenizer":"Gemini","instruct_type":null},"pricing":{"prompt":"0.0000020000","completion":"0.0000120000","image":"0.0000020000","audio":"0.0000020000","web_search":"0.0140000000","internal_reasoning":"0.000012","input_cache_read":"0.0000002000","input_cache_write":"0.0000003750"},"top_provider":{"context_length":1048576,"max_completion_tokens":65536,"is_moderated":false},"per_request_limits":null,"supported_parameters":["include_reasoning","max_tokens","reasoning","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_p"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/google/gemini-3.1-pro-preview-20260219/endpoints"},"benchmarks":{"design_arena":[{"arena":"agents","category":"agenticgamedev","elo":1149,"win_rate":43.7,"rank":8},{"arena":"agents","category":"agentichtmlslides","elo":1226,"win_rate":55.8,"rank":5},{"arena":"agents","category":"agenticslides","elo":1112,"win_rate":33.8,"rank":8},{"arena":"agents","category":"agenticslides(html)","elo":1219,"win_rate":54.4,"rank":5},{"arena":"agents","category":"agenticslides(python-pptx)","elo":1107,"win_rate":33.9,"rank":8},{"arena":"agents","category":"androidnative","elo":1061,"win_rate":39.3,"rank":25},{"arena":"agents","category":"fullstack","elo":1140,"win_rate":45.3,"rank":16},{"arena":"agents","category":"godotgamedev","elo":1220,"win_rate":53.6,"rank":6},{"arena":"agents","category":"htmlslides","elo":1211,"win_rate":53,"rank":5},{"arena":"agents","category":"mobileapps","elo":1174,"win_rate":46.5,"rank":22},{"arena":"agents","category":"pptxslides","elo":1110,"win_rate":34.1,"rank":8},{"arena":"agents","category":"python-pptxslides","elo":1109,"win_rate":31.9,"rank":9},{"arena":"agents","category":"webapps","elo":1196,"win_rate":48.3,"rank":13},{"arena":"models","category":"3d","elo":1305,"win_rate":61.7,"rank":17},{"arena":"models","category":"asciiart","elo":1311,"win_rate":63.4,"rank":4},{"arena":"models","category":"codecategories","elo":1291,"win_rate":64.2,"rank":18},{"arena":"models","category":"dataviz","elo":1270,"win_rate":60.6,"rank":22},{"arena":"models","category":"gamedev","elo":1266,"win_rate":54.9,"rank":26},{"arena":"models","category":"svg","elo":1346,"win_rate":70.3,"rank":2},{"arena":"models","category":"uicomponent","elo":1322,"win_rate":68.1,"rank":9},{"arena":"models","category":"website","elo":1295,"win_rate":64.3,"rank":15}],"artificial_analysis":{"intelligence_index":46.5,"coding_index":68.8,"agentic_index":21.4}},"reasoning":{"mandatory":true,"supported_efforts":["high","medium","low"],"default_effort":"medium"},"aliases":["google/gemini-3.1-pro-preview-20260219"]},{"id":"anthropic/claude-sonnet-4.6","canonical_slug":"anthropic/claude-4.6-sonnet-20260217","hugging_face_id":"","name":"Anthropic: Claude Sonnet 4.6","created":1771342990,"description":"Sonnet 4.6 is Anthropic's most capable Sonnet-class model yet, with frontier performance across coding, agents, and professional work. It excels at iterative development, complex codebase navigation, end-to-end project management with...","context_length":1000000,"architecture":{"modality":"text+image+file->text","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"Claude","instruct_type":null},"pricing":{"prompt":"0.0000030000","completion":"0.0000150000","web_search":"0.0100000000","input_cache_read":"0.0000003000","input_cache_write":"0.0000037500","input_cache_write_1h":"0.000006"},"top_provider":{"context_length":1000000,"max_completion_tokens":128000,"is_moderated":true},"per_request_limits":null,"supported_parameters":["include_reasoning","max_completion_tokens","max_tokens","reasoning","response_format","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_p","verbosity"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/anthropic/claude-4.6-sonnet-20260217/endpoints"},"benchmarks":{"design_arena":[{"arena":"agents","category":"agenticgamedev","elo":1200,"win_rate":50.9,"rank":6},{"arena":"agents","category":"androidnative","elo":1238,"win_rate":61.8,"rank":9},{"arena":"agents","category":"fullstack","elo":1274,"win_rate":63.1,"rank":5},{"arena":"agents","category":"godotgamedev","elo":1277,"win_rate":60.6,"rank":1},{"arena":"agents","category":"mobileapps","elo":1256,"win_rate":59.2,"rank":4},{"arena":"agents","category":"webapps","elo":1246,"win_rate":55.8,"rank":9},{"arena":"models","category":"3d","elo":1318,"win_rate":60.9,"rank":12},{"arena":"models","category":"asciiart","elo":1271,"win_rate":60.1,"rank":8},{"arena":"models","category":"codecategories","elo":1326,"win_rate":61.6,"rank":7},{"arena":"models","category":"dataviz","elo":1318,"win_rate":61,"rank":6},{"arena":"models","category":"gamedev","elo":1322,"win_rate":60.5,"rank":10},{"arena":"models","category":"svg","elo":1255,"win_rate":59.6,"rank":11},{"arena":"models","category":"uicomponent","elo":1322,"win_rate":61.7,"rank":8},{"arena":"models","category":"website","elo":1328,"win_rate":61.6,"rank":5}],"artificial_analysis":{"intelligence_index":47.2,"coding_index":63,"agentic_index":40.8}},"reasoning":{"mandatory":false,"supported_efforts":["max","high","medium","low"],"default_effort":"medium"},"aliases":["claude-sonnet-4-6","anthropic/claude-4.6-sonnet-20260217"]},{"id":"qwen/qwen3.5-plus-02-15","canonical_slug":"qwen/qwen3.5-plus-20260216","hugging_face_id":"","name":"Qwen: Qwen3.5 Plus 2026-02-15","created":1771229416,"description":"The Qwen3.5 native vision-language series Plus models are built on a hybrid architecture that integrates linear attention mechanisms with sparse mixture-of-experts models, achieving higher inference efficiency. In a variety of...","context_length":1000000,"architecture":{"modality":"text+image+video->text","input_modalities":["text","image","video"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":null},"pricing":{"prompt":"0.0000002600","completion":"0.0000015600"},"top_provider":{"context_length":1000000,"max_completion_tokens":65536,"is_moderated":false},"per_request_limits":null,"supported_parameters":["include_reasoning","logprobs","max_tokens","presence_penalty","reasoning","response_format","seed","structured_outputs","temperature","tool_choice","tools","top_logprobs","top_p"],"default_parameters":{"temperature":null,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/qwen/qwen3.5-plus-20260216/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"3d","elo":1198,"win_rate":47.8,"rank":46},{"arena":"models","category":"asciiart","elo":1138,"win_rate":43.2,"rank":41},{"arena":"models","category":"codecategories","elo":1217,"win_rate":48.5,"rank":44},{"arena":"models","category":"dataviz","elo":1178,"win_rate":44.8,"rank":57},{"arena":"models","category":"gamedev","elo":1173,"win_rate":42.7,"rank":57},{"arena":"models","category":"svg","elo":1165,"win_rate":48.9,"rank":38},{"arena":"models","category":"uicomponent","elo":1233,"win_rate":52.2,"rank":36},{"arena":"models","category":"website","elo":1231,"win_rate":50.1,"rank":41}]},"reasoning":{"mandatory":false},"aliases":["qwen3.5","qwen/qwen3.5-plus-20260216"]},{"id":"qwen/qwen3.5-397b-a17b","canonical_slug":"qwen/qwen3.5-397b-a17b-20260216","hugging_face_id":"Qwen/Qwen3.5-397B-A17B","name":"Qwen: Qwen3.5 397B A17B","created":1771223018,"description":"The Qwen3.5 series 397B-A17B native vision-language model is built on a hybrid architecture that integrates a linear attention mechanism with a sparse mixture-of-experts model, achieving higher inference efficiency. It delivers...","context_length":256000,"architecture":{"modality":"text+image+video->text","input_modalities":["text","image","video"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":null},"pricing":{"prompt":"0.0000003850","completion":"0.0000024500"},"top_provider":{"context_length":131072,"max_completion_tokens":null,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_tokens","min_p","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{"temperature":0.6,"top_p":0.95,"top_k":20,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/qwen/qwen3.5-397b-a17b-20260216/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"3d","elo":1239,"win_rate":56.7,"rank":33},{"arena":"models","category":"codecategories","elo":1230,"win_rate":52.6,"rank":35},{"arena":"models","category":"dataviz","elo":1219,"win_rate":53.2,"rank":38},{"arena":"models","category":"gamedev","elo":1207,"win_rate":50.1,"rank":44},{"arena":"models","category":"svg","elo":1199,"win_rate":56.1,"rank":29},{"arena":"models","category":"uicomponent","elo":1218,"win_rate":51.4,"rank":42},{"arena":"models","category":"website","elo":1234,"win_rate":52.6,"rank":37}],"artificial_analysis":{"intelligence_index":33.7,"coding_index":48.2,"agentic_index":19.8}},"reasoning":{"mandatory":false},"aliases":["qwen/qwen3.5-397b-a17b-20260216"]},{"id":"minimax/minimax-m2.5","canonical_slug":"minimax/minimax-m2.5-20260211","hugging_face_id":"MiniMaxAI/MiniMax-M2.5","name":"MiniMax: MiniMax M2.5","created":1770908502,"description":"MiniMax-M2.5 is a SOTA large language model designed for real-world productivity. Trained in a diverse range of complex real-world digital working environments, M2.5 builds upon the coding expertise of M2.1...","context_length":204800,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000001200","completion":"0.0000004800"},"top_provider":{"context_length":196608,"max_completion_tokens":196608,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_tokens","min_p","parallel_tool_calls","presence_penalty","reasoning","reasoning_effort","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{"temperature":1,"top_p":0.95,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/minimax/minimax-m2.5-20260211/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"3d","elo":1248,"win_rate":57.6,"rank":31},{"arena":"models","category":"codecategories","elo":1257,"win_rate":56.8,"rank":30},{"arena":"models","category":"dataviz","elo":1216,"win_rate":51.2,"rank":40},{"arena":"models","category":"gamedev","elo":1243,"win_rate":55.5,"rank":34},{"arena":"models","category":"svg","elo":1208,"win_rate":54.5,"rank":25},{"arena":"models","category":"uicomponent","elo":1228,"win_rate":53.4,"rank":37},{"arena":"models","category":"website","elo":1266,"win_rate":57.5,"rank":26}]},"reasoning":{"mandatory":true},"aliases":["minimax-m2.5","minimax/minimax-m2.5-20260211"]},{"id":"z-ai/glm-5","canonical_slug":"z-ai/glm-5-20260211","hugging_face_id":"zai-org/GLM-5","name":"Z.ai: GLM 5","created":1770829182,"description":"GLM-5 is Z.ai’s flagship open-source foundation model engineered for complex systems design and long-horizon agent workflows. Built for expert developers, it delivers production-grade performance on large-scale programming tasks, rivaling leading...","context_length":202752,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000006000","completion":"0.0000019200","input_cache_read":"0.0000001200"},"top_provider":{"context_length":202752,"max_completion_tokens":null,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_tokens","min_p","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{"temperature":1,"top_p":0.95,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/z-ai/glm-5-20260211/endpoints"},"benchmarks":{"design_arena":[{"arena":"agents","category":"androidnative","elo":1243,"win_rate":62.2,"rank":7},{"arena":"agents","category":"fullstack","elo":1190,"win_rate":52.9,"rank":13},{"arena":"agents","category":"godotgamedev","elo":1237,"win_rate":54.8,"rank":3},{"arena":"agents","category":"mobileapps","elo":1221,"win_rate":53.2,"rank":11},{"arena":"models","category":"3d","elo":1309,"win_rate":56.8,"rank":16},{"arena":"models","category":"asciiart","elo":1190,"win_rate":47.6,"rank":27},{"arena":"models","category":"codecategories","elo":1296,"win_rate":55.7,"rank":16},{"arena":"models","category":"dataviz","elo":1270,"win_rate":52.9,"rank":23},{"arena":"models","category":"gamedev","elo":1301,"win_rate":57.6,"rank":16},{"arena":"models","category":"svg","elo":1225,"win_rate":54.5,"rank":18},{"arena":"models","category":"uicomponent","elo":1288,"win_rate":54.1,"rank":19},{"arena":"models","category":"website","elo":1291,"win_rate":55,"rank":19}]},"reasoning":{"mandatory":false,"default_enabled":true},"aliases":["glm5","z-ai/glm-5-20260211"]},{"id":"qwen/qwen3-max-thinking","canonical_slug":"qwen/qwen3-max-thinking-20260123","hugging_face_id":null,"name":"Qwen: Qwen3 Max Thinking","created":1770671901,"description":"Qwen3-Max-Thinking is the flagship reasoning model in the Qwen3 series, designed for high-stakes cognitive tasks that require deep, multi-step reasoning. By significantly scaling model capacity and reinforcement learning compute, it...","context_length":262144,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen","instruct_type":null},"pricing":{"prompt":"0.0000007800","completion":"0.0000039000"},"top_provider":{"context_length":262144,"max_completion_tokens":32768,"is_moderated":false},"per_request_limits":null,"supported_parameters":["include_reasoning","logprobs","max_tokens","presence_penalty","reasoning","response_format","seed","structured_outputs","temperature","tool_choice","tools","top_logprobs","top_p"],"default_parameters":{"temperature":null,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/qwen/qwen3-max-thinking-20260123/endpoints"},"reasoning":{"mandatory":false},"aliases":["qwen/qwen3-max-thinking-20260123"]},{"id":"anthropic/claude-opus-4.6","canonical_slug":"anthropic/claude-4.6-opus-20260205","hugging_face_id":"","name":"Anthropic: Claude Opus 4.6","created":1770219050,"description":"Opus 4.6 is Anthropic’s strongest model for coding and long-running professional tasks. It is built for agents that operate across entire workflows rather than single prompts, making it especially effective...","context_length":1000000,"architecture":{"modality":"text+image+file->text","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"Claude","instruct_type":null},"pricing":{"prompt":"0.0000050000","completion":"0.0000250000","web_search":"0.0100000000","input_cache_read":"0.0000005000","input_cache_write":"0.0000062500","input_cache_write_1h":"0.00001"},"top_provider":{"context_length":1000000,"max_completion_tokens":128000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["include_reasoning","max_completion_tokens","max_tokens","reasoning","response_format","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_p","verbosity"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/anthropic/claude-4.6-opus-20260205/endpoints"},"benchmarks":{"design_arena":[{"arena":"agents","category":"androidnative","elo":1218,"win_rate":68.8,"rank":11},{"arena":"agents","category":"fullstack","elo":1288,"win_rate":68.6,"rank":4},{"arena":"agents","category":"mobileapps","elo":1260,"win_rate":60.5,"rank":3},{"arena":"agents","category":"webapps","elo":1267,"win_rate":60.9,"rank":4},{"arena":"models","category":"3d","elo":1354,"win_rate":65.7,"rank":4},{"arena":"models","category":"asciiart","elo":1308,"win_rate":63.9,"rank":6},{"arena":"models","category":"codecategories","elo":1343,"win_rate":63.9,"rank":3},{"arena":"models","category":"dataviz","elo":1320,"win_rate":61.4,"rank":5},{"arena":"models","category":"gamedev","elo":1344,"win_rate":63.3,"rank":4},{"arena":"models","category":"svg","elo":1287,"win_rate":62.6,"rank":4},{"arena":"models","category":"uicomponent","elo":1351,"win_rate":64.5,"rank":3},{"arena":"models","category":"website","elo":1339,"win_rate":63.4,"rank":3}]},"reasoning":{"mandatory":false,"supports_max_tokens":true,"supported_efforts":["max","high","medium","low"],"default_effort":"medium"},"aliases":["claude-opus-4-6","anthropic/claude-4.6-opus-20260205"]},{"id":"qwen/qwen3-coder-next","canonical_slug":"qwen/qwen3-coder-next-2025-02-03","hugging_face_id":"Qwen/Qwen3-Coder-Next","name":"Qwen: Qwen3 Coder Next","created":1770164101,"description":"Qwen3-Coder-Next is an open-weight causal language model optimized for coding agents and local development workflows. It uses a sparse MoE design with 80B total parameters and only 3B activated per...","context_length":262144,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen","instruct_type":null},"pricing":{"prompt":"0.0000001100","completion":"0.0000008000","input_cache_read":"0.0000000700"},"top_provider":{"context_length":262144,"max_completion_tokens":262144,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","logit_bias","logprobs","max_tokens","min_p","presence_penalty","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{"temperature":1,"top_p":0.95,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/qwen/qwen3-coder-next-2025-02-03/endpoints"},"aliases":["qwen/qwen3-coder-next-2025-02-03"]},{"id":"stepfun/step-3.5-flash","canonical_slug":"stepfun/step-3.5-flash","hugging_face_id":"stepfun-ai/Step-3.5-Flash","name":"StepFun: Step 3.5 Flash","created":1769728337,"description":"Step 3.5 Flash is StepFun's most capable open-source foundation model. Built on a sparse Mixture of Experts (MoE) architecture, it selectively activates only 11B of its 196B parameters per token....","context_length":262144,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000000900","completion":"0.0000003000","input_cache_read":"0.0000000200"},"top_provider":{"context_length":262144,"max_completion_tokens":16384,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","max_tokens","min_p","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","temperature","tool_choice","tools","top_k","top_p"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":"2026-06-29","links":{"details":"/api/v1/models/stepfun/step-3.5-flash/endpoints"},"reasoning":{"mandatory":true},"aliases":["stepfun/step-3.5-flash"]},{"id":"moonshotai/kimi-k2.5","canonical_slug":"moonshotai/kimi-k2.5-0127","hugging_face_id":"moonshotai/Kimi-K2.5","name":"MoonshotAI: Kimi K2.5","created":1769487076,"description":"Kimi K2.5 is Moonshot AI's native multimodal model, delivering state-of-the-art visual coding capability and a self-directed agent swarm paradigm. Built on Kimi K2 with continued pretraining over approximately 15T mixed...","context_length":262144,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000003750","completion":"0.0000020250"},"top_provider":{"context_length":256000,"max_completion_tokens":null,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_tokens","min_p","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/moonshotai/kimi-k2.5-0127/endpoints"},"benchmarks":{"design_arena":[{"arena":"agents","category":"androidnative","elo":1134,"win_rate":57.8,"rank":17},{"arena":"agents","category":"fullstack","elo":1182,"win_rate":54.2,"rank":14},{"arena":"agents","category":"godotgamedev","elo":1254,"win_rate":59.5,"rank":2},{"arena":"agents","category":"mobileapps","elo":1186,"win_rate":49.3,"rank":20},{"arena":"agents","category":"webapps","elo":1194,"win_rate":50.3,"rank":15},{"arena":"models","category":"3d","elo":1288,"win_rate":54.1,"rank":21},{"arena":"models","category":"asciiart","elo":1215,"win_rate":47.4,"rank":17},{"arena":"models","category":"codecategories","elo":1287,"win_rate":54.8,"rank":20},{"arena":"models","category":"dataviz","elo":1271,"win_rate":52.9,"rank":21},{"arena":"models","category":"gamedev","elo":1273,"win_rate":53.7,"rank":24},{"arena":"models","category":"svg","elo":1211,"win_rate":50.3,"rank":23},{"arena":"models","category":"uicomponent","elo":1291,"win_rate":54.3,"rank":17},{"arena":"models","category":"website","elo":1292,"win_rate":55.9,"rank":17}]},"reasoning":{"mandatory":false,"default_enabled":true},"aliases":["kimi-k2.5","moonshotai/kimi-k2.5-0127"]},{"id":"upstage/solar-pro-3","canonical_slug":"upstage/solar-pro-3","hugging_face_id":"","name":"Upstage: Solar Pro 3","created":1769481200,"description":"Solar Pro 3 is Upstage's powerful Mixture-of-Experts (MoE) language model. With 102B total parameters and 12B active parameters per forward pass, it delivers exceptional performance while maintaining computational efficiency. Optimized...","context_length":128000,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000001500","completion":"0.0000006000","input_cache_read":"0.0000000150"},"top_provider":{"context_length":128000,"max_completion_tokens":null,"is_moderated":false},"per_request_limits":null,"supported_parameters":["include_reasoning","max_tokens","reasoning","response_format","structured_outputs","temperature","tool_choice","tools"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/upstage/solar-pro-3/endpoints"},"benchmarks":{"design_arena":[],"artificial_analysis":{"intelligence_index":14.1,"coding_index":16.2,"agentic_index":2.7}},"reasoning":{"mandatory":false},"aliases":["upstage/solar-pro-3"]},{"id":"openai/gpt-audio","canonical_slug":"openai/gpt-audio","hugging_face_id":"","name":"OpenAI: GPT Audio","created":1768862569,"description":"The gpt-audio model is OpenAI's first generally available audio model. The new snapshot features an upgraded decoder for more natural sounding voices and maintains better voice consistency. Audio is priced...","context_length":128000,"architecture":{"modality":"text+audio->text+audio","input_modalities":["text","audio"],"output_modalities":["text","audio"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":"0.0000025000","completion":"0.0000100000","audio":"0.0000320000"},"top_provider":{"context_length":128000,"max_completion_tokens":16384,"is_moderated":true},"per_request_limits":null,"supported_parameters":["frequency_penalty","logit_bias","logprobs","max_tokens","presence_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_logprobs","top_p"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/openai/gpt-audio/endpoints"},"aliases":["openai/gpt-audio"]},{"id":"openai/gpt-audio-mini","canonical_slug":"openai/gpt-audio-mini","hugging_face_id":"","name":"OpenAI: GPT Audio Mini","created":1768859419,"description":"A cost-efficient version of GPT Audio. The new snapshot features an upgraded decoder for more natural sounding voices and maintains better voice consistency. Input is priced at $0.60 per million...","context_length":128000,"architecture":{"modality":"text+audio->text+audio","input_modalities":["text","audio"],"output_modalities":["text","audio"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":"0.0000006000","completion":"0.0000024000","audio":"0.0000006000"},"top_provider":{"context_length":128000,"max_completion_tokens":16384,"is_moderated":true},"per_request_limits":null,"supported_parameters":["frequency_penalty","logit_bias","logprobs","max_tokens","presence_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_logprobs","top_p"],"default_parameters":{"temperature":null,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/openai/gpt-audio-mini/endpoints"},"aliases":["openai/gpt-audio-mini"]},{"id":"z-ai/glm-4.7-flash","canonical_slug":"z-ai/glm-4.7-flash-20260119","hugging_face_id":"zai-org/GLM-4.7-Flash","name":"Z.ai: GLM 4.7 Flash","created":1768833913,"description":"As a 30B-class SOTA model, GLM-4.7-Flash offers a new option that balances performance and efficiency. It is further optimized for agentic coding use cases, strengthening coding capabilities, long-horizon task planning,...","context_length":202752,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000000600","completion":"0.0000004000","input_cache_read":"0.0000000100"},"top_provider":{"context_length":202752,"max_completion_tokens":16384,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_tokens","min_p","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{"temperature":1,"top_p":0.95,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/z-ai/glm-4.7-flash-20260119/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"3d","elo":1199,"win_rate":51.2,"rank":45},{"arena":"models","category":"codecategories","elo":1228,"win_rate":53.1,"rank":36},{"arena":"models","category":"dataviz","elo":1167,"win_rate":45.3,"rank":62},{"arena":"models","category":"gamedev","elo":1202,"win_rate":49.7,"rank":48},{"arena":"models","category":"svg","elo":1096,"win_rate":44.2,"rank":51},{"arena":"models","category":"uicomponent","elo":1263,"win_rate":57.6,"rank":26},{"arena":"models","category":"website","elo":1238,"win_rate":54,"rank":35}]},"reasoning":{"mandatory":false,"default_enabled":true},"aliases":["z-ai/glm-4.7-flash-20260119"]},{"id":"openai/gpt-5.2-codex","canonical_slug":"openai/gpt-5.2-codex-20260114","hugging_face_id":"","name":"OpenAI: GPT-5.2-Codex","created":1768409315,"description":"GPT-5.2-Codex is an upgraded version of GPT-5.1-Codex optimized for software engineering and coding workflows. It is designed for both interactive development sessions and long, independent execution of complex engineering tasks....","context_length":400000,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":"0.0000017500","completion":"0.0000140000","web_search":"0.0100000000","input_cache_read":"0.0000001750"},"top_provider":{"context_length":400000,"max_completion_tokens":128000,"is_moderated":true},"per_request_limits":null,"supported_parameters":["include_reasoning","max_completion_tokens","max_tokens","reasoning","response_format","seed","structured_outputs","tool_choice","tools"],"default_parameters":{"temperature":null,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/openai/gpt-5.2-codex-20260114/endpoints"},"benchmarks":{"design_arena":[{"arena":"agents","category":"androidnative","elo":1176,"win_rate":47.5,"rank":14},{"arena":"agents","category":"fullstack","elo":1060,"win_rate":37,"rank":27},{"arena":"agents","category":"godotgamedev","elo":1187,"win_rate":48,"rank":12},{"arena":"agents","category":"mobileapps","elo":1172,"win_rate":47.7,"rank":24},{"arena":"agents","category":"webapps","elo":1126,"win_rate":40.4,"rank":22}]},"reasoning":{"mandatory":true,"supported_efforts":["xhigh","high","medium","low"],"default_effort":"medium"},"aliases":["openai/gpt-5.2-codex-20260114"]},{"id":"bytedance-seed/seed-1.6-flash","canonical_slug":"bytedance-seed/seed-1.6-flash-20250625","hugging_face_id":"","name":"ByteDance Seed: Seed 1.6 Flash","created":1766505011,"description":"Seed 1.6 Flash is an ultra-fast multimodal deep thinking model by ByteDance Seed, supporting both text and visual understanding. It features a 256k context window and can generate outputs of...","context_length":262144,"architecture":{"modality":"text+image+video->text","input_modalities":["image","text","video"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000000750","completion":"0.0000003000"},"top_provider":{"context_length":262144,"max_completion_tokens":32768,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","max_tokens","reasoning","response_format","stop","structured_outputs","temperature","tool_choice","tools","top_p"],"default_parameters":{"temperature":null,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/bytedance-seed/seed-1.6-flash-20250625/endpoints"},"reasoning":{"mandatory":false},"aliases":["bytedance-seed/seed-1.6-flash-20250625"]},{"id":"bytedance-seed/seed-1.6","canonical_slug":"bytedance-seed/seed-1.6-20250625","hugging_face_id":"","name":"ByteDance Seed: Seed 1.6","created":1766504997,"description":"Seed 1.6 is a general-purpose model released by the ByteDance Seed team. It incorporates multimodal capabilities and adaptive deep thinking with a 256K context window.","context_length":262144,"architecture":{"modality":"text+image+video->text","input_modalities":["image","text","video"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000002500","completion":"0.0000020000"},"top_provider":{"context_length":262144,"max_completion_tokens":32768,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","max_tokens","reasoning","response_format","stop","structured_outputs","temperature","tool_choice","tools","top_p"],"default_parameters":{"temperature":null,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/bytedance-seed/seed-1.6-20250625/endpoints"},"reasoning":{"mandatory":false},"aliases":["bytedance-seed/seed-1.6-20250625"]},{"id":"minimax/minimax-m2.1","canonical_slug":"minimax/minimax-m2.1","hugging_face_id":"MiniMaxAI/MiniMax-M2.1","name":"MiniMax: MiniMax M2.1","created":1766454997,"description":"MiniMax-M2.1 is a lightweight, state-of-the-art large language model optimized for coding, agentic workflows, and modern application development. With only 10 billion activated parameters, it delivers a major jump in real-world...","context_length":204800,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000002900","completion":"0.0000009500","input_cache_read":"0.0000000300"},"top_provider":{"context_length":196608,"max_completion_tokens":196608,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","max_tokens","min_p","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_p"],"default_parameters":{"temperature":1,"top_p":0.9,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/minimax/minimax-m2.1/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"3d","elo":1244,"win_rate":57.5,"rank":32},{"arena":"models","category":"codecategories","elo":1241,"win_rate":55.3,"rank":32},{"arena":"models","category":"dataviz","elo":1250,"win_rate":57,"rank":28},{"arena":"models","category":"gamedev","elo":1201,"win_rate":50.4,"rank":49},{"arena":"models","category":"svg","elo":1189,"win_rate":55.4,"rank":33},{"arena":"models","category":"uicomponent","elo":1278,"win_rate":60.9,"rank":23},{"arena":"models","category":"website","elo":1245,"win_rate":55.4,"rank":32}]},"reasoning":{"mandatory":true},"aliases":["minimax/minimax-m2.1"]},{"id":"z-ai/glm-4.7","canonical_slug":"z-ai/glm-4.7-20251222","hugging_face_id":"zai-org/GLM-4.7","name":"Z.ai: GLM 4.7","created":1766378014,"description":"GLM-4.7 is Z.ai’s latest flagship model, featuring upgrades in two key areas: enhanced programming capabilities and more stable multi-step reasoning/execution. It demonstrates significant improvements in executing complex agent tasks while...","context_length":202752,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000004000","completion":"0.0000017500","input_cache_read":"0.0000000800"},"top_provider":{"context_length":202752,"max_completion_tokens":131072,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_tokens","min_p","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{"temperature":1,"top_p":0.95,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/z-ai/glm-4.7-20251222/endpoints"},"benchmarks":{"design_arena":[{"arena":"agents","category":"androidnative","elo":1128,"win_rate":56,"rank":18},{"arena":"agents","category":"fullstack","elo":1118,"win_rate":44.9,"rank":20},{"arena":"agents","category":"godotgamedev","elo":1131,"win_rate":39.1,"rank":17},{"arena":"agents","category":"mobileapps","elo":1189,"win_rate":49.9,"rank":16},{"arena":"models","category":"3d","elo":1271,"win_rate":54.6,"rank":25},{"arena":"models","category":"asciiart","elo":1211,"win_rate":48.3,"rank":19},{"arena":"models","category":"codecategories","elo":1267,"win_rate":55.1,"rank":27},{"arena":"models","category":"dataviz","elo":1239,"win_rate":51.6,"rank":32},{"arena":"models","category":"gamedev","elo":1256,"win_rate":55.6,"rank":30},{"arena":"models","category":"svg","elo":1201,"win_rate":54.5,"rank":28},{"arena":"models","category":"uicomponent","elo":1251,"win_rate":50.9,"rank":29},{"arena":"models","category":"website","elo":1270,"win_rate":55.7,"rank":25}]},"reasoning":{"mandatory":false,"default_enabled":true},"aliases":["z-ai/glm-4.7-20251222"]},{"id":"google/gemini-3-flash-preview","canonical_slug":"google/gemini-3-flash-preview-20251217","hugging_face_id":"","name":"Google: Gemini 3 Flash Preview","created":1765987078,"description":"Gemini 3 Flash Preview is a high speed, high value thinking model designed for agentic workflows, multi turn chat, and coding assistance. It delivers near Pro level reasoning and tool...","context_length":1048576,"architecture":{"modality":"text+image+file+audio+video->text","input_modalities":["text","image","file","audio","video"],"output_modalities":["text"],"tokenizer":"Gemini","instruct_type":null},"pricing":{"prompt":"0.0000005000","completion":"0.0000030000","image":"0.0000005000","audio":"0.0000010000","web_search":"0.0140000000","internal_reasoning":"0.000003","input_cache_read":"0.0000000500","input_cache_write":"0.0000000833"},"top_provider":{"context_length":1048576,"max_completion_tokens":65535,"is_moderated":false},"per_request_limits":null,"supported_parameters":["include_reasoning","max_tokens","reasoning","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_p"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/google/gemini-3-flash-preview-20251217/endpoints"},"benchmarks":{"design_arena":[{"arena":"agents","category":"agenticslides","elo":1073,"win_rate":39.3,"rank":9},{"arena":"agents","category":"agenticslides(python-pptx)","elo":1075,"win_rate":39.3,"rank":9},{"arena":"agents","category":"androidnative","elo":1064,"win_rate":47.9,"rank":24},{"arena":"agents","category":"fullstack","elo":1130,"win_rate":47.1,"rank":17},{"arena":"agents","category":"godotgamedev","elo":1218,"win_rate":52.3,"rank":8},{"arena":"agents","category":"mobileapps","elo":1187,"win_rate":49.8,"rank":17},{"arena":"agents","category":"webapps","elo":1185,"win_rate":49.5,"rank":17},{"arena":"models","category":"3d","elo":1262,"win_rate":62.7,"rank":28},{"arena":"models","category":"codecategories","elo":1240,"win_rate":57.6,"rank":33},{"arena":"models","category":"gamedev","elo":1233,"win_rate":58.3,"rank":38},{"arena":"models","category":"website","elo":1240,"win_rate":57,"rank":33}]},"reasoning":{"mandatory":false,"supported_efforts":["high","medium","low","minimal"],"default_effort":"medium"},"aliases":["gemini-3-flash","google/gemini-3-flash-preview-20251217"]},{"id":"nvidia/nemotron-3-nano-30b-a3b","canonical_slug":"nvidia/nemotron-3-nano-30b-a3b","hugging_face_id":"nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16","name":"NVIDIA: Nemotron 3 Nano 30B A3B","created":1765731275,"description":"NVIDIA Nemotron 3 Nano 30B A3B is a small language MoE model with highest compute efficiency and accuracy for developers to build specialized agentic AI systems. The model is fully...","context_length":262144,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000000500","completion":"0.0000002000"},"top_provider":{"context_length":262144,"max_completion_tokens":228000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_tokens","min_p","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{"temperature":null,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/nvidia/nemotron-3-nano-30b-a3b/endpoints"},"benchmarks":{"design_arena":[],"artificial_analysis":{"intelligence_index":14.2,"coding_index":14.4,"agentic_index":2}},"reasoning":{"mandatory":false},"aliases":["nvidia/nemotron-3-nano-30b-a3b"]},{"id":"openai/gpt-5.2-chat","canonical_slug":"openai/gpt-5.2-chat-20251211","hugging_face_id":"","name":"OpenAI: GPT-5.2 Chat","created":1765389783,"description":"GPT-5.2 Chat (AKA Instant) is the fast, lightweight member of the 5.2 family, optimized for low-latency chat while retaining strong general intelligence. It uses adaptive reasoning to selectively “think” on...","context_length":128000,"architecture":{"modality":"text+image+file->text","input_modalities":["file","image","text"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":"0.0000017500","completion":"0.0000140000","web_search":"0.0100000000","input_cache_read":"0.0000001750"},"top_provider":{"context_length":128000,"max_completion_tokens":16384,"is_moderated":true},"per_request_limits":null,"supported_parameters":["max_completion_tokens","max_tokens","response_format","seed","structured_outputs","tool_choice","tools"],"default_parameters":{"temperature":null,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":"2026-08-10","links":{"details":"/api/v1/models/openai/gpt-5.2-chat-20251211/endpoints"},"aliases":["openai/gpt-5.2-chat-20251211"]},{"id":"openai/gpt-5.2-pro","canonical_slug":"openai/gpt-5.2-pro-20251211","hugging_face_id":"","name":"OpenAI: GPT-5.2 Pro","created":1765389780,"description":"GPT-5.2 Pro is OpenAI’s most advanced model, offering major improvements in agentic coding and long context performance over GPT-5 Pro. It is optimized for complex tasks that require step-by-step reasoning,...","context_length":400000,"architecture":{"modality":"text+image+file->text","input_modalities":["image","text","file"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":"0.0000210000","completion":"0.0001680000","web_search":"0.0100000000"},"top_provider":{"context_length":400000,"max_completion_tokens":128000,"is_moderated":true},"per_request_limits":null,"supported_parameters":["include_reasoning","max_tokens","reasoning","response_format","seed","structured_outputs","tool_choice","tools"],"default_parameters":{"temperature":null,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/openai/gpt-5.2-pro-20251211/endpoints"},"reasoning":{"mandatory":true,"supported_efforts":["xhigh","high","medium"],"default_effort":"medium"},"aliases":["openai/gpt-5.2-pro-20251211"]},{"id":"openai/gpt-5.2","canonical_slug":"openai/gpt-5.2-20251211","hugging_face_id":"","name":"OpenAI: GPT-5.2","created":1765389775,"description":"GPT-5.2 is the latest frontier-grade model in the GPT-5 series, offering stronger agentic and long context perfomance compared to GPT-5.1. It uses adaptive reasoning to allocate computation dynamically, responding quickly...","context_length":400000,"architecture":{"modality":"text+image+file->text","input_modalities":["file","image","text"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":"0.0000017500","completion":"0.0000140000","web_search":"0.0100000000","input_cache_read":"0.0000001750"},"top_provider":{"context_length":400000,"max_completion_tokens":128000,"is_moderated":true},"per_request_limits":null,"supported_parameters":["include_reasoning","max_completion_tokens","max_tokens","reasoning","response_format","seed","structured_outputs","tool_choice","tools"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/openai/gpt-5.2-20251211/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"website","elo":1238,"win_rate":54.5,"rank":34},{"arena":"agents","category":"androidnative","elo":1074,"win_rate":49.2,"rank":22},{"arena":"agents","category":"fullstack","elo":1110,"win_rate":44,"rank":21},{"arena":"agents","category":"godotgamedev","elo":1183,"win_rate":48.1,"rank":13},{"arena":"agents","category":"mobileapps","elo":1173,"win_rate":47.4,"rank":23},{"arena":"agents","category":"webapps","elo":1157,"win_rate":45.7,"rank":19},{"arena":"models","category":"3d","elo":1157,"win_rate":42.2,"rank":60},{"arena":"models","category":"codecategories","elo":1220,"win_rate":50.4,"rank":42},{"arena":"models","category":"dataviz","elo":1245,"win_rate":56.1,"rank":31},{"arena":"models","category":"gamedev","elo":1262,"win_rate":56,"rank":29},{"arena":"models","category":"uicomponent","elo":1244,"win_rate":52,"rank":32},{"arena":"models","category":"asciiart","elo":1197,"win_rate":50.7,"rank":23},{"arena":"models","category":"svg","elo":1198,"win_rate":54.1,"rank":30}]},"reasoning":{"mandatory":false,"supported_efforts":["xhigh","high","medium","low","none"],"default_effort":"medium"},"aliases":["gpt-5.2","openai/gpt-5.2-20251211"]},{"id":"mistralai/devstral-2512","canonical_slug":"mistralai/devstral-2512","hugging_face_id":"mistralai/Devstral-2-123B-Instruct-2512","name":"Mistral: Devstral 2 2512","created":1765285419,"description":"Devstral 2 is a state-of-the-art open-source model by Mistral AI specializing in agentic coding. It is a 123B-parameter dense transformer model supporting a 256K context window. Devstral 2 supports exploring...","context_length":262144,"architecture":{"modality":"text+file->text","input_modalities":["text","file"],"output_modalities":["text"],"tokenizer":"Mistral","instruct_type":null},"pricing":{"prompt":"0.0000004000","completion":"0.0000020000","input_cache_read":"0.0000000400"},"top_provider":{"context_length":262144,"max_completion_tokens":null,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","max_tokens","presence_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_p"],"default_parameters":{"temperature":0.3,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/mistralai/devstral-2512/endpoints"},"benchmarks":{"design_arena":[],"artificial_analysis":{"intelligence_index":19.2,"coding_index":31.3,"agentic_index":10.6}},"aliases":["mistralai/devstral-2512"]},{"id":"relace/relace-search","canonical_slug":"relace/relace-search-20251208","hugging_face_id":null,"name":"Relace: Relace Search","created":1765213560,"description":"The relace-search model uses 4-12 `view_file` and `grep` tools in parallel to explore a codebase and return relevant files to the user request. In contrast to RAG, relace-search performs agentic...","context_length":256000,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000010000","completion":"0.0000030000"},"top_provider":{"context_length":256000,"max_completion_tokens":128000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["max_tokens","response_format","seed","stop","temperature","tool_choice","tools","top_p"],"default_parameters":{"temperature":null,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/relace/relace-search-20251208/endpoints"},"aliases":["relace/relace-search-20251208"]},{"id":"z-ai/glm-4.6v","canonical_slug":"z-ai/glm-4.6-20251208","hugging_face_id":"zai-org/GLM-4.6V","name":"Z.ai: GLM 4.6V","created":1765207462,"description":"GLM-4.6V is a large multimodal model designed for high-fidelity visual understanding and long-context reasoning across images, documents, and mixed media. It supports up to 128K tokens, processes complex page layouts...","context_length":131072,"architecture":{"modality":"text+image+video->text","input_modalities":["image","text","video"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000003000","completion":"0.0000009000","input_cache_read":"0.0000000550"},"top_provider":{"context_length":131072,"max_completion_tokens":32768,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","max_tokens","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","temperature","tool_choice","tools","top_k","top_p"],"default_parameters":{"temperature":0.8,"top_p":0.6,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/z-ai/glm-4.6-20251208/endpoints"},"reasoning":{"mandatory":false},"aliases":["z-ai/glm-4.6-20251208"]},{"id":"openai/gpt-5.1-codex-max","canonical_slug":"openai/gpt-5.1-codex-max-20251204","hugging_face_id":"","name":"OpenAI: GPT-5.1-Codex-Max","created":1764878934,"description":"GPT-5.1-Codex-Max is OpenAI’s latest agentic coding model, designed for long-running, high-context software development tasks. It is based on an updated version of the 5.1 reasoning stack and trained on agentic...","context_length":400000,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":"0.0000012500","completion":"0.0000100000","web_search":"0.0100000000","input_cache_read":"0.0000001250"},"top_provider":{"context_length":400000,"max_completion_tokens":128000,"is_moderated":true},"per_request_limits":null,"supported_parameters":["include_reasoning","max_completion_tokens","max_tokens","reasoning","response_format","seed","structured_outputs","tool_choice","tools"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/openai/gpt-5.1-codex-max-20251204/endpoints"},"reasoning":{"mandatory":true,"supported_efforts":["xhigh","high","medium","low"],"default_effort":"medium"},"aliases":["openai/gpt-5.1-codex-max-20251204"]},{"id":"amazon/nova-2-lite-v1","canonical_slug":"amazon/nova-2-lite-v1","hugging_face_id":"","name":"Amazon: Nova 2 Lite","created":1764696672,"description":"Nova 2 Lite is a fast, cost-effective reasoning model for everyday workloads that can process text, images, and videos to generate text. Nova 2 Lite demonstrates standout capabilities in processing...","context_length":1000000,"architecture":{"modality":"text+image+file+video->text","input_modalities":["text","image","video","file"],"output_modalities":["text"],"tokenizer":"Nova","instruct_type":null},"pricing":{"prompt":"0.0000003000","completion":"0.0000025000"},"top_provider":{"context_length":1000000,"max_completion_tokens":65535,"is_moderated":true},"per_request_limits":null,"supported_parameters":["include_reasoning","max_tokens","reasoning","stop","temperature","tool_choice","tools","top_k","top_p"],"default_parameters":{"temperature":null,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/amazon/nova-2-lite-v1/endpoints"},"benchmarks":{"design_arena":[],"artificial_analysis":{"intelligence_index":18.2,"coding_index":23,"agentic_index":3.1}},"reasoning":{"mandatory":false},"aliases":["amazon/nova-2-lite-v1"]},{"id":"mistralai/ministral-14b-2512","canonical_slug":"mistralai/ministral-14b-2512","hugging_face_id":"mistralai/Ministral-3-14B-Instruct-2512","name":"Mistral: Ministral 3 14B 2512","created":1764681735,"description":"The largest model in the Ministral 3 family, Ministral 3 14B offers frontier capabilities and performance comparable to its larger Mistral Small 3.2 24B counterpart. A powerful and efficient language...","context_length":262144,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Mistral","instruct_type":null},"pricing":{"prompt":"0.0000002000","completion":"0.0000002000","input_cache_read":"0.0000000200"},"top_provider":{"context_length":262144,"max_completion_tokens":null,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","logprobs","max_tokens","presence_penalty","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_logprobs","top_p"],"default_parameters":{"temperature":0.3,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/mistralai/ministral-14b-2512/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"3d","elo":1069,"win_rate":39.6,"rank":81},{"arena":"models","category":"codecategories","elo":1115,"win_rate":44,"rank":78},{"arena":"models","category":"gamedev","elo":1105,"win_rate":43.6,"rank":77},{"arena":"models","category":"website","elo":1125,"win_rate":44.8,"rank":79}],"artificial_analysis":{"intelligence_index":11.1,"coding_index":14.4,"agentic_index":2.2}},"aliases":["mistralai/ministral-14b-2512"]},{"id":"mistralai/ministral-8b-2512","canonical_slug":"mistralai/ministral-8b-2512","hugging_face_id":"mistralai/Ministral-3-8B-Instruct-2512","name":"Mistral: Ministral 3 8B 2512","created":1764681654,"description":"A balanced model in the Ministral 3 family, Ministral 3 8B is a powerful, efficient tiny language model with vision capabilities.","context_length":262144,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Mistral","instruct_type":null},"pricing":{"prompt":"0.0000001500","completion":"0.0000001500","input_cache_read":"0.0000000150"},"top_provider":{"context_length":262144,"max_completion_tokens":null,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","logprobs","max_tokens","presence_penalty","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_logprobs","top_p"],"default_parameters":{"temperature":0.3,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/mistralai/ministral-8b-2512/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"3d","elo":1110,"win_rate":46.2,"rank":75},{"arena":"models","category":"codecategories","elo":1103,"win_rate":42.9,"rank":80},{"arena":"models","category":"gamedev","elo":1057,"win_rate":38.7,"rank":86},{"arena":"models","category":"website","elo":1109,"win_rate":42.9,"rank":81}],"artificial_analysis":{"intelligence_index":9,"coding_index":9.7,"agentic_index":1.2}},"aliases":["mistralai/ministral-8b-2512"]},{"id":"mistralai/ministral-3b-2512","canonical_slug":"mistralai/ministral-3b-2512","hugging_face_id":"mistralai/Ministral-3-3B-Instruct-2512","name":"Mistral: Ministral 3 3B 2512","created":1764681560,"description":"The smallest model in the Ministral 3 family, Ministral 3 3B is a powerful, efficient tiny language model with vision capabilities.","context_length":131072,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Mistral","instruct_type":null},"pricing":{"prompt":"0.0000001000","completion":"0.0000001000","input_cache_read":"0.0000000100"},"top_provider":{"context_length":131072,"max_completion_tokens":null,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","logprobs","max_tokens","presence_penalty","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_logprobs","top_p"],"default_parameters":{"temperature":0.3,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/mistralai/ministral-3b-2512/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"3d","elo":1041,"win_rate":35.9,"rank":86},{"arena":"models","category":"codecategories","elo":1061,"win_rate":37.3,"rank":88},{"arena":"models","category":"gamedev","elo":1016,"win_rate":33,"rank":96},{"arena":"models","category":"website","elo":1072,"win_rate":38.2,"rank":89}],"artificial_analysis":{"intelligence_index":6.8,"coding_index":4.8,"agentic_index":1.5}},"aliases":["mistralai/ministral-3b-2512"]},{"id":"mistralai/mistral-large-2512","canonical_slug":"mistralai/mistral-large-2512","hugging_face_id":"","name":"Mistral: Mistral Large 3 2512","created":1764624472,"description":"Mistral Large 3 2512 is Mistral’s most capable model to date, featuring a sparse mixture-of-experts architecture with 41B active parameters (675B total), and released under the Apache 2.0 license.","context_length":262144,"architecture":{"modality":"text+image+file->text","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"Mistral","instruct_type":null},"pricing":{"prompt":"0.0000005000","completion":"0.0000015000","input_cache_read":"0.0000000500"},"top_provider":{"context_length":262144,"max_completion_tokens":null,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","max_tokens","presence_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_p"],"default_parameters":{"temperature":0.0645,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/mistralai/mistral-large-2512/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"3d","elo":1178,"win_rate":46.9,"rank":51},{"arena":"models","category":"asciiart","elo":1113,"win_rate":40.3,"rank":43},{"arena":"models","category":"codecategories","elo":1192,"win_rate":47.6,"rank":56},{"arena":"models","category":"dataviz","elo":1180,"win_rate":45.8,"rank":55},{"arena":"models","category":"gamedev","elo":1147,"win_rate":41.5,"rank":66},{"arena":"models","category":"svg","elo":1049,"win_rate":38,"rank":63},{"arena":"models","category":"uicomponent","elo":1157,"win_rate":43.1,"rank":59},{"arena":"models","category":"website","elo":1206,"win_rate":49.4,"rank":53}],"artificial_analysis":{"intelligence_index":15.9,"coding_index":20.1,"agentic_index":5.5}},"aliases":["mistralai/mistral-large-2512"]},{"id":"arcee-ai/trinity-mini","canonical_slug":"arcee-ai/trinity-mini-20251201","hugging_face_id":"arcee-ai/Trinity-Mini","name":"Arcee AI: Trinity Mini","created":1764601720,"description":"Trinity Mini is a 26B-parameter (3B active) sparse mixture-of-experts language model featuring 128 experts with 8 active per token. Engineered for efficient reasoning over long contexts (131k) with robust function...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000000450","completion":"0.0000001500"},"top_provider":{"context_length":131072,"max_completion_tokens":131072,"is_moderated":false},"per_request_limits":null,"supported_parameters":["include_reasoning","logprobs","max_completion_tokens","max_tokens","reasoning","response_format","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{"temperature":0.15,"top_p":0.75,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/arcee-ai/trinity-mini-20251201/endpoints"},"reasoning":{"mandatory":true},"aliases":["arcee-ai/trinity-mini-20251201"]},{"id":"deepseek/deepseek-v3.2","canonical_slug":"deepseek/deepseek-v3.2-20251201","hugging_face_id":"deepseek-ai/DeepSeek-V3.2","name":"DeepSeek: DeepSeek V3.2","created":1764594642,"description":"DeepSeek-V3.2 is a large language model designed to harmonize high computational efficiency with strong reasoning and agentic tool-use performance. It introduces DeepSeek Sparse Attention (DSA), a fine-grained sparse attention mechanism...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"DeepSeek","instruct_type":null},"pricing":{"prompt":"0.0000002288","completion":"0.0000003432","input_cache_read":"0.0000000229"},"top_provider":{"context_length":128000,"max_completion_tokens":64000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_tokens","min_p","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{"temperature":1,"top_p":0.95,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/deepseek/deepseek-v3.2-20251201/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"3d","elo":1211,"win_rate":50.2,"rank":40},{"arena":"models","category":"asciiart","elo":1124,"win_rate":41.2,"rank":42},{"arena":"models","category":"codecategories","elo":1214,"win_rate":49.9,"rank":48},{"arena":"models","category":"dataviz","elo":1200,"win_rate":48.6,"rank":48},{"arena":"models","category":"gamedev","elo":1198,"win_rate":46.9,"rank":51},{"arena":"models","category":"svg","elo":1090,"win_rate":41.1,"rank":52},{"arena":"models","category":"uicomponent","elo":1205,"win_rate":47.7,"rank":47},{"arena":"models","category":"website","elo":1218,"win_rate":50.8,"rank":46}]},"reasoning":{"mandatory":false,"default_enabled":false},"aliases":["deepseek-v3.2","deepseek/deepseek-v3.2-20251201"]},{"id":"anthropic/claude-opus-4.5","canonical_slug":"anthropic/claude-4.5-opus-20251124","hugging_face_id":"","name":"Anthropic: Claude Opus 4.5","created":1764010580,"description":"Claude Opus 4.5 is Anthropic’s frontier reasoning model optimized for complex software engineering, agentic workflows, and long-horizon computer use. It offers strong multimodal capabilities, competitive performance across real-world coding and...","context_length":200000,"architecture":{"modality":"text+image+file->text","input_modalities":["file","image","text"],"output_modalities":["text"],"tokenizer":"Claude","instruct_type":null},"pricing":{"prompt":"0.0000050000","completion":"0.0000250000","web_search":"0.0100000000","input_cache_read":"0.0000005000","input_cache_write":"0.0000062500","input_cache_write_1h":"0.00001"},"top_provider":{"context_length":200000,"max_completion_tokens":64000,"is_moderated":true},"per_request_limits":null,"supported_parameters":["include_reasoning","max_tokens","reasoning","response_format","stop","structured_outputs","temperature","tool_choice","tools","top_k","verbosity"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/anthropic/claude-4.5-opus-20251124/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"3d","elo":1291,"win_rate":60.2,"rank":19},{"arena":"models","category":"asciiart","elo":1234,"win_rate":55.4,"rank":12},{"arena":"models","category":"codecategories","elo":1293,"win_rate":60.2,"rank":17},{"arena":"models","category":"dataviz","elo":1286,"win_rate":59.3,"rank":14},{"arena":"models","category":"gamedev","elo":1296,"win_rate":59.6,"rank":18},{"arena":"models","category":"svg","elo":1236,"win_rate":58.6,"rank":15},{"arena":"models","category":"uicomponent","elo":1298,"win_rate":59.7,"rank":15},{"arena":"models","category":"website","elo":1291,"win_rate":60.2,"rank":18},{"arena":"agents","category":"androidnative","elo":1192,"win_rate":65.5,"rank":13},{"arena":"agents","category":"fullstack","elo":1224,"win_rate":59.9,"rank":9},{"arena":"agents","category":"mobileapps","elo":1240,"win_rate":57.5,"rank":6},{"arena":"agents","category":"webapps","elo":1221,"win_rate":54.2,"rank":11}]},"reasoning":{"mandatory":false},"aliases":["anthropic/claude-4.5-opus-20251124"]},{"id":"thenlper/gte-base","canonical_slug":"thenlper/gte-base-20251117","hugging_face_id":"thenlper/gte-base","name":"Thenlper: GTE-Base","created":1763433820,"description":"The gte-base embedding model encodes English sentences and paragraphs into a 768-dimensional dense vector space, delivering efficient and effective semantic embeddings optimized for textual similarity, semantic search, and clustering applications.","context_length":8192,"architecture":{"modality":"text->embeddings","input_modalities":["text"],"output_modalities":["embeddings"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000000050","completion":"0.0000000000"},"top_provider":{"context_length":512,"max_completion_tokens":null,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","max_tokens","min_p","presence_penalty","repetition_penalty","response_format","seed","stop","temperature","top_k","top_p"],"default_parameters":{"temperature":null,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/thenlper/gte-base-20251117/endpoints"},"aliases":["thenlper/gte-base-20251117"]},{"id":"thenlper/gte-large","canonical_slug":"thenlper/gte-large-20251117","hugging_face_id":"thenlper/gte-large","name":"Thenlper: GTE-Large","created":1763433655,"description":"The gte-large embedding model converts English sentences, paragraphs and moderate-length documents into a 1024-dimensional dense vector space, delivering high-quality semantic embeddings optimized for information retrieval, semantic textual similarity, reranking and...","context_length":8192,"architecture":{"modality":"text->embeddings","input_modalities":["text"],"output_modalities":["embeddings"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000000100","completion":"0.0000000000"},"top_provider":{"context_length":512,"max_completion_tokens":null,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","max_tokens","min_p","presence_penalty","repetition_penalty","response_format","seed","stop","temperature","top_k","top_p"],"default_parameters":{"temperature":null,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/thenlper/gte-large-20251117/endpoints"},"aliases":["thenlper/gte-large-20251117"]},{"id":"intfloat/e5-large-v2","canonical_slug":"intfloat/e5-large-v2-20251117","hugging_face_id":"intfloat/e5-large-v2","name":"Intfloat: E5-Large-v2","created":1763433432,"description":"The e5-large-v2 embedding model maps English sentences, paragraphs, and documents into a 1024-dimensional dense vector space, delivering high-accuracy semantic embeddings optimized for retrieval, semantic search, reranking, and similarity-scoring tasks.","context_length":8192,"architecture":{"modality":"text->embeddings","input_modalities":["text"],"output_modalities":["embeddings"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000000100","completion":"0.0000000000"},"top_provider":{"context_length":512,"max_completion_tokens":null,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","max_tokens","min_p","presence_penalty","repetition_penalty","response_format","seed","stop","temperature","top_k","top_p"],"default_parameters":{"temperature":null,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/intfloat/e5-large-v2-20251117/endpoints"},"aliases":["intfloat/e5-large-v2-20251117"]},{"id":"intfloat/e5-base-v2","canonical_slug":"intfloat/e5-base-v2-20251117","hugging_face_id":"intfloat/e5-base-v2","name":"Intfloat: E5-Base-v2","created":1763433192,"description":"The e5-base-v2 embedding model encodes English sentences and paragraphs into a 768-dimensional dense vector space, producing efficient and high-quality semantic embeddings optimized for tasks such as semantic search, similarity scoring,...","context_length":8192,"architecture":{"modality":"text->embeddings","input_modalities":["text"],"output_modalities":["embeddings"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000000050","completion":"0.0000000000"},"top_provider":{"context_length":512,"max_completion_tokens":null,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","max_tokens","min_p","presence_penalty","repetition_penalty","response_format","seed","stop","temperature","top_k","top_p"],"default_parameters":{"temperature":null,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/intfloat/e5-base-v2-20251117/endpoints"},"aliases":["intfloat/e5-base-v2-20251117"]},{"id":"intfloat/multilingual-e5-large","canonical_slug":"intfloat/multilingual-e5-large-20251117","hugging_face_id":"intfloat/multilingual-e5-large","name":"Intfloat: Multilingual-E5-Large","created":1763433047,"description":"The multilingual-e5-large embedding model encodes sentences, paragraphs, and documents across over 90 languages into a 1024-dimensional dense vector space, delivering robust semantic embeddings optimized for multilingual retrieval, cross-language similarity, and...","context_length":8192,"architecture":{"modality":"text->embeddings","input_modalities":["text"],"output_modalities":["embeddings"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000000100","completion":"0.0000000000"},"top_provider":{"context_length":512,"max_completion_tokens":null,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","max_tokens","min_p","presence_penalty","repetition_penalty","response_format","seed","stop","temperature","top_k","top_p"],"default_parameters":{"temperature":null,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/intfloat/multilingual-e5-large-20251117/endpoints"},"aliases":["intfloat/multilingual-e5-large-20251117"]},{"id":"sentence-transformers/paraphrase-minilm-l6-v2","canonical_slug":"sentence-transformers/paraphrase-minilm-l6-v2-20251117","hugging_face_id":"sentence-transformers/paraphrase-MiniLM-L6-v2","name":"Sentence Transformers: paraphrase-MiniLM-L6-v2","created":1763432454,"description":"The paraphrase-MiniLM-L6-v2 embedding model converts sentences and short paragraphs into a 384-dimensional dense vector space, producing high-quality semantic embeddings optimized for paraphrase detection, semantic similarity scoring, clustering, and lightweight retrieval...","context_length":8192,"architecture":{"modality":"text->embeddings","input_modalities":["text"],"output_modalities":["embeddings"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000000050","completion":"0.0000000000"},"top_provider":{"context_length":512,"max_completion_tokens":null,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","max_tokens","min_p","presence_penalty","repetition_penalty","response_format","seed","stop","temperature","top_k","top_p"],"default_parameters":{"temperature":null,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/sentence-transformers/paraphrase-minilm-l6-v2-20251117/endpoints"},"aliases":["sentence-transformers/paraphrase-minilm-l6-v2-20251117"]},{"id":"sentence-transformers/all-minilm-l12-v2","canonical_slug":"sentence-transformers/all-minilm-l12-v2-20251117","hugging_face_id":"sentence-transformers/all-MiniLM-L12-v2","name":"Sentence Transformers: all-MiniLM-L12-v2","created":1763432155,"description":"The all-MiniLM-L12-v2 embedding model maps sentences and short paragraphs into a 384-dimensional dense vector space, producing efficient and high-quality semantic embeddings optimized for tasks such as semantic search, clustering, and...","context_length":8192,"architecture":{"modality":"text->embeddings","input_modalities":["text"],"output_modalities":["embeddings"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000000050","completion":"0.0000000000"},"top_provider":{"context_length":512,"max_completion_tokens":null,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","max_tokens","min_p","presence_penalty","repetition_penalty","response_format","seed","stop","temperature","top_k","top_p"],"default_parameters":{"temperature":null,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/sentence-transformers/all-minilm-l12-v2-20251117/endpoints"},"aliases":["sentence-transformers/all-minilm-l12-v2-20251117"]},{"id":"baai/bge-base-en-v1.5","canonical_slug":"baai/bge-base-en-v1.5-20251117","hugging_face_id":"BAAI/bge-base-en-v1.5","name":"BAAI: bge-base-en-v1.5","created":1763431837,"description":"The bge-base-en-v1.5 embedding model converts English sentences and paragraphs into 768-dimensional dense vectors, delivering efficient, high-quality semantic embeddings optimized for retrieval, semantic search, and document-matching workflows. This version (v1.5) features...","context_length":8192,"architecture":{"modality":"text->embeddings","input_modalities":["text"],"output_modalities":["embeddings"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000000050","completion":"0.0000000000"},"top_provider":{"context_length":512,"max_completion_tokens":null,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","max_tokens","min_p","presence_penalty","repetition_penalty","response_format","seed","stop","temperature","top_k","top_p"],"default_parameters":{"temperature":null,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/baai/bge-base-en-v1.5-20251117/endpoints"},"aliases":["baai/bge-base-en-v1.5-20251117"]},{"id":"sentence-transformers/multi-qa-mpnet-base-dot-v1","canonical_slug":"sentence-transformers/multi-qa-mpnet-base-dot-v1-20251117","hugging_face_id":"sentence-transformers/multi-qa-mpnet-base-dot-v1","name":"Sentence Transformers: multi-qa-mpnet-base-dot-v1","created":1763431339,"description":"The multi-qa-mpnet-base-dot-v1 embedding model transforms sentences and short paragraphs into a 768-dimensional dense vector space, generating high-quality semantic embeddings optimized for question-and-answer retrieval, semantic search, and similarity-scoring across diverse content.","context_length":8192,"architecture":{"modality":"text->embeddings","input_modalities":["text"],"output_modalities":["embeddings"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000000050","completion":"0.0000000000"},"top_provider":{"context_length":512,"max_completion_tokens":null,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","max_tokens","min_p","presence_penalty","repetition_penalty","response_format","seed","stop","temperature","top_k","top_p"],"default_parameters":{"temperature":null,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/sentence-transformers/multi-qa-mpnet-base-dot-v1-20251117/endpoints"},"aliases":["sentence-transformers/multi-qa-mpnet-base-dot-v1-20251117"]},{"id":"baai/bge-large-en-v1.5","canonical_slug":"baai/bge-large-en-v1.5-20251117","hugging_face_id":"BAAI/bge-large-en-v1.5","name":"BAAI: bge-large-en-v1.5","created":1763431087,"description":"The bge-large-en-v1.5 embedding model maps English sentences, paragraphs, and documents into a 1024-dimensional dense vector space, delivering high-fidelity semantic embeddings optimized for semantic search, document retrieval, and downstream NLP tasks...","context_length":8192,"architecture":{"modality":"text->embeddings","input_modalities":["text"],"output_modalities":["embeddings"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000000100","completion":"0.0000000000"},"top_provider":{"context_length":512,"max_completion_tokens":null,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","max_tokens","min_p","presence_penalty","repetition_penalty","response_format","seed","stop","temperature","top_k","top_p"],"default_parameters":{"temperature":null,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/baai/bge-large-en-v1.5-20251117/endpoints"},"aliases":["baai/bge-large-en-v1.5-20251117"]},{"id":"baai/bge-m3","canonical_slug":"baai/bge-m3-20251117","hugging_face_id":"BAAI/bge-m3","name":"BAAI: bge-m3","created":1763424372,"description":"The bge-m3 embedding model encodes sentences, paragraphs, and long documents into a 1024-dimensional dense vector space, delivering high-quality semantic embeddings optimized for multilingual retrieval, semantic search, and large-context applications.","context_length":8192,"architecture":{"modality":"text->embeddings","input_modalities":["text"],"output_modalities":["embeddings"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000000100","completion":"0.0000000000"},"top_provider":{"context_length":8192,"max_completion_tokens":null,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","logit_bias","max_tokens","min_p","presence_penalty","repetition_penalty","response_format","seed","stop","temperature","top_k","top_p"],"default_parameters":{"temperature":null,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/baai/bge-m3-20251117/endpoints"},"aliases":["baai/bge-m3-20251117"]},{"id":"sentence-transformers/all-mpnet-base-v2","canonical_slug":"sentence-transformers/all-mpnet-base-v2-20251117","hugging_face_id":"sentence-transformers/all-mpnet-base-v2","name":"Sentence Transformers: all-mpnet-base-v2","created":1763421830,"description":"The all-mpnet-base-v2 embedding model encodes sentences and short paragraphs into a 768-dimensional dense vector space, providing high-fidelity semantic embeddings well suited for tasks like information retrieval, clustering, similarity scoring, and...","context_length":8192,"architecture":{"modality":"text->embeddings","input_modalities":["text"],"output_modalities":["embeddings"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000000050","completion":"0.0000000000"},"top_provider":{"context_length":512,"max_completion_tokens":null,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","max_tokens","min_p","presence_penalty","repetition_penalty","response_format","seed","stop","temperature","top_k","top_p"],"default_parameters":{"temperature":null,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/sentence-transformers/all-mpnet-base-v2-20251117/endpoints"},"aliases":["sentence-transformers/all-mpnet-base-v2-20251117"]},{"id":"sentence-transformers/all-minilm-l6-v2","canonical_slug":"sentence-transformers/all-minilm-l6-v2-20251117","hugging_face_id":"sentence-transformers/all-MiniLM-L6-v2","name":"Sentence Transformers: all-MiniLM-L6-v2","created":1763421176,"description":"The all-MiniLM-L6-v2 embedding model maps sentences and short paragraphs into a 384-dimensional dense vector space, enabling high-quality semantic representations that are ideal for downstream tasks such as information retrieval, clustering,...","context_length":8192,"architecture":{"modality":"text->embeddings","input_modalities":["text"],"output_modalities":["embeddings"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000000050","completion":"0.0000000000"},"top_provider":{"context_length":512,"max_completion_tokens":null,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","max_tokens","min_p","presence_penalty","repetition_penalty","response_format","seed","stop","temperature","top_k","top_p"],"default_parameters":{"temperature":null,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/sentence-transformers/all-minilm-l6-v2-20251117/endpoints"},"aliases":["sentence-transformers/all-minilm-l6-v2-20251117"]},{"id":"openai/gpt-5.1","canonical_slug":"openai/gpt-5.1-20251113","hugging_face_id":"","name":"OpenAI: GPT-5.1","created":1763060305,"description":"GPT-5.1 is the latest frontier-grade model in the GPT-5 series, offering stronger general-purpose reasoning, improved instruction adherence, and a more natural conversational style compared to GPT-5. It uses adaptive reasoning...","context_length":400000,"architecture":{"modality":"text+image+file->text","input_modalities":["image","text","file"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":"0.0000012500","completion":"0.0000100000","web_search":"0.0100000000","input_cache_read":"0.0000001300"},"top_provider":{"context_length":400000,"max_completion_tokens":128000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["include_reasoning","max_completion_tokens","max_tokens","reasoning","response_format","seed","structured_outputs","tool_choice","tools"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/openai/gpt-5.1-20251113/endpoints"},"benchmarks":{"design_arena":[{"arena":"agents","category":"mobileapps","elo":1126,"win_rate":43.8,"rank":29},{"arena":"models","category":"3d","elo":1140,"win_rate":43.9,"rank":66},{"arena":"models","category":"asciiart","elo":1162,"win_rate":48.6,"rank":35},{"arena":"models","category":"codecategories","elo":1221,"win_rate":53.1,"rank":41},{"arena":"models","category":"dataviz","elo":1248,"win_rate":58,"rank":30},{"arena":"models","category":"gamedev","elo":1246,"win_rate":55.9,"rank":32},{"arena":"models","category":"svg","elo":1207,"win_rate":57.4,"rank":26},{"arena":"models","category":"uicomponent","elo":1222,"win_rate":53,"rank":40},{"arena":"models","category":"website","elo":1231,"win_rate":54.1,"rank":40}]},"reasoning":{"mandatory":false,"default_enabled":true,"supported_efforts":["high","medium","low","none"],"default_effort":"none"},"aliases":["openai/gpt-5.1-20251113"]},{"id":"openai/gpt-5.1-chat","canonical_slug":"openai/gpt-5.1-chat-20251113","hugging_face_id":"","name":"OpenAI: GPT-5.1 Chat","created":1763060302,"description":"GPT-5.1 Chat (AKA Instant is the fast, lightweight member of the 5.1 family, optimized for low-latency chat while retaining strong general intelligence. It uses adaptive reasoning to selectively “think” on...","context_length":128000,"architecture":{"modality":"text+image+file->text","input_modalities":["file","image","text"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":"0.0000012500","completion":"0.0000100000","web_search":"0.0100000000","input_cache_read":"0.0000001300"},"top_provider":{"context_length":128000,"max_completion_tokens":32000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["max_completion_tokens","max_tokens","response_format","seed","structured_outputs","tool_choice","tools"],"default_parameters":{"temperature":null,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/openai/gpt-5.1-chat-20251113/endpoints"},"aliases":["openai/gpt-5.1-chat-20251113"]},{"id":"openai/gpt-5.1-codex","canonical_slug":"openai/gpt-5.1-codex-20251113","hugging_face_id":"","name":"OpenAI: GPT-5.1-Codex","created":1763060298,"description":"GPT-5.1-Codex is a specialized version of GPT-5.1 optimized for software engineering and coding workflows. It is designed for both interactive development sessions and long, independent execution of complex engineering tasks....","context_length":400000,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":"0.0000012500","completion":"0.0000100000","web_search":"0.0100000000","input_cache_read":"0.0000001300"},"top_provider":{"context_length":400000,"max_completion_tokens":128000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["include_reasoning","max_completion_tokens","max_tokens","reasoning","response_format","seed","structured_outputs","tool_choice","tools"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/openai/gpt-5.1-codex-20251113/endpoints"},"benchmarks":{"design_arena":[{"arena":"agents","category":"fullstack","elo":1092,"win_rate":44.5,"rank":23},{"arena":"agents","category":"mobileapps","elo":1202,"win_rate":54,"rank":14},{"arena":"agents","category":"webapps","elo":1087,"win_rate":44.1,"rank":24},{"arena":"models","category":"codecategories","elo":1201,"win_rate":55.2,"rank":52},{"arena":"models","category":"dataviz","elo":1218,"win_rate":50.7,"rank":39},{"arena":"models","category":"gamedev","elo":1206,"win_rate":52.3,"rank":46},{"arena":"models","category":"website","elo":1205,"win_rate":56,"rank":54}]},"reasoning":{"mandatory":true,"supported_efforts":["high","medium","low"],"default_effort":"medium"},"aliases":["openai/gpt-5.1-codex-20251113"]},{"id":"openai/gpt-5.1-codex-mini","canonical_slug":"openai/gpt-5.1-codex-mini-20251113","hugging_face_id":"","name":"OpenAI: GPT-5.1-Codex-Mini","created":1763057820,"description":"GPT-5.1-Codex-Mini is a smaller and faster version of GPT-5.1-Codex","context_length":400000,"architecture":{"modality":"text+image->text","input_modalities":["image","text"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":"0.0000002500","completion":"0.0000020000","web_search":"0.0100000000","input_cache_read":"0.0000000250"},"top_provider":{"context_length":400000,"max_completion_tokens":100000,"is_moderated":true},"per_request_limits":null,"supported_parameters":["include_reasoning","max_completion_tokens","max_tokens","reasoning","response_format","seed","structured_outputs","tool_choice","tools"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/openai/gpt-5.1-codex-mini-20251113/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"3d","elo":1067,"win_rate":32.9,"rank":82},{"arena":"models","category":"asciiart","elo":1148,"win_rate":43,"rank":39},{"arena":"models","category":"codecategories","elo":1144,"win_rate":41.5,"rank":72},{"arena":"models","category":"dataviz","elo":1140,"win_rate":40.6,"rank":70},{"arena":"models","category":"gamedev","elo":1160,"win_rate":43.5,"rank":63},{"arena":"models","category":"svg","elo":1035,"win_rate":35.3,"rank":65},{"arena":"models","category":"uicomponent","elo":1135,"win_rate":40.8,"rank":65},{"arena":"models","category":"website","elo":1154,"win_rate":42.8,"rank":71}]},"reasoning":{"mandatory":false,"supported_efforts":["high","medium","low"],"default_effort":"medium"},"aliases":["openai/gpt-5.1-codex-mini-20251113"]},{"id":"moonshotai/kimi-k2-thinking","canonical_slug":"moonshotai/kimi-k2-thinking-20251106","hugging_face_id":"moonshotai/Kimi-K2-Thinking","name":"MoonshotAI: Kimi K2 Thinking","created":1762440622,"description":"Kimi K2 Thinking is Moonshot AI’s most advanced open reasoning model to date, extending the K2 series into agentic, long-horizon reasoning. Built on the trillion-parameter Mixture-of-Experts (MoE) architecture introduced in...","context_length":262144,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000006000","completion":"0.0000025000","input_cache_read":"0.0000006000"},"top_provider":{"context_length":262144,"max_completion_tokens":262144,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_tokens","min_p","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{"temperature":null,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/moonshotai/kimi-k2-thinking-20251106/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"website","elo":1157,"win_rate":48.8,"rank":70}],"artificial_analysis":{"intelligence_index":null,"coding_index":21,"agentic_index":null}},"reasoning":{"mandatory":true},"aliases":["moonshotai/kimi-k2-thinking-20251106"]},{"id":"amazon/nova-premier-v1","canonical_slug":"amazon/nova-premier-v1","hugging_face_id":"","name":"Amazon: Nova Premier 1.0","created":1761950332,"description":"Amazon Nova Premier is the most capable of Amazon’s multimodal models for complex reasoning tasks and for use as the best teacher for distilling custom models.","context_length":1000000,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Nova","instruct_type":null},"pricing":{"prompt":"0.0000025000","completion":"0.0000125000","input_cache_read":"0.0000006250"},"top_provider":{"context_length":1000000,"max_completion_tokens":32000,"is_moderated":true},"per_request_limits":null,"supported_parameters":["max_tokens","stop","temperature","tools","top_k","top_p"],"default_parameters":{"temperature":null,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/amazon/nova-premier-v1/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"website","elo":879,"win_rate":26.2,"rank":108}]},"aliases":["amazon/nova-premier-v1"]},{"id":"mistralai/mistral-embed-2312","canonical_slug":"mistralai/mistral-embed-2312","hugging_face_id":null,"name":"Mistral: Mistral Embed 2312","created":1761944622,"description":"Mistral Embed is a specialized embedding model for text data, optimized for semantic search and RAG applications. Developed by Mistral AI in late 2023, it produces 1024-dimensional vectors that effectively...","context_length":8192,"architecture":{"modality":"text->embeddings","input_modalities":["text"],"output_modalities":["embeddings"],"tokenizer":"Mistral","instruct_type":null},"pricing":{"prompt":"0.0000001000","completion":"0.0000000000"},"top_provider":{"context_length":8192,"max_completion_tokens":null,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","max_tokens","presence_penalty","response_format","seed","stop","structured_outputs","temperature","top_p"],"default_parameters":{"temperature":null,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/mistralai/mistral-embed-2312/endpoints"},"aliases":["mistralai/mistral-embed-2312"]},{"id":"google/gemini-embedding-001","canonical_slug":"google/gemini-embedding-001","hugging_face_id":"","name":"Google: Gemini Embedding 001","created":1761943410,"description":"gemini-embedding-001 provides a unified cutting edge experience across domains, including science, legal, finance, and coding. This embedding model has consistently held a top spot on the Massive Text Embedding Benchmark...","context_length":20000,"architecture":{"modality":"text->embeddings","input_modalities":["text"],"output_modalities":["embeddings"],"tokenizer":"Gemini","instruct_type":null},"pricing":{"prompt":"0.0000001500","completion":"0.0000000000"},"top_provider":{"context_length":20000,"max_completion_tokens":null,"is_moderated":false},"per_request_limits":null,"supported_parameters":["max_tokens","response_format","seed","temperature","top_p"],"default_parameters":{"temperature":null,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/google/gemini-embedding-001/endpoints"},"aliases":["google/gemini-embedding-001"]},{"id":"openai/text-embedding-ada-002","canonical_slug":"openai/text-embedding-ada-002","hugging_face_id":"","name":"OpenAI: Text Embedding Ada 002","created":1761865798,"description":"text-embedding-ada-002 is OpenAI's legacy text embedding model.","context_length":8192,"architecture":{"modality":"text->embeddings","input_modalities":["text"],"output_modalities":["embeddings"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000001000","completion":"0.0000000000"},"top_provider":{"context_length":8192,"max_completion_tokens":null,"is_moderated":true},"per_request_limits":null,"supported_parameters":["frequency_penalty","logit_bias","logprobs","max_tokens","presence_penalty","response_format","seed","stop","structured_outputs","temperature","top_logprobs","top_p"],"default_parameters":{"temperature":null,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/openai/text-embedding-ada-002/endpoints"},"aliases":["openai/text-embedding-ada-002"]},{"id":"mistralai/codestral-embed-2505","canonical_slug":"mistralai/codestral-embed-2505","hugging_face_id":"","name":"Mistral: Codestral Embed 2505","created":1761864460,"description":"Mistral Codestral Embed is specially designed for code, perfect for embedding code databases, repositories, and powering coding assistants with state-of-the-art retrieval.","context_length":8192,"architecture":{"modality":"text->embeddings","input_modalities":["text"],"output_modalities":["embeddings"],"tokenizer":"Mistral","instruct_type":null},"pricing":{"prompt":"0.0000001500","completion":"0.0000000000"},"top_provider":{"context_length":8192,"max_completion_tokens":null,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","max_tokens","presence_penalty","response_format","seed","stop","structured_outputs","temperature","top_p"],"default_parameters":{"temperature":null,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/mistralai/codestral-embed-2505/endpoints"},"aliases":["mistralai/codestral-embed-2505"]},{"id":"openai/text-embedding-3-large","canonical_slug":"openai/text-embedding-3-large","hugging_face_id":"","name":"OpenAI: Text Embedding 3 Large","created":1761862866,"description":"text-embedding-3-large is OpenAI's most capable embedding model for both english and non-english tasks. Embeddings are a numerical representation of text that can be used to measure the relatedness between two...","context_length":8192,"architecture":{"modality":"text->embeddings","input_modalities":["text"],"output_modalities":["embeddings"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000001300","completion":"0.0000000000"},"top_provider":{"context_length":8192,"max_completion_tokens":null,"is_moderated":true},"per_request_limits":null,"supported_parameters":["frequency_penalty","logit_bias","logprobs","max_completion_tokens","max_tokens","presence_penalty","response_format","seed","stop","structured_outputs","temperature","top_logprobs","top_p"],"default_parameters":{"temperature":null,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/openai/text-embedding-3-large/endpoints"},"aliases":["openai/text-embedding-3-large"]},{"id":"openai/text-embedding-3-small","canonical_slug":"openai/text-embedding-3-small","hugging_face_id":"","name":"OpenAI: Text Embedding 3 Small","created":1761857455,"description":"text-embedding-3-small is OpenAI's improved, more performant version of the ada embedding model. Embeddings are a numerical representation of text that can be used to measure the relatedness between two pieces...","context_length":8192,"architecture":{"modality":"text->embeddings","input_modalities":["text"],"output_modalities":["embeddings"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000000200","completion":"0.0000000000"},"top_provider":{"context_length":8192,"max_completion_tokens":null,"is_moderated":true},"per_request_limits":null,"supported_parameters":["frequency_penalty","logit_bias","logprobs","max_completion_tokens","max_tokens","presence_penalty","response_format","seed","stop","structured_outputs","temperature","top_logprobs","top_p"],"default_parameters":{"temperature":null,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/openai/text-embedding-3-small/endpoints"},"aliases":["openai/text-embedding-3-small"]},{"id":"mistralai/voxtral-small-24b-2507","canonical_slug":"mistralai/voxtral-small-24b-2507","hugging_face_id":"mistralai/Voxtral-Small-24B-2507","name":"Mistral: Voxtral Small 24B 2507","created":1761835144,"description":"Voxtral Small is an enhancement of Mistral Small 3, incorporating state-of-the-art audio input capabilities while retaining best-in-class text performance. It excels at speech transcription, translation and audio understanding. Input audio...","context_length":32000,"architecture":{"modality":"text+file+audio->text","input_modalities":["text","audio","file"],"output_modalities":["text"],"tokenizer":"Mistral","instruct_type":null},"pricing":{"prompt":"0.0000001000","completion":"0.0000003000","audio":"0.0001000000","input_cache_read":"0.0000000100"},"top_provider":{"context_length":32000,"max_completion_tokens":null,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","max_tokens","presence_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_p"],"default_parameters":{"temperature":0.2,"top_p":0.95,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/mistralai/voxtral-small-24b-2507/endpoints"},"aliases":["mistralai/voxtral-small-24b-2507"]},{"id":"openai/gpt-oss-safeguard-20b","canonical_slug":"openai/gpt-oss-safeguard-20b","hugging_face_id":"openai/gpt-oss-safeguard-20b","name":"OpenAI: gpt-oss-safeguard-20b","created":1761752836,"description":"gpt-oss-safeguard-20b is a safety reasoning model from OpenAI built upon gpt-oss-20b. This open-weight, 21B-parameter Mixture-of-Experts (MoE) model offers lower latency for safety tasks like content classification, LLM filtering, and trust...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":"0.0000000750","completion":"0.0000003000","input_cache_read":"0.0000000375"},"top_provider":{"context_length":131072,"max_completion_tokens":65536,"is_moderated":false},"per_request_limits":null,"supported_parameters":["include_reasoning","max_tokens","reasoning","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_p"],"default_parameters":{"temperature":null,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/openai/gpt-oss-safeguard-20b/endpoints"},"reasoning":{"mandatory":true},"aliases":["openai/gpt-oss-safeguard-20b"]},{"id":"qwen/qwen3-embedding-8b","canonical_slug":"qwen/qwen3-embedding-8b","hugging_face_id":"Qwen/Qwen3-Embedding-8B","name":"Qwen: Qwen3 Embedding 8B","created":1761680622,"description":"The Qwen3 Embedding model series is the latest proprietary model of the Qwen family, specifically designed for text embedding and ranking tasks. This series inherits the exceptional multilingual capabilities, long-text...","context_length":32000,"architecture":{"modality":"text->embeddings","input_modalities":["text"],"output_modalities":["embeddings"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000000100","completion":"0.0000000000"},"top_provider":{"context_length":32000,"max_completion_tokens":32000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","logit_bias","logprobs","max_tokens","min_p","presence_penalty","repetition_penalty","response_format","seed","stop","temperature","top_k","top_logprobs","top_p"],"default_parameters":{"temperature":null,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/qwen/qwen3-embedding-8b/endpoints"},"aliases":["qwen/qwen3-embedding-8b"]},{"id":"qwen/qwen3-embedding-4b","canonical_slug":"qwen/qwen3-embedding-4b","hugging_face_id":"Qwen/Qwen3-Embedding-4B","name":"Qwen: Qwen3 Embedding 4B","created":1761662922,"description":"The Qwen3 Embedding model series is the latest proprietary model of the Qwen family, specifically designed for text embedding and ranking tasks. This series inherits the exceptional multilingual capabilities, long-text...","context_length":32768,"architecture":{"modality":"text->embeddings","input_modalities":["text"],"output_modalities":["embeddings"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000000200","completion":"0.0000000000"},"top_provider":{"context_length":32768,"max_completion_tokens":null,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","max_tokens","min_p","presence_penalty","repetition_penalty","response_format","seed","stop","temperature","top_k","top_p"],"default_parameters":{"temperature":null,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/qwen/qwen3-embedding-4b/endpoints"},"aliases":["qwen/qwen3-embedding-4b"]},{"id":"minimax/minimax-m2","canonical_slug":"minimax/minimax-m2","hugging_face_id":"MiniMaxAI/MiniMax-M2","name":"MiniMax: MiniMax M2","created":1761252093,"description":"MiniMax-M2 is a compact, high-efficiency large language model optimized for end-to-end coding and agentic workflows. With 10 billion activated parameters (230 billion total), it delivers near-frontier intelligence across general reasoning,...","context_length":204800,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000002550","completion":"0.0000010000","input_cache_read":"0.0000000300"},"top_provider":{"context_length":196608,"max_completion_tokens":196608,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_tokens","min_p","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{"temperature":1,"top_p":0.95,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/minimax/minimax-m2/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"3d","elo":1176,"win_rate":48.3,"rank":52},{"arena":"models","category":"codecategories","elo":1185,"win_rate":48.1,"rank":59},{"arena":"models","category":"dataviz","elo":1184,"win_rate":50,"rank":54},{"arena":"models","category":"gamedev","elo":1185,"win_rate":48.1,"rank":55},{"arena":"models","category":"svg","elo":1158,"win_rate":55.3,"rank":39},{"arena":"models","category":"uicomponent","elo":1192,"win_rate":49.2,"rank":52},{"arena":"models","category":"website","elo":1186,"win_rate":48,"rank":59}]},"reasoning":{"mandatory":true},"aliases":["minimax/minimax-m2"]},{"id":"qwen/qwen3-vl-32b-instruct","canonical_slug":"qwen/qwen3-vl-32b-instruct","hugging_face_id":"Qwen/Qwen3-VL-32B-Instruct","name":"Qwen: Qwen3 VL 32B Instruct","created":1761231332,"description":"Qwen3-VL-32B-Instruct is a large-scale multimodal vision-language model designed for high-precision understanding and reasoning across text, images, and video. With 32 billion parameters, it combines deep visual perception with advanced text...","context_length":262144,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Qwen","instruct_type":null},"pricing":{"prompt":"0.0000001040","completion":"0.0000004160"},"top_provider":{"context_length":131072,"max_completion_tokens":32768,"is_moderated":false},"per_request_limits":null,"supported_parameters":["logprobs","max_tokens","presence_penalty","response_format","seed","structured_outputs","temperature","tool_choice","tools","top_logprobs","top_p"],"default_parameters":{"temperature":0.7,"top_p":0.8,"top_k":20,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":1},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/qwen/qwen3-vl-32b-instruct/endpoints"},"aliases":["qwen/qwen3-vl-32b-instruct"]},{"id":"anthropic/claude-haiku-4.5","canonical_slug":"anthropic/claude-4.5-haiku-20251001","hugging_face_id":"","name":"Anthropic: Claude Haiku 4.5","created":1760547638,"description":"Claude Haiku 4.5 is Anthropic’s fastest and most efficient model, delivering near-frontier intelligence at a fraction of the cost and latency of larger Claude models. Matching Claude Sonnet 4’s performance...","context_length":200000,"architecture":{"modality":"text+image+file->text","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"Claude","instruct_type":null},"pricing":{"prompt":"0.0000010000","completion":"0.0000050000","web_search":"0.0100000000","input_cache_read":"0.0000001000","input_cache_write":"0.0000012500","input_cache_write_1h":"0.000002"},"top_provider":{"context_length":200000,"max_completion_tokens":64000,"is_moderated":true},"per_request_limits":null,"supported_parameters":["include_reasoning","max_tokens","reasoning","response_format","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_p"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/anthropic/claude-4.5-haiku-20251001/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"3d","elo":1152,"win_rate":41.2,"rank":62},{"arena":"models","category":"asciiart","elo":1184,"win_rate":49.3,"rank":30},{"arena":"models","category":"codecategories","elo":1165,"win_rate":44.8,"rank":63},{"arena":"models","category":"dataviz","elo":1168,"win_rate":45.6,"rank":61},{"arena":"models","category":"gamedev","elo":1164,"win_rate":44.6,"rank":60},{"arena":"models","category":"svg","elo":1083,"win_rate":39.1,"rank":55},{"arena":"models","category":"uicomponent","elo":1155,"win_rate":42.7,"rank":60},{"arena":"models","category":"website","elo":1166,"win_rate":45,"rank":63}],"artificial_analysis":{"intelligence_index":29.6,"coding_index":43.9,"agentic_index":16.4}},"reasoning":{"mandatory":false},"aliases":["claude-haiku-4-5","anthropic/claude-4.5-haiku-20251001"]},{"id":"qwen/qwen3-vl-8b-thinking","canonical_slug":"qwen/qwen3-vl-8b-thinking","hugging_face_id":"Qwen/Qwen3-VL-8B-Thinking","name":"Qwen: Qwen3 VL 8B Thinking","created":1760463746,"description":"Qwen3-VL-8B-Thinking is the reasoning-optimized variant of the Qwen3-VL-8B multimodal model, designed for advanced visual and textual reasoning across complex scenes, documents, and temporal sequences. It integrates enhanced multimodal alignment and...","context_length":256000,"architecture":{"modality":"text+image->text","input_modalities":["image","text"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":null},"pricing":{"prompt":"0.0000001170","completion":"0.0000013650"},"top_provider":{"context_length":131072,"max_completion_tokens":32768,"is_moderated":false},"per_request_limits":null,"supported_parameters":["include_reasoning","logprobs","max_tokens","presence_penalty","reasoning","response_format","seed","structured_outputs","temperature","tool_choice","tools","top_logprobs","top_p"],"default_parameters":{"temperature":1,"top_p":0.95},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/qwen/qwen3-vl-8b-thinking/endpoints"},"reasoning":{"mandatory":true},"aliases":["qwen/qwen3-vl-8b-thinking"]},{"id":"qwen/qwen3-vl-8b-instruct","canonical_slug":"qwen/qwen3-vl-8b-instruct","hugging_face_id":"Qwen/Qwen3-VL-8B-Instruct","name":"Qwen: Qwen3 VL 8B Instruct","created":1760463308,"description":"Qwen3-VL-8B-Instruct is a multimodal vision-language model from the Qwen3-VL series, built for high-fidelity understanding and reasoning across text, images, and video. It features improved multimodal fusion with Interleaved-MRoPE for long-horizon...","context_length":256000,"architecture":{"modality":"text+image->text","input_modalities":["image","text"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":null},"pricing":{"prompt":"0.0000000800","completion":"0.0000005000"},"top_provider":{"context_length":131072,"max_completion_tokens":32768,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","logit_bias","logprobs","max_tokens","min_p","presence_penalty","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{"temperature":0.7,"top_p":0.8,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/qwen/qwen3-vl-8b-instruct/endpoints"},"aliases":["qwen/qwen3-vl-8b-instruct"]},{"id":"openai/o3-deep-research","canonical_slug":"openai/o3-deep-research-2025-06-26","hugging_face_id":"","name":"OpenAI: o3 Deep Research","created":1760129661,"description":"o3-deep-research is OpenAI's advanced model for deep research, designed to tackle complex, multi-step research tasks.\n\nNote: This model always uses the 'web_search' tool which adds additional cost.","context_length":200000,"architecture":{"modality":"text+image+file->text","input_modalities":["image","text","file"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":"0.0000100000","completion":"0.0000400000","web_search":"0.0100000000","input_cache_read":"0.0000025000"},"top_provider":{"context_length":200000,"max_completion_tokens":100000,"is_moderated":true},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_tokens","presence_penalty","reasoning","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_logprobs","top_p"],"default_parameters":{"temperature":null,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/openai/o3-deep-research-2025-06-26/endpoints"},"reasoning":{"mandatory":false},"aliases":["openai/o3-deep-research-2025-06-26"]},{"id":"openai/o4-mini-deep-research","canonical_slug":"openai/o4-mini-deep-research-2025-06-26","hugging_face_id":"","name":"OpenAI: o4 Mini Deep Research","created":1760129642,"description":"o4-mini-deep-research is OpenAI's faster, more affordable deep research model—ideal for tackling complex, multi-step research tasks.\n\nNote: This model always uses the 'web_search' tool which adds additional cost.","context_length":200000,"architecture":{"modality":"text+image+file->text","input_modalities":["file","image","text"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":"0.0000020000","completion":"0.0000080000","web_search":"0.0100000000","input_cache_read":"0.0000005000"},"top_provider":{"context_length":200000,"max_completion_tokens":100000,"is_moderated":true},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_tokens","presence_penalty","reasoning","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_logprobs","top_p"],"default_parameters":{"temperature":null,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/openai/o4-mini-deep-research-2025-06-26/endpoints"},"reasoning":{"mandatory":false},"aliases":["openai/o4-mini-deep-research-2025-06-26"]},{"id":"nvidia/llama-3.3-nemotron-super-49b-v1.5","canonical_slug":"nvidia/llama-3.3-nemotron-super-49b-v1.5","hugging_face_id":"nvidia/Llama-3_3-Nemotron-Super-49B-v1_5","name":"NVIDIA: Llama 3.3 Nemotron Super 49B V1.5","created":1760101395,"description":"Llama-3.3-Nemotron-Super-49B-v1.5 is a 49B-parameter, English-centric reasoning/chat model derived from Meta’s Llama-3.3-70B-Instruct with a 128K context. It’s post-trained for agentic workflows (RAG, tool calling) via SFT across math, code, science, and...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Llama3","instruct_type":null},"pricing":{"prompt":"0.0000004000","completion":"0.0000004000"},"top_provider":{"context_length":131072,"max_completion_tokens":16384,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","max_tokens","min_p","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","temperature","tool_choice","tools","top_k","top_p"],"default_parameters":{"temperature":0.6,"top_p":0.95,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":"2024-03-31","expiration_date":null,"links":{"details":"/api/v1/models/nvidia/llama-3.3-nemotron-super-49b-v1.5/endpoints"},"reasoning":{"mandatory":false},"aliases":["nvidia/llama-3.3-nemotron-super-49b-v1.5"]},{"id":"qwen/qwen3-vl-30b-a3b-thinking","canonical_slug":"qwen/qwen3-vl-30b-a3b-thinking","hugging_face_id":"Qwen/Qwen3-VL-30B-A3B-Thinking","name":"Qwen: Qwen3 VL 30B A3B Thinking","created":1759794479,"description":"Qwen3-VL-30B-A3B-Thinking is a multimodal model that unifies strong text generation with visual understanding for images and videos. Its Thinking variant enhances reasoning in STEM, math, and complex tasks. It excels...","context_length":131072,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":null},"pricing":{"prompt":"0.0000001300","completion":"0.0000015600"},"top_provider":{"context_length":131072,"max_completion_tokens":32768,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","logprobs","max_tokens","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{"temperature":0.8,"top_p":0.95,"top_k":20,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":1},"supported_voices":null,"knowledge_cutoff":"2025-03-31","expiration_date":null,"links":{"details":"/api/v1/models/qwen/qwen3-vl-30b-a3b-thinking/endpoints"},"reasoning":{"mandatory":true},"aliases":["qwen/qwen3-vl-30b-a3b-thinking"]},{"id":"qwen/qwen3-vl-30b-a3b-instruct","canonical_slug":"qwen/qwen3-vl-30b-a3b-instruct","hugging_face_id":"Qwen/Qwen3-VL-30B-A3B-Instruct","name":"Qwen: Qwen3 VL 30B A3B Instruct","created":1759794476,"description":"Qwen3-VL-30B-A3B-Instruct is a multimodal model that unifies strong text generation with visual understanding for images and videos. Its Instruct variant optimizes instruction-following for general multimodal tasks. It excels in perception...","context_length":262144,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":null},"pricing":{"prompt":"0.0000001300","completion":"0.0000005200"},"top_provider":{"context_length":131072,"max_completion_tokens":32768,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","logit_bias","logprobs","max_tokens","min_p","presence_penalty","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{"temperature":0.7,"top_p":0.8,"top_k":20,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":1},"supported_voices":null,"knowledge_cutoff":"2025-03-31","expiration_date":null,"links":{"details":"/api/v1/models/qwen/qwen3-vl-30b-a3b-instruct/endpoints"},"aliases":["qwen/qwen3-vl-30b-a3b-instruct"]},{"id":"openai/gpt-5-pro","canonical_slug":"openai/gpt-5-pro-2025-10-06","hugging_face_id":"","name":"OpenAI: GPT-5 Pro","created":1759776663,"description":"GPT-5 Pro is OpenAI’s most advanced model, offering major improvements in reasoning, code quality, and user experience. It is optimized for complex tasks that require step-by-step reasoning, instruction following, and...","context_length":400000,"architecture":{"modality":"text+image+file->text","input_modalities":["image","text","file"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":"0.0000150000","completion":"0.0001200000","web_search":"0.0100000000"},"top_provider":{"context_length":400000,"max_completion_tokens":128000,"is_moderated":true},"per_request_limits":null,"supported_parameters":["include_reasoning","max_tokens","reasoning","response_format","seed","structured_outputs","tool_choice","tools"],"default_parameters":{"temperature":null,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":"2024-09-30","expiration_date":null,"links":{"details":"/api/v1/models/openai/gpt-5-pro-2025-10-06/endpoints"},"reasoning":{"mandatory":true,"supported_efforts":["high"],"default_effort":"high"},"aliases":["openai/gpt-5-pro-2025-10-06"]},{"id":"z-ai/glm-4.6","canonical_slug":"z-ai/glm-4.6","hugging_face_id":"zai-org/GLM-4.6","name":"Z.ai: GLM 4.6","created":1759235576,"description":"Compared with GLM-4.5, this generation brings several key improvements: Longer context window: The context window has been expanded from 128K to 200K tokens, enabling the model to handle more complex...","context_length":202752,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000004300","completion":"0.0000017400","input_cache_read":"0.0000000800"},"top_provider":{"context_length":202752,"max_completion_tokens":131072,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","max_tokens","min_p","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_p"],"default_parameters":{"temperature":0.6,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":"2025-03-31","expiration_date":null,"links":{"details":"/api/v1/models/z-ai/glm-4.6/endpoints"},"benchmarks":{"design_arena":[{"arena":"agents","category":"androidnative","elo":1098,"win_rate":52.5,"rank":20},{"arena":"agents","category":"fullstack","elo":1099,"win_rate":42.3,"rank":22},{"arena":"agents","category":"godotgamedev","elo":1219,"win_rate":53.2,"rank":7},{"arena":"agents","category":"mobileapps","elo":1186,"win_rate":49.3,"rank":18},{"arena":"models","category":"3d","elo":1206,"win_rate":54,"rank":42},{"arena":"models","category":"codecategories","elo":1216,"win_rate":54.3,"rank":46},{"arena":"models","category":"dataviz","elo":1208,"win_rate":52.8,"rank":44},{"arena":"models","category":"gamedev","elo":1217,"win_rate":54.6,"rank":40},{"arena":"models","category":"svg","elo":1167,"win_rate":52.1,"rank":37},{"arena":"models","category":"uicomponent","elo":1214,"win_rate":54,"rank":43},{"arena":"models","category":"website","elo":1218,"win_rate":54.4,"rank":47}]},"reasoning":{"mandatory":false},"aliases":["z-ai/glm-4.6"]},{"id":"anthropic/claude-sonnet-4.5","canonical_slug":"anthropic/claude-4.5-sonnet-20250929","hugging_face_id":"","name":"Anthropic: Claude Sonnet 4.5","created":1759161676,"description":"Claude Sonnet 4.5 is Anthropic’s most advanced Sonnet model to date, optimized for real-world agents and coding workflows. It delivers state-of-the-art performance on coding benchmarks such as SWE-bench Verified, with...","context_length":1000000,"architecture":{"modality":"text+image+file->text","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"Claude","instruct_type":null},"pricing":{"prompt":"0.0000030000","completion":"0.0000150000","web_search":"0.0100000000","input_cache_read":"0.0000003000","input_cache_write":"0.0000037500","input_cache_write_1h":"0.000006"},"top_provider":{"context_length":1000000,"max_completion_tokens":64000,"is_moderated":true},"per_request_limits":null,"supported_parameters":["include_reasoning","max_tokens","reasoning","response_format","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_p"],"default_parameters":{"temperature":1,"top_p":1,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":"2025-01-31","expiration_date":null,"links":{"details":"/api/v1/models/anthropic/claude-4.5-sonnet-20250929/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"3d","elo":1238,"win_rate":53.8,"rank":34},{"arena":"models","category":"asciiart","elo":1244,"win_rate":56.3,"rank":10},{"arena":"models","category":"codecategories","elo":1233,"win_rate":53.2,"rank":34},{"arena":"models","category":"dataviz","elo":1211,"win_rate":49.5,"rank":41},{"arena":"models","category":"gamedev","elo":1235,"win_rate":52.6,"rank":37},{"arena":"models","category":"svg","elo":1173,"win_rate":53.1,"rank":36},{"arena":"models","category":"uicomponent","elo":1233,"win_rate":52.5,"rank":35},{"arena":"models","category":"website","elo":1233,"win_rate":53.5,"rank":38},{"arena":"agents","category":"fullstack","elo":1120,"win_rate":43.5,"rank":18},{"arena":"agents","category":"mobileapps","elo":1184,"win_rate":48.9,"rank":21},{"arena":"agents","category":"webapps","elo":1129,"win_rate":43.1,"rank":21}]},"reasoning":{"mandatory":false},"aliases":["anthropic/claude-4.5-sonnet-20250929"]},{"id":"deepseek/deepseek-v3.2-exp","canonical_slug":"deepseek/deepseek-v3.2-exp","hugging_face_id":"deepseek-ai/DeepSeek-V3.2-Exp","name":"DeepSeek: DeepSeek V3.2 Exp","created":1759150481,"description":"DeepSeek-V3.2-Exp is an experimental large language model released by DeepSeek as an intermediate step between V3.1 and future architectures. It introduces DeepSeek Sparse Attention (DSA), a fine-grained sparse attention mechanism...","context_length":163840,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"DeepSeek","instruct_type":"deepseek-v3.1"},"pricing":{"prompt":"0.0000002700","completion":"0.0000004100"},"top_provider":{"context_length":163840,"max_completion_tokens":65536,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_tokens","min_p","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{"temperature":0.6,"top_p":0.95,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":"2025-07-31","expiration_date":null,"links":{"details":"/api/v1/models/deepseek/deepseek-v3.2-exp/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"3d","elo":1227,"win_rate":56.4,"rank":36},{"arena":"models","category":"codecategories","elo":1221,"win_rate":54.2,"rank":40},{"arena":"models","category":"dataviz","elo":1196,"win_rate":50.6,"rank":51},{"arena":"models","category":"gamedev","elo":1212,"win_rate":53.1,"rank":41},{"arena":"models","category":"svg","elo":1090,"win_rate":42,"rank":53},{"arena":"models","category":"uicomponent","elo":1222,"win_rate":53.3,"rank":39},{"arena":"models","category":"website","elo":1222,"win_rate":54.2,"rank":44}]},"reasoning":{"mandatory":false},"aliases":["deepseek/deepseek-v3.2-exp"]},{"id":"google/gemini-2.5-flash-lite-preview-09-2025","canonical_slug":"google/gemini-2.5-flash-lite-preview-09-2025","hugging_face_id":"","name":"Google: Gemini 2.5 Flash Lite Preview 09-2025","created":1758819686,"description":"Gemini 2.5 Flash-Lite is a lightweight reasoning model in the Gemini 2.5 family, optimized for ultra-low latency and cost efficiency. It offers improved throughput, faster token generation, and better performance...","context_length":1048576,"architecture":{"modality":"text+image+file+audio+video->text","input_modalities":["text","image","file","audio","video"],"output_modalities":["text"],"tokenizer":"Gemini","instruct_type":null},"pricing":{"prompt":"0.0000001000","completion":"0.0000004000","image":"0.0000001000","audio":"0.0000003000","web_search":"0.0140000000","internal_reasoning":"0.0000004","input_cache_read":"0.0000000100","input_cache_write":"0.0000000833"},"top_provider":{"context_length":1048576,"max_completion_tokens":65535,"is_moderated":false},"per_request_limits":null,"supported_parameters":["include_reasoning","max_tokens","reasoning","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_p"],"default_parameters":{"temperature":null,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":"2025-01-31","expiration_date":null,"links":{"details":"/api/v1/models/google/gemini-2.5-flash-lite-preview-09-2025/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"3d","elo":1046,"win_rate":36.5,"rank":84},{"arena":"models","category":"codecategories","elo":1132,"win_rate":47,"rank":75},{"arena":"models","category":"dataviz","elo":1133,"win_rate":45.5,"rank":71},{"arena":"models","category":"gamedev","elo":1114,"win_rate":45.9,"rank":75},{"arena":"models","category":"uicomponent","elo":1077,"win_rate":41.4,"rank":77},{"arena":"models","category":"website","elo":1143,"win_rate":48.1,"rank":75}]},"reasoning":{"mandatory":false},"aliases":["google/gemini-2.5-flash-lite-preview-09-2025"]},{"id":"qwen/qwen3-vl-235b-a22b-thinking","canonical_slug":"qwen/qwen3-vl-235b-a22b-thinking","hugging_face_id":"Qwen/Qwen3-VL-235B-A22B-Thinking","name":"Qwen: Qwen3 VL 235B A22B Thinking","created":1758668690,"description":"Qwen3-VL-235B-A22B Thinking is a multimodal model that unifies strong text generation with visual understanding across images and video. The Thinking model is optimized for multimodal reasoning in STEM and math....","context_length":131072,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":null},"pricing":{"prompt":"0.0000002600","completion":"0.0000026000"},"top_provider":{"context_length":131072,"max_completion_tokens":32768,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","logprobs","max_tokens","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{"temperature":0.8,"top_p":0.95,"top_k":20,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":1},"supported_voices":null,"knowledge_cutoff":"2025-03-31","expiration_date":null,"links":{"details":"/api/v1/models/qwen/qwen3-vl-235b-a22b-thinking/endpoints"},"reasoning":{"mandatory":true},"aliases":["qwen/qwen3-vl-235b-a22b-thinking"]},{"id":"qwen/qwen3-vl-235b-a22b-instruct","canonical_slug":"qwen/qwen3-vl-235b-a22b-instruct","hugging_face_id":"Qwen/Qwen3-VL-235B-A22B-Instruct","name":"Qwen: Qwen3 VL 235B A22B Instruct","created":1758668687,"description":"Qwen3-VL-235B-A22B Instruct is an open-weight multimodal model that unifies strong text generation with visual understanding across images and video. The Instruct model targets general vision-language use (VQA, document parsing, chart/table...","context_length":262144,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":null},"pricing":{"prompt":"0.0000002000","completion":"0.0000008800","input_cache_read":"0.0000001100"},"top_provider":{"context_length":262144,"max_completion_tokens":16384,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","logit_bias","logprobs","max_tokens","min_p","presence_penalty","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{"temperature":0.7,"top_p":0.8,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":"2025-03-31","expiration_date":null,"links":{"details":"/api/v1/models/qwen/qwen3-vl-235b-a22b-instruct/endpoints"},"aliases":["qwen/qwen3-vl-235b-a22b-instruct"]},{"id":"qwen/qwen3-max","canonical_slug":"qwen/qwen3-max","hugging_face_id":"","name":"Qwen: Qwen3 Max","created":1758662808,"description":"Qwen3-Max is an updated release built on the Qwen3 series, offering major improvements in reasoning, instruction following, multilingual support, and long-tail knowledge coverage compared to the January 2025 version. It...","context_length":262144,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":null},"pricing":{"prompt":"0.0000007800","completion":"0.0000039000","input_cache_read":"0.0000001560","input_cache_write":"0.0000009750"},"top_provider":{"context_length":262144,"max_completion_tokens":32768,"is_moderated":false},"per_request_limits":null,"supported_parameters":["logprobs","max_tokens","presence_penalty","response_format","seed","structured_outputs","temperature","tool_choice","tools","top_logprobs","top_p"],"default_parameters":{"temperature":1,"top_p":1,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":"2025-06-30","expiration_date":null,"links":{"details":"/api/v1/models/qwen/qwen3-max/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"3d","elo":1152,"win_rate":43.5,"rank":63},{"arena":"models","category":"asciiart","elo":1172,"win_rate":47.2,"rank":32},{"arena":"models","category":"codecategories","elo":1160,"win_rate":44,"rank":66},{"arena":"models","category":"dataviz","elo":1148,"win_rate":41.8,"rank":64},{"arena":"models","category":"gamedev","elo":1162,"win_rate":43.9,"rank":61},{"arena":"models","category":"svg","elo":1069,"win_rate":37.3,"rank":59},{"arena":"models","category":"uicomponent","elo":1132,"win_rate":40.1,"rank":67},{"arena":"models","category":"website","elo":1162,"win_rate":44.4,"rank":66}]},"reasoning":{"mandatory":false},"aliases":["qwen/qwen3-max"]},{"id":"qwen/qwen3-coder-plus","canonical_slug":"qwen/qwen3-coder-plus","hugging_face_id":"","name":"Qwen: Qwen3 Coder Plus","created":1758662707,"description":"Qwen3 Coder Plus is Alibaba's proprietary version of the Open Source Qwen3 Coder 480B A35B. It is a powerful coding agent model specializing in autonomous programming via tool calling and...","context_length":1000000,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":null},"pricing":{"prompt":"0.0000006500","completion":"0.0000032500","input_cache_read":"0.0000001300","input_cache_write":"0.0000008125"},"top_provider":{"context_length":1000000,"max_completion_tokens":65536,"is_moderated":false},"per_request_limits":null,"supported_parameters":["logprobs","max_tokens","presence_penalty","response_format","seed","structured_outputs","temperature","tool_choice","tools","top_logprobs","top_p"],"default_parameters":{"temperature":null,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":"2025-06-30","expiration_date":null,"links":{"details":"/api/v1/models/qwen/qwen3-coder-plus/endpoints"},"reasoning":{"mandatory":false},"aliases":["qwen/qwen3-coder-plus"]},{"id":"openai/gpt-5-codex","canonical_slug":"openai/gpt-5-codex","hugging_face_id":"","name":"OpenAI: GPT-5 Codex","created":1758643403,"description":"GPT-5-Codex is a specialized version of GPT-5 optimized for software engineering and coding workflows. It is designed for both interactive development sessions and long, independent execution of complex engineering tasks....","context_length":400000,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":"0.0000012500","completion":"0.0000100000","web_search":"0.0100000000","input_cache_read":"0.0000001250"},"top_provider":{"context_length":400000,"max_completion_tokens":128000,"is_moderated":true},"per_request_limits":null,"supported_parameters":["include_reasoning","max_tokens","reasoning","response_format","seed","structured_outputs","tool_choice","tools"],"default_parameters":{"temperature":null,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":"2024-09-30","expiration_date":null,"links":{"details":"/api/v1/models/openai/gpt-5-codex/endpoints"},"benchmarks":{"design_arena":[{"arena":"agents","category":"mobileapps","elo":1131,"win_rate":43.2,"rank":28}]},"reasoning":{"mandatory":true},"aliases":["openai/gpt-5-codex"]},{"id":"deepseek/deepseek-v3.1-terminus","canonical_slug":"deepseek/deepseek-v3.1-terminus","hugging_face_id":"deepseek-ai/DeepSeek-V3.1-Terminus","name":"DeepSeek: DeepSeek V3.1 Terminus","created":1758548275,"description":"DeepSeek-V3.1 Terminus is an update to [DeepSeek V3.1](/deepseek/deepseek-chat-v3.1) that maintains the model's original capabilities while addressing issues reported by users, including language consistency and agent capabilities, further optimizing the model's...","context_length":163840,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"DeepSeek","instruct_type":"deepseek-v3.1"},"pricing":{"prompt":"0.0000002700","completion":"0.0000009500","input_cache_read":"0.0000001300"},"top_provider":{"context_length":163840,"max_completion_tokens":32768,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","max_tokens","min_p","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_p"],"default_parameters":{"temperature":null,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":"2025-03-31","expiration_date":null,"links":{"details":"/api/v1/models/deepseek/deepseek-v3.1-terminus/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"3d","elo":1219,"win_rate":56,"rank":37},{"arena":"models","category":"codecategories","elo":1226,"win_rate":56,"rank":38},{"arena":"models","category":"dataviz","elo":1207,"win_rate":53,"rank":45},{"arena":"models","category":"gamedev","elo":1198,"win_rate":52.5,"rank":50},{"arena":"models","category":"svg","elo":1122,"win_rate":50.1,"rank":48},{"arena":"models","category":"uicomponent","elo":1237,"win_rate":59.3,"rank":34},{"arena":"models","category":"website","elo":1231,"win_rate":56.4,"rank":39}]},"reasoning":{"mandatory":false},"aliases":["deepseek/deepseek-v3.1-terminus"]},{"id":"qwen/qwen3-coder-flash","canonical_slug":"qwen/qwen3-coder-flash","hugging_face_id":"","name":"Qwen: Qwen3 Coder Flash","created":1758115536,"description":"Qwen3 Coder Flash is Alibaba's fast and cost efficient version of their proprietary Qwen3 Coder Plus. It is a powerful coding agent model specializing in autonomous programming via tool calling...","context_length":1000000,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":null},"pricing":{"prompt":"0.0000001950","completion":"0.0000009750","input_cache_read":"0.0000000390","input_cache_write":"0.0000002437"},"top_provider":{"context_length":1000000,"max_completion_tokens":65536,"is_moderated":false},"per_request_limits":null,"supported_parameters":["logprobs","max_tokens","presence_penalty","response_format","seed","temperature","tool_choice","tools","top_logprobs","top_p"],"default_parameters":{"temperature":null,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":"2025-06-30","expiration_date":null,"links":{"details":"/api/v1/models/qwen/qwen3-coder-flash/endpoints"},"aliases":["qwen/qwen3-coder-flash"]},{"id":"qwen/qwen3-next-80b-a3b-thinking","canonical_slug":"qwen/qwen3-next-80b-a3b-thinking-2509","hugging_face_id":"Qwen/Qwen3-Next-80B-A3B-Thinking","name":"Qwen: Qwen3 Next 80B A3B Thinking","created":1757612284,"description":"Qwen3-Next-80B-A3B-Thinking is a reasoning-first chat model in the Qwen3-Next line that outputs structured “thinking” traces by default. It’s designed for hard multi-step problems; math proofs, code synthesis/debugging, logic, and agentic...","context_length":262144,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":null},"pricing":{"prompt":"0.0000000975","completion":"0.0000007800"},"top_provider":{"context_length":131072,"max_completion_tokens":32768,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_tokens","min_p","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{"temperature":null,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":"2025-09-30","expiration_date":null,"links":{"details":"/api/v1/models/qwen/qwen3-next-80b-a3b-thinking-2509/endpoints"},"reasoning":{"mandatory":true},"aliases":["qwen/qwen3-next-80b-a3b-thinking-2509"]},{"id":"qwen/qwen3-next-80b-a3b-instruct","canonical_slug":"qwen/qwen3-next-80b-a3b-instruct-2509","hugging_face_id":"Qwen/Qwen3-Next-80B-A3B-Instruct","name":"Qwen: Qwen3 Next 80B A3B Instruct","created":1757612213,"description":"Qwen3-Next-80B-A3B-Instruct is an instruction-tuned chat model in the Qwen3-Next series optimized for fast, stable responses without “thinking” traces. It targets complex tasks across reasoning, code generation, knowledge QA, and multilingual...","context_length":262144,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":null},"pricing":{"prompt":"0.0000000900","completion":"0.0000011000"},"top_provider":{"context_length":262144,"max_completion_tokens":16384,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","logit_bias","logprobs","max_tokens","min_p","presence_penalty","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{},"supported_voices":null,"knowledge_cutoff":"2025-09-30","expiration_date":null,"links":{"details":"/api/v1/models/qwen/qwen3-next-80b-a3b-instruct-2509/endpoints"},"aliases":["qwen/qwen3-next-80b-a3b-instruct-2509"]},{"id":"qwen/qwen-plus-2025-07-28:thinking","canonical_slug":"qwen/qwen-plus-2025-07-28","hugging_face_id":"","name":"Qwen: Qwen Plus 0728 (thinking)","created":1757347599,"description":"Qwen Plus 0728, based on the Qwen3 foundation model, is a 1 million context hybrid reasoning model with a balanced performance, speed, and cost combination.","context_length":1000000,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":null},"pricing":{"prompt":"0.0000002600","completion":"0.0000007800","input_cache_write":"0.0000003250"},"top_provider":{"context_length":1000000,"max_completion_tokens":32768,"is_moderated":false},"per_request_limits":null,"supported_parameters":["include_reasoning","max_tokens","presence_penalty","reasoning","response_format","seed","structured_outputs","temperature","tool_choice","tools","top_p"],"default_parameters":{"temperature":null,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":"2025-03-31","expiration_date":null,"links":{"details":"/api/v1/models/qwen/qwen-plus-2025-07-28/endpoints"},"reasoning":{"mandatory":false},"aliases":["qwen/qwen-plus-2025-07-28"]},{"id":"qwen/qwen-plus-2025-07-28","canonical_slug":"qwen/qwen-plus-2025-07-28","hugging_face_id":"","name":"Qwen: Qwen Plus 0728","created":1757347599,"description":"Qwen Plus 0728, based on the Qwen3 foundation model, is a 1 million context hybrid reasoning model with a balanced performance, speed, and cost combination.","context_length":1000000,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":null},"pricing":{"prompt":"0.0000002600","completion":"0.0000007800"},"top_provider":{"context_length":1000000,"max_completion_tokens":32768,"is_moderated":false},"per_request_limits":null,"supported_parameters":["logprobs","max_tokens","presence_penalty","response_format","seed","structured_outputs","temperature","tool_choice","tools","top_logprobs","top_p"],"default_parameters":{"temperature":null,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":"2025-03-31","expiration_date":null,"links":{"details":"/api/v1/models/qwen/qwen-plus-2025-07-28/endpoints"},"reasoning":{"mandatory":false},"aliases":["qwen/qwen-plus-2025-07-28"]},{"id":"moonshotai/kimi-k2-0905","canonical_slug":"moonshotai/kimi-k2-0905","hugging_face_id":"moonshotai/Kimi-K2-Instruct-0905","name":"MoonshotAI: Kimi K2 0905","created":1757021147,"description":"Kimi K2 0905 is the September update of [Kimi K2 0711](moonshotai/kimi-k2). It is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32...","context_length":262144,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000006000","completion":"0.0000025000"},"top_provider":{"context_length":262144,"max_completion_tokens":262144,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","logit_bias","max_tokens","min_p","presence_penalty","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_p"],"default_parameters":{},"supported_voices":null,"knowledge_cutoff":"2024-12-31","expiration_date":null,"links":{"details":"/api/v1/models/moonshotai/kimi-k2-0905/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"codecategories","elo":1148,"win_rate":48.5,"rank":71},{"arena":"models","category":"website","elo":1151,"win_rate":48.3,"rank":72}]},"aliases":["moonshotai/kimi-k2-0905"]},{"id":"qwen/qwen3-30b-a3b-thinking-2507","canonical_slug":"qwen/qwen3-30b-a3b-thinking-2507","hugging_face_id":"Qwen/Qwen3-30B-A3B-Thinking-2507","name":"Qwen: Qwen3 30B A3B Thinking 2507","created":1756399192,"description":"Qwen3-30B-A3B-Thinking-2507 is a 30B parameter Mixture-of-Experts reasoning model optimized for complex tasks requiring extended multi-step thinking. The model is designed specifically for “thinking mode,” where internal reasoning traces are separated...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":null},"pricing":{"prompt":"0.0000000800","completion":"0.0000004000","input_cache_read":"0.0000000800"},"top_provider":{"context_length":131072,"max_completion_tokens":131072,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_tokens","min_p","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{},"supported_voices":null,"knowledge_cutoff":"2025-06-30","expiration_date":null,"links":{"details":"/api/v1/models/qwen/qwen3-30b-a3b-thinking-2507/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"dataviz","elo":969,"win_rate":33.3,"rank":94},{"arena":"models","category":"website","elo":974,"win_rate":35.5,"rank":102}]},"reasoning":{"mandatory":true},"aliases":["qwen/qwen3-30b-a3b-thinking-2507"]},{"id":"nousresearch/hermes-4-70b","canonical_slug":"nousresearch/hermes-4-70b","hugging_face_id":"NousResearch/Hermes-4-70B","name":"Nous: Hermes 4 70B","created":1756236182,"description":"Hermes 4 70B is a hybrid reasoning model from Nous Research, built on Meta-Llama-3.1-70B. It introduces the same hybrid mode as the larger 405B release, allowing the model to either...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Llama3","instruct_type":null},"pricing":{"prompt":"0.0000000500","completion":"0.0000002000"},"top_provider":{"context_length":131072,"max_completion_tokens":null,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","max_tokens","presence_penalty","reasoning","repetition_penalty","response_format","temperature","top_k","top_p"],"default_parameters":{},"supported_voices":null,"knowledge_cutoff":"2024-08-31","expiration_date":null,"links":{"details":"/api/v1/models/nousresearch/hermes-4-70b/endpoints"},"reasoning":{"mandatory":false},"aliases":["nousresearch/hermes-4-70b","Hermes-4-70B","NousResearch/Hermes-4-70B"]},{"id":"nousresearch/hermes-4-405b","canonical_slug":"nousresearch/hermes-4-405b","hugging_face_id":"NousResearch/Hermes-4-405B","name":"Nous: Hermes 4 405B","created":1756235463,"description":"Hermes 4 is a large-scale reasoning model built on Meta-Llama-3.1-405B and released by Nous Research. It introduces a hybrid reasoning mode, where the model can choose to deliberate internally with...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000000900","completion":"0.0000003700"},"top_provider":{"context_length":131072,"max_completion_tokens":null,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","max_tokens","presence_penalty","reasoning","repetition_penalty","response_format","temperature","top_k","top_p"],"default_parameters":{},"supported_voices":null,"knowledge_cutoff":"2024-08-31","expiration_date":null,"links":{"details":"/api/v1/models/nousresearch/hermes-4-405b/endpoints"},"reasoning":{"mandatory":false},"aliases":["nousresearch/hermes-4-405b","Hermes-4-405B","NousResearch/Hermes-4-405B"]},{"id":"deepseek/deepseek-chat-v3.1","canonical_slug":"deepseek/deepseek-chat-v3.1","hugging_face_id":"deepseek-ai/DeepSeek-V3.1","name":"DeepSeek: DeepSeek V3.1","created":1755779628,"description":"DeepSeek-V3.1 is a large hybrid reasoning model (671B parameters, 37B active) that supports both thinking and non-thinking modes via prompt templates. It extends the DeepSeek-V3 base with a two-phase long-context...","context_length":163840,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"DeepSeek","instruct_type":"deepseek-v3.1"},"pricing":{"prompt":"0.0000002100","completion":"0.0000007900","input_cache_read":"0.0000001300"},"top_provider":{"context_length":163840,"max_completion_tokens":32768,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_tokens","min_p","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{},"supported_voices":null,"knowledge_cutoff":"2025-03-31","expiration_date":null,"links":{"details":"/api/v1/models/deepseek/deepseek-chat-v3.1/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"3d","elo":1156,"win_rate":48,"rank":61},{"arena":"models","category":"codecategories","elo":1162,"win_rate":47.9,"rank":65},{"arena":"models","category":"dataviz","elo":1143,"win_rate":46.8,"rank":66},{"arena":"models","category":"gamedev","elo":1152,"win_rate":47.2,"rank":64},{"arena":"models","category":"svg","elo":1024,"win_rate":38.2,"rank":68},{"arena":"models","category":"uicomponent","elo":1140,"win_rate":47.5,"rank":64},{"arena":"models","category":"website","elo":1166,"win_rate":48,"rank":64}]},"reasoning":{"mandatory":false},"aliases":["deepseek/deepseek-chat-v3.1"]},{"id":"mistralai/mistral-medium-3.1","canonical_slug":"mistralai/mistral-medium-3.1","hugging_face_id":"","name":"Mistral: Mistral Medium 3.1","created":1755095639,"description":"Mistral Medium 3.1 is an updated version of Mistral Medium 3, which is a high-performance enterprise-grade language model designed to deliver frontier-level capabilities at significantly reduced operational cost. It balances...","context_length":131072,"architecture":{"modality":"text+image+file->text","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"Mistral","instruct_type":null},"pricing":{"prompt":"0.0000004000","completion":"0.0000020000","input_cache_read":"0.0000000400"},"top_provider":{"context_length":131072,"max_completion_tokens":null,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","max_tokens","presence_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_p"],"default_parameters":{"temperature":0.3},"supported_voices":null,"knowledge_cutoff":"2025-06-30","expiration_date":null,"links":{"details":"/api/v1/models/mistralai/mistral-medium-3.1/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"3d","elo":1161,"win_rate":44.7,"rank":58},{"arena":"models","category":"asciiart","elo":1042,"win_rate":30.8,"rank":47},{"arena":"models","category":"codecategories","elo":1171,"win_rate":45.1,"rank":61},{"arena":"models","category":"dataviz","elo":1191,"win_rate":47.6,"rank":52},{"arena":"models","category":"gamedev","elo":1141,"win_rate":40.7,"rank":69},{"arena":"models","category":"svg","elo":1050,"win_rate":38.2,"rank":61},{"arena":"models","category":"uicomponent","elo":1154,"win_rate":43.5,"rank":61},{"arena":"models","category":"website","elo":1177,"win_rate":46,"rank":61}]},"aliases":["mistralai/mistral-medium-3.1"]},{"id":"z-ai/glm-4.5v","canonical_slug":"z-ai/glm-4.5v","hugging_face_id":"zai-org/GLM-4.5V","name":"Z.ai: GLM 4.5V","created":1754922288,"description":"GLM-4.5V is a vision-language foundation model for multimodal agent applications. Built on a Mixture-of-Experts (MoE) architecture with 106B parameters and 12B activated parameters, it achieves state-of-the-art results in video understanding,...","context_length":65536,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000006000","completion":"0.0000018000","input_cache_read":"0.0000001100"},"top_provider":{"context_length":65536,"max_completion_tokens":16384,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","max_tokens","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","temperature","tool_choice","tools","top_k","top_p"],"default_parameters":{"temperature":0.75,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":"2024-12-31","expiration_date":null,"links":{"details":"/api/v1/models/z-ai/glm-4.5v/endpoints"},"reasoning":{"mandatory":false},"aliases":["z-ai/glm-4.5v"]},{"id":"ai21/jamba-large-1.7","canonical_slug":"ai21/jamba-large-1.7","hugging_face_id":"ai21labs/AI21-Jamba-Large-1.7","name":"AI21: Jamba Large 1.7","created":1754669020,"description":"Jamba Large 1.7 is the latest model in the Jamba open family, offering improvements in grounding, instruction-following, and overall efficiency. Built on a hybrid SSM-Transformer architecture with a 256K context...","context_length":256000,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000020000","completion":"0.0000080000"},"top_provider":{"context_length":256000,"max_completion_tokens":4096,"is_moderated":false},"per_request_limits":null,"supported_parameters":["max_tokens","response_format","stop","temperature","tool_choice","tools","top_p"],"default_parameters":{},"supported_voices":null,"knowledge_cutoff":"2024-08-31","expiration_date":null,"links":{"details":"/api/v1/models/ai21/jamba-large-1.7/endpoints"},"aliases":["ai21/jamba-large-1.7"]},{"id":"openai/gpt-5","canonical_slug":"openai/gpt-5-2025-08-07","hugging_face_id":"","name":"OpenAI: GPT-5","created":1754587413,"description":"GPT-5 is OpenAI’s most advanced model, offering major improvements in reasoning, code quality, and user experience. It is optimized for complex tasks that require step-by-step reasoning, instruction following, and accuracy...","context_length":400000,"architecture":{"modality":"text+image+file->text","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":"0.0000012500","completion":"0.0000100000","web_search":"0.0100000000","input_cache_read":"0.0000001250"},"top_provider":{"context_length":400000,"max_completion_tokens":128000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["include_reasoning","max_completion_tokens","max_tokens","reasoning","response_format","seed","structured_outputs","tool_choice","tools"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":"2024-09-30","expiration_date":null,"links":{"details":"/api/v1/models/openai/gpt-5-2025-08-07/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"3d","elo":1135,"win_rate":41.4,"rank":68},{"arena":"models","category":"asciiart","elo":1185,"win_rate":49,"rank":29},{"arena":"models","category":"codecategories","elo":1219,"win_rate":54.7,"rank":43},{"arena":"models","category":"dataviz","elo":1281,"win_rate":63.3,"rank":15},{"arena":"models","category":"gamedev","elo":1253,"win_rate":59.5,"rank":31},{"arena":"models","category":"svg","elo":1246,"win_rate":64.1,"rank":14},{"arena":"models","category":"uicomponent","elo":1238,"win_rate":58.3,"rank":33},{"arena":"models","category":"website","elo":1228,"win_rate":53.7,"rank":42}]},"reasoning":{"mandatory":true,"supported_efforts":["high","medium","low","minimal"],"default_effort":"medium"},"aliases":["openai/gpt-5-2025-08-07"]},{"id":"openai/gpt-5-mini","canonical_slug":"openai/gpt-5-mini-2025-08-07","hugging_face_id":"","name":"OpenAI: GPT-5 Mini","created":1754587407,"description":"GPT-5 Mini is a compact version of GPT-5, designed to handle lighter-weight reasoning tasks. It provides the same instruction-following and safety-tuning benefits as GPT-5, but with reduced latency and cost....","context_length":400000,"architecture":{"modality":"text+image+file->text","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":"0.0000002500","completion":"0.0000020000","web_search":"0.0100000000","input_cache_read":"0.0000000250"},"top_provider":{"context_length":400000,"max_completion_tokens":128000,"is_moderated":true},"per_request_limits":null,"supported_parameters":["include_reasoning","max_completion_tokens","max_tokens","reasoning","response_format","seed","structured_outputs","tool_choice","tools"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":"2024-05-31","expiration_date":null,"links":{"details":"/api/v1/models/openai/gpt-5-mini-2025-08-07/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"3d","elo":1113,"win_rate":37.3,"rank":73},{"arena":"models","category":"asciiart","elo":1167,"win_rate":45,"rank":34},{"arena":"models","category":"codecategories","elo":1165,"win_rate":44,"rank":64},{"arena":"models","category":"dataviz","elo":1165,"win_rate":43.4,"rank":63},{"arena":"models","category":"gamedev","elo":1195,"win_rate":46.7,"rank":53},{"arena":"models","category":"svg","elo":1149,"win_rate":46,"rank":41},{"arena":"models","category":"uicomponent","elo":1160,"win_rate":42.6,"rank":58},{"arena":"models","category":"website","elo":1169,"win_rate":44.8,"rank":62}]},"reasoning":{"mandatory":true,"supported_efforts":["high","medium","low","minimal"],"default_effort":"medium"},"aliases":["openai/gpt-5-mini-2025-08-07"]},{"id":"openai/gpt-5-nano","canonical_slug":"openai/gpt-5-nano-2025-08-07","hugging_face_id":"","name":"OpenAI: GPT-5 Nano","created":1754587402,"description":"GPT-5-Nano is the smallest and fastest variant in the GPT-5 system, optimized for developer tools, rapid interactions, and ultra-low latency environments. While limited in reasoning depth compared to its larger...","context_length":400000,"architecture":{"modality":"text+image+file->text","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":"0.0000000500","completion":"0.0000004000","web_search":"0.0100000000","input_cache_read":"0.0000000100"},"top_provider":{"context_length":400000,"max_completion_tokens":null,"is_moderated":false},"per_request_limits":null,"supported_parameters":["include_reasoning","max_completion_tokens","max_tokens","reasoning","response_format","seed","structured_outputs","tool_choice","tools"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":"2024-05-31","expiration_date":null,"links":{"details":"/api/v1/models/openai/gpt-5-nano-2025-08-07/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"3d","elo":1043,"win_rate":36.1,"rank":85},{"arena":"models","category":"codecategories","elo":1135,"win_rate":48.1,"rank":74},{"arena":"models","category":"dataviz","elo":1103,"win_rate":46.7,"rank":76},{"arena":"models","category":"gamedev","elo":1113,"win_rate":46.6,"rank":76},{"arena":"models","category":"uicomponent","elo":1122,"win_rate":52,"rank":70},{"arena":"models","category":"website","elo":1145,"win_rate":48.9,"rank":74}]},"reasoning":{"mandatory":true,"supported_efforts":["high","medium","low","minimal"],"default_effort":"medium"},"aliases":["openai/gpt-5-nano-2025-08-07"]},{"id":"openai/gpt-oss-120b","canonical_slug":"openai/gpt-oss-120b","hugging_face_id":"openai/gpt-oss-120b","name":"OpenAI: gpt-oss-120b","created":1754414231,"description":"gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. It activates 5.1B parameters per forward pass and is optimized...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":"0.0000000300","completion":"0.0000001500"},"top_provider":{"context_length":131072,"max_completion_tokens":131072,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_tokens","min_p","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_a","top_k","top_logprobs","top_p"],"default_parameters":{"temperature":null,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":"2024-06-30","expiration_date":null,"links":{"details":"/api/v1/models/openai/gpt-oss-120b/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"3d","elo":979,"win_rate":29.4,"rank":90},{"arena":"models","category":"codecategories","elo":1014,"win_rate":33.4,"rank":96},{"arena":"models","category":"dataviz","elo":1043,"win_rate":45.1,"rank":86},{"arena":"models","category":"gamedev","elo":1060,"win_rate":40.6,"rank":84},{"arena":"models","category":"uicomponent","elo":980,"win_rate":35.5,"rank":91},{"arena":"models","category":"website","elo":1012,"win_rate":32.5,"rank":99}],"artificial_analysis":{"intelligence_index":23.8,"coding_index":30.4,"agentic_index":13.2}},"reasoning":{"mandatory":true,"supported_efforts":["high","medium","low"],"default_effort":"medium"},"aliases":["openai/gpt-oss-120b"]},{"id":"openai/gpt-oss-20b","canonical_slug":"openai/gpt-oss-20b","hugging_face_id":"openai/gpt-oss-20b","name":"OpenAI: gpt-oss-20b","created":1754414229,"description":"gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license. It uses a Mixture-of-Experts (MoE) architecture with 3.6B active parameters per forward pass, optimized for...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":"0.0000000290","completion":"0.0000001400"},"top_provider":{"context_length":131072,"max_completion_tokens":null,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_tokens","min_p","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{"temperature":null,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":"2024-06-30","expiration_date":null,"links":{"details":"/api/v1/models/openai/gpt-oss-20b/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"dataviz","elo":977,"win_rate":39.7,"rank":92},{"arena":"models","category":"website","elo":896,"win_rate":27.9,"rank":107}],"artificial_analysis":{"intelligence_index":14.9,"coding_index":20.7,"agentic_index":3.1}},"reasoning":{"mandatory":true,"supported_efforts":["high","medium","low"],"default_effort":"medium"},"aliases":["openai/gpt-oss-20b"]},{"id":"anthropic/claude-opus-4.1","canonical_slug":"anthropic/claude-4.1-opus-20250805","hugging_face_id":"","name":"Anthropic: Claude Opus 4.1","created":1754411591,"description":"Claude Opus 4.1 is an updated version of Anthropic’s flagship model, offering improved performance in coding, reasoning, and agentic tasks. It achieves 74.5% on SWE-bench Verified and shows notable gains...","context_length":200000,"architecture":{"modality":"text+image+file->text","input_modalities":["image","text","file"],"output_modalities":["text"],"tokenizer":"Claude","instruct_type":null},"pricing":{"prompt":"0.0000150000","completion":"0.0000750000","web_search":"0.0100000000","input_cache_read":"0.0000015000","input_cache_write":"0.0000187500","input_cache_write_1h":"0.00003"},"top_provider":{"context_length":200000,"max_completion_tokens":32000,"is_moderated":true},"per_request_limits":null,"supported_parameters":["include_reasoning","max_tokens","reasoning","response_format","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_p"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":"2025-01-31","expiration_date":null,"links":{"details":"/api/v1/models/anthropic/claude-4.1-opus-20250805/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"3d","elo":1230,"win_rate":52.5,"rank":35},{"arena":"models","category":"asciiart","elo":1210,"win_rate":51.9,"rank":20},{"arena":"models","category":"codecategories","elo":1221,"win_rate":55.8,"rank":39},{"arena":"models","category":"dataviz","elo":1209,"win_rate":56.4,"rank":43},{"arena":"models","category":"gamedev","elo":1239,"win_rate":58.5,"rank":35},{"arena":"models","category":"svg","elo":1210,"win_rate":60.8,"rank":24},{"arena":"models","category":"uicomponent","elo":1219,"win_rate":58,"rank":41},{"arena":"models","category":"website","elo":1221,"win_rate":55.3,"rank":45}]},"reasoning":{"mandatory":false},"aliases":["anthropic/claude-4.1-opus-20250805"]},{"id":"mistralai/codestral-2508","canonical_slug":"mistralai/codestral-2508","hugging_face_id":"","name":"Mistral: Codestral 2508","created":1754079630,"description":"Mistral's cutting-edge language model for coding released end of July 2025. Codestral specializes in low-latency, high-frequency tasks such as fill-in-the-middle (FIM), code correction and test generation.\n\n[Blog Post](https://mistral.ai/news/codestral-25-08)","context_length":256000,"architecture":{"modality":"text+file->text","input_modalities":["text","file"],"output_modalities":["text"],"tokenizer":"Mistral","instruct_type":null},"pricing":{"prompt":"0.0000003000","completion":"0.0000009000","input_cache_read":"0.0000000300"},"top_provider":{"context_length":256000,"max_completion_tokens":null,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","max_tokens","presence_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_p"],"default_parameters":{"temperature":0.3},"supported_voices":null,"knowledge_cutoff":"2025-03-31","expiration_date":null,"links":{"details":"/api/v1/models/mistralai/codestral-2508/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"codecategories","elo":1058,"win_rate":38.5,"rank":89},{"arena":"models","category":"dataviz","elo":1061,"win_rate":41.7,"rank":84},{"arena":"models","category":"gamedev","elo":1034,"win_rate":36.2,"rank":91},{"arena":"models","category":"uicomponent","elo":1073,"win_rate":46.9,"rank":78},{"arena":"models","category":"website","elo":1057,"win_rate":37.8,"rank":92},{"arena":"models","category":"3d","elo":1099,"win_rate":45.5,"rank":76}]},"aliases":["mistralai/codestral-2508"]},{"id":"qwen/qwen3-coder-30b-a3b-instruct","canonical_slug":"qwen/qwen3-coder-30b-a3b-instruct","hugging_face_id":"Qwen/Qwen3-Coder-30B-A3B-Instruct","name":"Qwen: Qwen3 Coder 30B A3B Instruct","created":1753972379,"description":"Qwen3-Coder-30B-A3B-Instruct is a 30.5B parameter Mixture-of-Experts (MoE) model with 128 experts (8 active per forward pass), designed for advanced code generation, repository-scale understanding, and agentic tool use. Built on the...","context_length":160000,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":null},"pricing":{"prompt":"0.0000000700","completion":"0.0000002700"},"top_provider":{"context_length":160000,"max_completion_tokens":32768,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","logprobs","max_tokens","presence_penalty","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{},"supported_voices":null,"knowledge_cutoff":"2025-06-30","expiration_date":null,"links":{"details":"/api/v1/models/qwen/qwen3-coder-30b-a3b-instruct/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"dataviz","elo":1128,"win_rate":54.7,"rank":73},{"arena":"models","category":"uicomponent","elo":1102,"win_rate":54.1,"rank":72},{"arena":"models","category":"website","elo":1131,"win_rate":57.1,"rank":77}]},"aliases":["qwen/qwen3-coder-30b-a3b-instruct"]},{"id":"qwen/qwen3-30b-a3b-instruct-2507","canonical_slug":"qwen/qwen3-30b-a3b-instruct-2507","hugging_face_id":"Qwen/Qwen3-30B-A3B-Instruct-2507","name":"Qwen: Qwen3 30B A3B Instruct 2507","created":1753806965,"description":"Qwen3-30B-A3B-Instruct-2507 is a 30.5B-parameter mixture-of-experts language model from Qwen, with 3.3B active parameters per inference. It operates in non-thinking mode and is designed for high-quality instruction following, multilingual understanding, and...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":null},"pricing":{"prompt":"0.0000000481","completion":"0.0000001930"},"top_provider":{"context_length":128000,"max_completion_tokens":32000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","logit_bias","logprobs","max_tokens","min_p","presence_penalty","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{},"supported_voices":null,"knowledge_cutoff":"2025-06-30","expiration_date":null,"links":{"details":"/api/v1/models/qwen/qwen3-30b-a3b-instruct-2507/endpoints"},"aliases":["qwen/qwen3-30b-a3b-instruct-2507"]},{"id":"z-ai/glm-4.5","canonical_slug":"z-ai/glm-4.5","hugging_face_id":"zai-org/GLM-4.5","name":"Z.ai: GLM 4.5","created":1753471347,"description":"GLM-4.5 is our latest flagship foundation model, purpose-built for agent-based applications. It leverages a Mixture-of-Experts (MoE) architecture and supports a context length of up to 128k tokens. GLM-4.5 delivers significantly...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000006000","completion":"0.0000022000","input_cache_read":"0.0000001100"},"top_provider":{"context_length":131072,"max_completion_tokens":98304,"is_moderated":false},"per_request_limits":null,"supported_parameters":["include_reasoning","max_tokens","reasoning","response_format","temperature","tool_choice","tools","top_k","top_p"],"default_parameters":{"temperature":0.75,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":"2024-12-31","expiration_date":"2026-12-31","links":{"details":"/api/v1/models/z-ai/glm-4.5/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"3d","elo":1249,"win_rate":59.7,"rank":30},{"arena":"models","category":"codecategories","elo":1216,"win_rate":54.4,"rank":45},{"arena":"models","category":"dataviz","elo":1204,"win_rate":53.3,"rank":47},{"arena":"models","category":"gamedev","elo":1212,"win_rate":54.4,"rank":42},{"arena":"models","category":"svg","elo":1154,"win_rate":50.8,"rank":40},{"arena":"models","category":"uicomponent","elo":1200,"win_rate":55.1,"rank":49},{"arena":"models","category":"website","elo":1213,"win_rate":53.8,"rank":48}]},"reasoning":{"mandatory":false},"aliases":["z-ai/glm-4.5"]},{"id":"z-ai/glm-4.5-air","canonical_slug":"z-ai/glm-4.5-air","hugging_face_id":"zai-org/GLM-4.5-Air","name":"Z.ai: GLM 4.5 Air","created":1753471258,"description":"GLM-4.5-Air is the lightweight variant of our latest flagship model family, also purpose-built for agent-centric applications. Like GLM-4.5, it adopts the Mixture-of-Experts (MoE) architecture but with a more compact parameter...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000001300","completion":"0.0000008500","input_cache_read":"0.0000000250"},"top_provider":{"context_length":131072,"max_completion_tokens":98304,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","max_tokens","presence_penalty","reasoning","repetition_penalty","seed","stop","temperature","tool_choice","tools","top_k","top_p"],"default_parameters":{"temperature":0.75,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":"2024-12-31","expiration_date":null,"links":{"details":"/api/v1/models/z-ai/glm-4.5-air/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"3d","elo":1201,"win_rate":54.1,"rank":44},{"arena":"models","category":"codecategories","elo":1188,"win_rate":51.5,"rank":58},{"arena":"models","category":"dataviz","elo":1236,"win_rate":59.4,"rank":34},{"arena":"models","category":"gamedev","elo":1161,"win_rate":48.4,"rank":62},{"arena":"models","category":"svg","elo":1129,"win_rate":50.8,"rank":46},{"arena":"models","category":"uicomponent","elo":1179,"win_rate":54.6,"rank":54},{"arena":"models","category":"website","elo":1190,"win_rate":51.3,"rank":57}]},"reasoning":{"mandatory":false},"aliases":["z-ai/glm-4.5-air"]},{"id":"qwen/qwen3-235b-a22b-thinking-2507","canonical_slug":"qwen/qwen3-235b-a22b-thinking-2507","hugging_face_id":"Qwen/Qwen3-235B-A22B-Thinking-2507","name":"Qwen: Qwen3 235B A22B Thinking 2507","created":1753449557,"description":"Qwen3-235B-A22B-Thinking-2507 is a high-performance, open-weight Mixture-of-Experts (MoE) language model optimized for complex reasoning tasks. It activates 22B of its 235B parameters per forward pass and natively supports up to 262,144...","context_length":262144,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":"qwen3"},"pricing":{"prompt":"0.0000001000","completion":"0.0000001000","input_cache_read":"0.0000001000"},"top_provider":{"context_length":262144,"max_completion_tokens":262144,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_tokens","min_p","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{"temperature":null,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":"2025-06-30","expiration_date":null,"links":{"details":"/api/v1/models/qwen/qwen3-235b-a22b-thinking-2507/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"3d","elo":1078,"win_rate":40.7,"rank":79},{"arena":"models","category":"codecategories","elo":1083,"win_rate":40.9,"rank":83},{"arena":"models","category":"dataviz","elo":991,"win_rate":32.3,"rank":90},{"arena":"models","category":"gamedev","elo":1025,"win_rate":34.3,"rank":93},{"arena":"models","category":"uicomponent","elo":998,"win_rate":33.9,"rank":90},{"arena":"models","category":"website","elo":1096,"win_rate":42.1,"rank":84}]},"reasoning":{"mandatory":true},"aliases":["qwen/qwen3-235b-a22b-thinking-2507"]},{"id":"qwen/qwen3-coder","canonical_slug":"qwen/qwen3-coder-480b-a35b-07-25","hugging_face_id":"Qwen/Qwen3-Coder-480B-A35B-Instruct","name":"Qwen: Qwen3 Coder 480B A35B","created":1753230546,"description":"Qwen3-Coder-480B-A35B-Instruct is a Mixture-of-Experts (MoE) code generation model developed by the Qwen team. It is optimized for agentic coding tasks such as function calling, tool use, and long-context reasoning over...","context_length":1048576,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":null},"pricing":{"prompt":"0.0000002200","completion":"0.0000018000"},"top_provider":{"context_length":262144,"max_completion_tokens":65536,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","logit_bias","logprobs","max_tokens","min_p","presence_penalty","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{},"supported_voices":null,"knowledge_cutoff":"2025-06-30","expiration_date":null,"links":{"details":"/api/v1/models/qwen/qwen3-coder-480b-a35b-07-25/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"codecategories","elo":1193,"win_rate":61.2,"rank":54},{"arena":"models","category":"dataviz","elo":1126,"win_rate":54.9,"rank":74},{"arena":"models","category":"gamedev","elo":1168,"win_rate":59,"rank":58},{"arena":"models","category":"uicomponent","elo":1170,"win_rate":61.5,"rank":56},{"arena":"models","category":"website","elo":1202,"win_rate":61.7,"rank":55}]},"aliases":["qwen/qwen3-coder-480b-a35b-07-25"]},{"id":"google/gemini-2.5-flash-lite","canonical_slug":"google/gemini-2.5-flash-lite","hugging_face_id":"","name":"Google: Gemini 2.5 Flash Lite","created":1753200276,"description":"Gemini 2.5 Flash-Lite is a lightweight reasoning model in the Gemini 2.5 family, optimized for ultra-low latency and cost efficiency. It offers improved throughput, faster token generation, and better performance...","context_length":1048576,"architecture":{"modality":"text+image+file+audio+video->text","input_modalities":["text","image","file","audio","video"],"output_modalities":["text"],"tokenizer":"Gemini","instruct_type":null},"pricing":{"prompt":"0.0000001000","completion":"0.0000004000","image":"0.0000001000","audio":"0.0000003000","web_search":"0.0140000000","internal_reasoning":"0.0000004","input_cache_read":"0.0000000100","input_cache_write":"0.0000000833"},"top_provider":{"context_length":1048576,"max_completion_tokens":65535,"is_moderated":false},"per_request_limits":null,"supported_parameters":["include_reasoning","max_tokens","reasoning","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_p"],"default_parameters":{"temperature":null,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":"2025-01-31","expiration_date":null,"links":{"details":"/api/v1/models/google/gemini-2.5-flash-lite/endpoints"},"reasoning":{"mandatory":false},"aliases":["google/gemini-2.5-flash-lite"]},{"id":"qwen/qwen3-235b-a22b-2507","canonical_slug":"qwen/qwen3-235b-a22b-07-25","hugging_face_id":"Qwen/Qwen3-235B-A22B-Instruct-2507","name":"Qwen: Qwen3 235B A22B Instruct 2507","created":1753119555,"description":"Qwen3-235B-A22B-Instruct-2507 is a multilingual, instruction-tuned mixture-of-experts language model based on the Qwen3-235B architecture, with 22B active parameters per forward pass. It is optimized for general-purpose text generation, including instruction following,...","context_length":262144,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":null},"pricing":{"prompt":"0.0000000900","completion":"0.0000001000"},"top_provider":{"context_length":262144,"max_completion_tokens":16384,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","logit_bias","logprobs","max_tokens","min_p","presence_penalty","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{},"supported_voices":null,"knowledge_cutoff":"2025-06-30","expiration_date":null,"links":{"details":"/api/v1/models/qwen/qwen3-235b-a22b-07-25/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"3d","elo":1073,"win_rate":41.1,"rank":80},{"arena":"models","category":"codecategories","elo":1089,"win_rate":42.7,"rank":81},{"arena":"models","category":"dataviz","elo":1102,"win_rate":47.7,"rank":77},{"arena":"models","category":"gamedev","elo":1019,"win_rate":35.2,"rank":94},{"arena":"models","category":"uicomponent","elo":1022,"win_rate":38.8,"rank":86},{"arena":"models","category":"website","elo":1102,"win_rate":43.7,"rank":83}]},"aliases":["qwen/qwen3-235b-a22b-07-25"]},{"id":"moonshotai/kimi-k2","canonical_slug":"moonshotai/kimi-k2","hugging_face_id":"moonshotai/Kimi-K2-Instruct","name":"MoonshotAI: Kimi K2 0711","created":1752263252,"description":"Kimi K2 Instruct is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It is optimized for...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000005700","completion":"0.0000023000"},"top_provider":{"context_length":131072,"max_completion_tokens":32768,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","max_tokens","presence_penalty","repetition_penalty","seed","stop","temperature","tool_choice","tools","top_k","top_p"],"default_parameters":{},"supported_voices":null,"knowledge_cutoff":"2024-12-31","expiration_date":null,"links":{"details":"/api/v1/models/moonshotai/kimi-k2/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"codecategories","elo":1084,"win_rate":51.7,"rank":82},{"arena":"models","category":"dataviz","elo":1062,"win_rate":49.4,"rank":83},{"arena":"models","category":"gamedev","elo":1041,"win_rate":46.4,"rank":89},{"arena":"models","category":"uicomponent","elo":1087,"win_rate":55.1,"rank":75},{"arena":"models","category":"website","elo":1094,"win_rate":53.1,"rank":85}]},"aliases":["moonshotai/kimi-k2"]},{"id":"mistralai/mistral-small-3.2-24b-instruct","canonical_slug":"mistralai/mistral-small-3.2-24b-instruct-2506","hugging_face_id":"mistralai/Mistral-Small-3.2-24B-Instruct-2506","name":"Mistral: Mistral Small 3.2 24B","created":1750443016,"description":"Mistral-Small-3.2-24B-Instruct-2506 is an updated 24B parameter model from Mistral optimized for instruction following, repetition reduction, and improved function calling. Compared to the 3.1 release, version 3.2 significantly improves accuracy on...","context_length":128000,"architecture":{"modality":"text+image->text","input_modalities":["image","text"],"output_modalities":["text"],"tokenizer":"Mistral","instruct_type":null},"pricing":{"prompt":"0.0000000750","completion":"0.0000002000"},"top_provider":{"context_length":128000,"max_completion_tokens":16384,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","logit_bias","logprobs","max_tokens","min_p","presence_penalty","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{"temperature":0.3},"supported_voices":null,"knowledge_cutoff":"2023-10-31","expiration_date":null,"links":{"details":"/api/v1/models/mistralai/mistral-small-3.2-24b-instruct-2506/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"codecategories","elo":957,"win_rate":39.8,"rank":101},{"arena":"models","category":"dataviz","elo":971,"win_rate":43.3,"rank":93},{"arena":"models","category":"gamedev","elo":955,"win_rate":39.4,"rank":101},{"arena":"models","category":"uicomponent","elo":963,"win_rate":40.5,"rank":93},{"arena":"models","category":"website","elo":939,"win_rate":38.3,"rank":104}]},"aliases":["mistralai/mistral-small-3.2-24b-instruct-2506"]},{"id":"minimax/minimax-m1","canonical_slug":"minimax/minimax-m1","hugging_face_id":"","name":"MiniMax: MiniMax M1","created":1750200414,"description":"MiniMax-M1 is a large-scale, open-weight reasoning model designed for extended context and high-efficiency inference. It leverages a hybrid Mixture-of-Experts (MoE) architecture paired with a custom \"lightning attention\" mechanism, allowing it...","context_length":1000000,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000004000","completion":"0.0000022000"},"top_provider":{"context_length":1000000,"max_completion_tokens":40000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","max_tokens","presence_penalty","reasoning","repetition_penalty","seed","stop","temperature","tool_choice","tools","top_k","top_p"],"default_parameters":{"temperature":null,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":"2024-06-30","expiration_date":null,"links":{"details":"/api/v1/models/minimax/minimax-m1/endpoints"},"reasoning":{"mandatory":false},"aliases":["minimax/minimax-m1"]},{"id":"google/gemini-2.5-flash","canonical_slug":"google/gemini-2.5-flash","hugging_face_id":"","name":"Google: Gemini 2.5 Flash","created":1750172488,"description":"Gemini 2.5 Flash is Google's state-of-the-art workhorse model, specifically designed for advanced reasoning, coding, mathematics, and scientific tasks. It includes built-in \"thinking\" capabilities, enabling it to provide responses with greater...","context_length":1048576,"architecture":{"modality":"text+image+file+audio+video->text","input_modalities":["file","image","text","audio","video"],"output_modalities":["text"],"tokenizer":"Gemini","instruct_type":null},"pricing":{"prompt":"0.0000003000","completion":"0.0000025000","image":"0.0000003000","audio":"0.0000010000","web_search":"0.0140000000","internal_reasoning":"0.0000025","input_cache_read":"0.0000000300","input_cache_write":"0.0000000833"},"top_provider":{"context_length":1048576,"max_completion_tokens":65535,"is_moderated":false},"per_request_limits":null,"supported_parameters":["include_reasoning","max_tokens","reasoning","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_p"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":"2025-01-31","expiration_date":null,"links":{"details":"/api/v1/models/google/gemini-2.5-flash/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"3d","elo":1150,"win_rate":47.4,"rank":64},{"arena":"models","category":"codecategories","elo":1154,"win_rate":46.9,"rank":69},{"arena":"models","category":"dataviz","elo":1170,"win_rate":48.4,"rank":59},{"arena":"models","category":"gamedev","elo":1133,"win_rate":44.3,"rank":72},{"arena":"models","category":"uicomponent","elo":1148,"win_rate":48.9,"rank":63},{"arena":"models","category":"website","elo":1159,"win_rate":47.1,"rank":68},{"arena":"models","category":"svg","elo":1077,"win_rate":43.1,"rank":57}]},"reasoning":{"mandatory":false},"aliases":["google/gemini-2.5-flash"]},{"id":"google/gemini-2.5-pro","canonical_slug":"google/gemini-2.5-pro","hugging_face_id":"","name":"Google: Gemini 2.5 Pro","created":1750169544,"description":"Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. It employs “thinking” capabilities, enabling it to reason through responses with enhanced accuracy...","context_length":1048576,"architecture":{"modality":"text+image+file+audio+video->text","input_modalities":["text","image","file","audio","video"],"output_modalities":["text"],"tokenizer":"Gemini","instruct_type":null},"pricing":{"prompt":"0.0000012500","completion":"0.0000100000","image":"0.0000012500","audio":"0.0000012500","web_search":"0.0140000000","internal_reasoning":"0.00001","input_cache_read":"0.0000001250","input_cache_write":"0.0000003750"},"top_provider":{"context_length":1048576,"max_completion_tokens":65536,"is_moderated":false},"per_request_limits":null,"supported_parameters":["include_reasoning","max_tokens","reasoning","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_p"],"default_parameters":{"temperature":null,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":"2025-01-31","expiration_date":null,"links":{"details":"/api/v1/models/google/gemini-2.5-pro/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"3d","elo":1161,"win_rate":52.2,"rank":56},{"arena":"models","category":"codecategories","elo":1204,"win_rate":58.3,"rank":51},{"arena":"models","category":"dataviz","elo":1289,"win_rate":71.8,"rank":11},{"arena":"models","category":"gamedev","elo":1179,"win_rate":55.1,"rank":56},{"arena":"models","category":"uicomponent","elo":1200,"win_rate":60.5,"rank":48},{"arena":"models","category":"website","elo":1211,"win_rate":58.8,"rank":49}],"artificial_analysis":{"intelligence_index":25.8,"coding_index":33.3,"agentic_index":7.1}},"reasoning":{"mandatory":true},"aliases":["google/gemini-2.5-pro"]},{"id":"openai/o3-pro","canonical_slug":"openai/o3-pro-2025-06-10","hugging_face_id":"","name":"OpenAI: o3 Pro","created":1749598352,"description":"The o-series of models are trained with reinforcement learning to think before they answer and perform complex reasoning. The o3-pro model uses more compute to think harder and provide consistently...","context_length":200000,"architecture":{"modality":"text+image+file->text","input_modalities":["text","file","image"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":"0.0000200000","completion":"0.0000800000","web_search":"0.0100000000"},"top_provider":{"context_length":200000,"max_completion_tokens":100000,"is_moderated":true},"per_request_limits":null,"supported_parameters":["include_reasoning","max_tokens","reasoning","response_format","seed","structured_outputs","tool_choice","tools"],"default_parameters":{"temperature":null,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":"2024-06-30","expiration_date":null,"links":{"details":"/api/v1/models/openai/o3-pro-2025-06-10/endpoints"},"reasoning":{"mandatory":false},"aliases":["openai/o3-pro-2025-06-10"]},{"id":"google/gemini-2.5-pro-preview","canonical_slug":"google/gemini-2.5-pro-preview-06-05","hugging_face_id":"","name":"Google: Gemini 2.5 Pro Preview 06-05","created":1749137257,"description":"Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. It employs “thinking” capabilities, enabling it to reason through responses with enhanced accuracy...","context_length":1048576,"architecture":{"modality":"text+image+file+audio->text","input_modalities":["file","image","text","audio"],"output_modalities":["text"],"tokenizer":"Gemini","instruct_type":null},"pricing":{"prompt":"0.0000012500","completion":"0.0000100000","image":"0.0000012500","audio":"0.0000012500","web_search":"0.0140000000","internal_reasoning":"0.00001","input_cache_read":"0.0000001250","input_cache_write":"0.0000003750"},"top_provider":{"context_length":1048576,"max_completion_tokens":65536,"is_moderated":false},"per_request_limits":null,"supported_parameters":["include_reasoning","max_tokens","reasoning","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_p"],"default_parameters":{},"supported_voices":null,"knowledge_cutoff":"2025-01-31","expiration_date":null,"links":{"details":"/api/v1/models/google/gemini-2.5-pro-preview-06-05/endpoints"},"reasoning":{"mandatory":true},"aliases":["google/gemini-2.5-pro-preview-06-05"]},{"id":"deepseek/deepseek-r1-0528","canonical_slug":"deepseek/deepseek-r1-0528","hugging_face_id":"deepseek-ai/DeepSeek-R1-0528","name":"DeepSeek: R1 0528","created":1748455170,"description":"May 28th update to the [original DeepSeek R1](/deepseek/deepseek-r1) Performance on par with [OpenAI o1](/openai/o1), but open-sourced and with fully open reasoning tokens. It's 671B parameters in size, with 37B active...","context_length":163840,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"DeepSeek","instruct_type":"deepseek-r1"},"pricing":{"prompt":"0.0000005000","completion":"0.0000021500","input_cache_read":"0.0000003500"},"top_provider":{"context_length":163840,"max_completion_tokens":32768,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_tokens","min_p","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{"temperature":null,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":"2025-03-31","expiration_date":null,"links":{"details":"/api/v1/models/deepseek/deepseek-r1-0528/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"3d","elo":1190,"win_rate":53.4,"rank":49},{"arena":"models","category":"codecategories","elo":1189,"win_rate":52.6,"rank":57},{"arena":"models","category":"dataviz","elo":1222,"win_rate":60.7,"rank":36},{"arena":"models","category":"gamedev","elo":1165,"win_rate":49.5,"rank":59},{"arena":"models","category":"svg","elo":1096,"win_rate":48.7,"rank":50},{"arena":"models","category":"uicomponent","elo":1160,"win_rate":55.1,"rank":57},{"arena":"models","category":"website","elo":1193,"win_rate":52.7,"rank":56}]},"reasoning":{"mandatory":true},"aliases":["deepseek/deepseek-r1-0528"]},{"id":"anthropic/claude-opus-4","canonical_slug":"anthropic/claude-4-opus-20250522","hugging_face_id":"","name":"Anthropic: Claude Opus 4","created":1747931245,"description":"Claude Opus 4 is benchmarked as the world’s best coding model, at time of release, bringing sustained performance on complex, long-running tasks and agent workflows. It sets new benchmarks in...","context_length":200000,"architecture":{"modality":"text+image+file->text","input_modalities":["image","text","file"],"output_modalities":["text"],"tokenizer":"Claude","instruct_type":null},"pricing":{"prompt":"0.0000150000","completion":"0.0000750000","web_search":"0.0100000000","input_cache_read":"0.0000015000","input_cache_write":"0.0000187500","input_cache_write_1h":"0.00003"},"top_provider":{"context_length":200000,"max_completion_tokens":32000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["include_reasoning","max_tokens","reasoning","stop","temperature","tool_choice","tools","top_p"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":"2025-01-31","expiration_date":null,"links":{"details":"/api/v1/models/anthropic/claude-4-opus-20250522/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"3d","elo":1217,"win_rate":57.7,"rank":39},{"arena":"models","category":"codecategories","elo":1211,"win_rate":55.6,"rank":49},{"arena":"models","category":"dataviz","elo":1189,"win_rate":57.9,"rank":53},{"arena":"models","category":"gamedev","elo":1238,"win_rate":59.9,"rank":36},{"arena":"models","category":"svg","elo":1185,"win_rate":57.7,"rank":35},{"arena":"models","category":"uicomponent","elo":1209,"win_rate":59.2,"rank":45},{"arena":"models","category":"website","elo":1209,"win_rate":54.6,"rank":51}]},"reasoning":{"mandatory":false},"aliases":["anthropic/claude-4-opus-20250522"]},{"id":"anthropic/claude-sonnet-4","canonical_slug":"anthropic/claude-4-sonnet-20250522","hugging_face_id":"","name":"Anthropic: Claude Sonnet 4","created":1747930371,"description":"Claude Sonnet 4 significantly enhances the capabilities of its predecessor, Sonnet 3.7, excelling in both coding and reasoning tasks with improved precision and controllability. Achieving state-of-the-art performance on SWE-bench (72.7%),...","context_length":1000000,"architecture":{"modality":"text+image+file->text","input_modalities":["image","text","file"],"output_modalities":["text"],"tokenizer":"Claude","instruct_type":null},"pricing":{"prompt":"0.0000030000","completion":"0.0000150000","web_search":"0.0100000000","input_cache_read":"0.0000003000","input_cache_write":"0.0000037500","input_cache_write_1h":"0.000006"},"top_provider":{"context_length":1000000,"max_completion_tokens":64000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["include_reasoning","max_tokens","reasoning","stop","temperature","tool_choice","tools","top_k","top_p"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":"2025-01-31","expiration_date":null,"links":{"details":"/api/v1/models/anthropic/claude-4-sonnet-20250522/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"3d","elo":1218,"win_rate":57.8,"rank":38},{"arena":"models","category":"codecategories","elo":1192,"win_rate":53.4,"rank":55},{"arena":"models","category":"dataviz","elo":1196,"win_rate":55.8,"rank":50},{"arena":"models","category":"gamedev","elo":1206,"win_rate":54.9,"rank":45},{"arena":"models","category":"svg","elo":1136,"win_rate":51.1,"rank":45},{"arena":"models","category":"uicomponent","elo":1185,"win_rate":58,"rank":53},{"arena":"models","category":"website","elo":1189,"win_rate":52.4,"rank":58}]},"reasoning":{"mandatory":false},"aliases":["anthropic/claude-4-sonnet-20250522"]},{"id":"mistralai/mistral-medium-3","canonical_slug":"mistralai/mistral-medium-3","hugging_face_id":"","name":"Mistral: Mistral Medium 3","created":1746627341,"description":"Mistral Medium 3 is a high-performance enterprise-grade language model designed to deliver frontier-level capabilities at significantly reduced operational cost. It balances state-of-the-art reasoning and multimodal performance with 8× lower cost...","context_length":131072,"architecture":{"modality":"text+image+file->text","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"Mistral","instruct_type":null},"pricing":{"prompt":"0.0000004000","completion":"0.0000020000","input_cache_read":"0.0000000400"},"top_provider":{"context_length":131072,"max_completion_tokens":null,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","max_tokens","presence_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_p"],"default_parameters":{"temperature":0.3},"supported_voices":null,"knowledge_cutoff":"2025-03-31","expiration_date":null,"links":{"details":"/api/v1/models/mistralai/mistral-medium-3/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"3d","elo":1161,"win_rate":54.6,"rank":57},{"arena":"models","category":"codecategories","elo":1119,"win_rate":48.1,"rank":77},{"arena":"models","category":"dataviz","elo":1075,"win_rate":45.7,"rank":81},{"arena":"models","category":"gamedev","elo":1085,"win_rate":45.3,"rank":81},{"arena":"models","category":"uicomponent","elo":1083,"win_rate":50,"rank":76},{"arena":"models","category":"website","elo":1123,"win_rate":47.7,"rank":80}]},"aliases":["mistralai/mistral-medium-3"]},{"id":"google/gemini-2.5-pro-preview-05-06","canonical_slug":"google/gemini-2.5-pro-preview-03-25","hugging_face_id":"","name":"Google: Gemini 2.5 Pro Preview 05-06","created":1746578513,"description":"Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. It employs “thinking” capabilities, enabling it to reason through responses with enhanced accuracy...","context_length":1048576,"architecture":{"modality":"text+image+file+audio+video->text","input_modalities":["text","image","file","audio","video"],"output_modalities":["text"],"tokenizer":"Gemini","instruct_type":null},"pricing":{"prompt":"0.0000012500","completion":"0.0000100000","image":"0.0000012500","audio":"0.0000012500","web_search":"0.0140000000","internal_reasoning":"0.00001","input_cache_read":"0.0000001250","input_cache_write":"0.0000003750"},"top_provider":{"context_length":1048576,"max_completion_tokens":65535,"is_moderated":false},"per_request_limits":null,"supported_parameters":["include_reasoning","max_tokens","reasoning","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_p"],"default_parameters":{"temperature":null,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":"2025-01-31","expiration_date":null,"links":{"details":"/api/v1/models/google/gemini-2.5-pro-preview-03-25/endpoints"},"reasoning":{"mandatory":true},"aliases":["google/gemini-2.5-pro-preview-03-25"]},{"id":"arcee-ai/virtuoso-large","canonical_slug":"arcee-ai/virtuoso-large","hugging_face_id":"","name":"Arcee AI: Virtuoso Large","created":1746478885,"description":"Virtuoso‑Large is Arcee's top‑tier general‑purpose LLM at 72 B parameters, tuned to tackle cross‑domain reasoning, creative writing and enterprise QA. Unlike many 70 B peers, it retains the 128 k...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000007500","completion":"0.0000012000"},"top_provider":{"context_length":131072,"max_completion_tokens":64000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","logit_bias","max_tokens","min_p","presence_penalty","repetition_penalty","stop","temperature","tool_choice","tools","top_k","top_p"],"default_parameters":{},"supported_voices":null,"knowledge_cutoff":"2025-03-31","expiration_date":null,"links":{"details":"/api/v1/models/arcee-ai/virtuoso-large/endpoints"},"aliases":["arcee-ai/virtuoso-large"]},{"id":"qwen/qwen3-30b-a3b","canonical_slug":"qwen/qwen3-30b-a3b-04-28","hugging_face_id":"Qwen/Qwen3-30B-A3B","name":"Qwen: Qwen3 30B A3B","created":1745878604,"description":"Qwen3, the latest generation in the Qwen large language model series, features both dense and mixture-of-experts (MoE) architectures to excel in reasoning, multilingual support, and advanced agent tasks. Its unique...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":"qwen3"},"pricing":{"prompt":"0.0000001200","completion":"0.0000005000"},"top_provider":{"context_length":40960,"max_completion_tokens":16384,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_tokens","min_p","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{"temperature":null,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":"2025-03-31","expiration_date":null,"links":{"details":"/api/v1/models/qwen/qwen3-30b-a3b-04-28/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"codecategories","elo":992,"win_rate":37.5,"rank":98},{"arena":"models","category":"dataviz","elo":1011,"win_rate":39,"rank":88},{"arena":"models","category":"gamedev","elo":965,"win_rate":33.8,"rank":99},{"arena":"models","category":"uicomponent","elo":1002,"win_rate":42.4,"rank":89},{"arena":"models","category":"website","elo":998,"win_rate":37.7,"rank":100}]},"reasoning":{"mandatory":false,"default_enabled":true},"aliases":["qwen/qwen3-30b-a3b-04-28"]},{"id":"qwen/qwen3-8b","canonical_slug":"qwen/qwen3-8b-04-28","hugging_face_id":"Qwen/Qwen3-8B","name":"Qwen: Qwen3 8B","created":1745876632,"description":"Qwen3-8B is a dense 8.2B parameter causal language model from the Qwen3 series, designed for both reasoning-heavy tasks and efficient dialogue. It supports seamless switching between \"thinking\" mode for math,...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":"qwen3"},"pricing":{"prompt":"0.0000000500","completion":"0.0000004000","input_cache_read":"0.0000000500"},"top_provider":{"context_length":40960,"max_completion_tokens":8192,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","max_tokens","min_p","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_p"],"default_parameters":{"temperature":0.6,"top_p":0.95,"top_k":20,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":"2025-03-31","expiration_date":null,"links":{"details":"/api/v1/models/qwen/qwen3-8b-04-28/endpoints"},"reasoning":{"mandatory":false,"default_enabled":true},"aliases":["qwen/qwen3-8b-04-28"]},{"id":"qwen/qwen3-14b","canonical_slug":"qwen/qwen3-14b-04-28","hugging_face_id":"Qwen/Qwen3-14B","name":"Qwen: Qwen3 14B","created":1745876478,"description":"Qwen3-14B is a dense 14.8B parameter causal language model from the Qwen3 series, designed for both complex reasoning and efficient dialogue. It supports seamless switching between a \"thinking\" mode for...","context_length":131702,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":"qwen3"},"pricing":{"prompt":"0.0000001000","completion":"0.0000002400"},"top_provider":{"context_length":40960,"max_completion_tokens":40960,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_tokens","min_p","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{},"supported_voices":null,"knowledge_cutoff":"2025-03-31","expiration_date":null,"links":{"details":"/api/v1/models/qwen/qwen3-14b-04-28/endpoints"},"reasoning":{"mandatory":false},"aliases":["qwen/qwen3-14b-04-28"]},{"id":"qwen/qwen3-32b","canonical_slug":"qwen/qwen3-32b-04-28","hugging_face_id":"Qwen/Qwen3-32B","name":"Qwen: Qwen3 32B","created":1745875945,"description":"Qwen3-32B is a dense 32.8B parameter causal language model from the Qwen3 series, optimized for both complex reasoning and efficient dialogue. It supports seamless switching between a \"thinking\" mode for...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":"qwen3"},"pricing":{"prompt":"0.0000000800","completion":"0.0000002800"},"top_provider":{"context_length":40960,"max_completion_tokens":16384,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_tokens","min_p","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{},"supported_voices":null,"knowledge_cutoff":"2025-03-31","expiration_date":null,"links":{"details":"/api/v1/models/qwen/qwen3-32b-04-28/endpoints"},"reasoning":{"mandatory":false},"aliases":["qwen/qwen3-32b-04-28"]},{"id":"qwen/qwen3-235b-a22b","canonical_slug":"qwen/qwen3-235b-a22b-04-28","hugging_face_id":"Qwen/Qwen3-235B-A22B","name":"Qwen: Qwen3 235B A22B","created":1745875757,"description":"Qwen3-235B-A22B is a 235B parameter mixture-of-experts (MoE) model developed by Qwen, activating 22B parameters per forward pass. It supports seamless switching between a \"thinking\" mode for complex reasoning, math, and...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":"qwen3"},"pricing":{"prompt":"0.0000004550","completion":"0.0000018200"},"top_provider":{"context_length":131072,"max_completion_tokens":8192,"is_moderated":false},"per_request_limits":null,"supported_parameters":["include_reasoning","max_tokens","presence_penalty","reasoning","response_format","seed","temperature","tool_choice","tools","top_p"],"default_parameters":{},"supported_voices":null,"knowledge_cutoff":"2025-03-31","expiration_date":null,"links":{"details":"/api/v1/models/qwen/qwen3-235b-a22b-04-28/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"3d","elo":937,"win_rate":24.5,"rank":93},{"arena":"models","category":"codecategories","elo":1053,"win_rate":38.3,"rank":90},{"arena":"models","category":"dataviz","elo":1045,"win_rate":41,"rank":85},{"arena":"models","category":"gamedev","elo":996,"win_rate":33.1,"rank":97},{"arena":"models","category":"uicomponent","elo":1018,"win_rate":39.1,"rank":87},{"arena":"models","category":"website","elo":1074,"win_rate":40.5,"rank":88}]},"reasoning":{"mandatory":false},"aliases":["qwen/qwen3-235b-a22b-04-28"]},{"id":"openai/o4-mini-high","canonical_slug":"openai/o4-mini-high-2025-04-16","hugging_face_id":"","name":"OpenAI: o4 Mini High","created":1744824212,"description":"OpenAI o4-mini-high is the same model as [o4-mini](/openai/o4-mini) with reasoning_effort set to high. OpenAI o4-mini is a compact reasoning model in the o-series, optimized for fast, cost-efficient performance while retaining...","context_length":200000,"architecture":{"modality":"text+image+file->text","input_modalities":["image","text","file"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":"0.0000011000","completion":"0.0000044000","web_search":"0.0100000000","input_cache_read":"0.0000002750"},"top_provider":{"context_length":200000,"max_completion_tokens":100000,"is_moderated":true},"per_request_limits":null,"supported_parameters":["include_reasoning","max_tokens","reasoning","response_format","seed","structured_outputs","tool_choice","tools"],"default_parameters":{"temperature":null,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":"2024-06-30","expiration_date":null,"links":{"details":"/api/v1/models/openai/o4-mini-high-2025-04-16/endpoints"},"reasoning":{"mandatory":true,"supported_efforts":["high"],"default_effort":"high"},"aliases":["openai/o4-mini-high-2025-04-16"]},{"id":"openai/o3","canonical_slug":"openai/o3-2025-04-16","hugging_face_id":"","name":"OpenAI: o3","created":1744823457,"description":"o3 is a well-rounded and powerful model across domains. It sets a new standard for math, science, coding, and visual reasoning tasks. It also excels at technical writing and instruction-following....","context_length":200000,"architecture":{"modality":"text+image+file->text","input_modalities":["image","text","file"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":"0.0000020000","completion":"0.0000080000","web_search":"0.0100000000","input_cache_read":"0.0000005000"},"top_provider":{"context_length":200000,"max_completion_tokens":100000,"is_moderated":true},"per_request_limits":null,"supported_parameters":["include_reasoning","max_tokens","reasoning","response_format","seed","structured_outputs","tool_choice","tools"],"default_parameters":{},"supported_voices":null,"knowledge_cutoff":"2024-06-30","expiration_date":null,"links":{"details":"/api/v1/models/openai/o3-2025-04-16/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"codecategories","elo":1070,"win_rate":51.9,"rank":85},{"arena":"models","category":"dataviz","elo":1200,"win_rate":48.1,"rank":49},{"arena":"models","category":"gamedev","elo":1101,"win_rate":56.9,"rank":78},{"arena":"models","category":"uicomponent","elo":1073,"win_rate":53.3,"rank":79},{"arena":"models","category":"website","elo":1080,"win_rate":53.8,"rank":87}]},"reasoning":{"mandatory":false},"aliases":["openai/o3-2025-04-16"]},{"id":"openai/o4-mini","canonical_slug":"openai/o4-mini-2025-04-16","hugging_face_id":"","name":"OpenAI: o4 Mini","created":1744820942,"description":"OpenAI o4-mini is a compact reasoning model in the o-series, optimized for fast, cost-efficient performance while retaining strong multimodal and agentic capabilities. It supports tool use and demonstrates competitive reasoning...","context_length":200000,"architecture":{"modality":"text+image+file->text","input_modalities":["image","text","file"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":"0.0000011000","completion":"0.0000044000","web_search":"0.0100000000","input_cache_read":"0.0000002750"},"top_provider":{"context_length":200000,"max_completion_tokens":100000,"is_moderated":true},"per_request_limits":null,"supported_parameters":["include_reasoning","max_tokens","reasoning","response_format","seed","structured_outputs","tool_choice","tools"],"default_parameters":{},"supported_voices":null,"knowledge_cutoff":"2024-06-30","expiration_date":null,"links":{"details":"/api/v1/models/openai/o4-mini-2025-04-16/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"3d","elo":932,"win_rate":34,"rank":94},{"arena":"models","category":"codecategories","elo":1026,"win_rate":46.4,"rank":94},{"arena":"models","category":"dataviz","elo":1034,"win_rate":50,"rank":87},{"arena":"models","category":"gamedev","elo":1070,"win_rate":50,"rank":82},{"arena":"models","category":"uicomponent","elo":1040,"win_rate":46.9,"rank":83},{"arena":"models","category":"website","elo":1029,"win_rate":47.1,"rank":95}]},"reasoning":{"mandatory":false},"aliases":["openai/o4-mini-2025-04-16"]},{"id":"openai/gpt-4.1","canonical_slug":"openai/gpt-4.1-2025-04-14","hugging_face_id":"","name":"OpenAI: GPT-4.1","created":1744651385,"description":"GPT-4.1 is a flagship large language model optimized for advanced instruction following, real-world software engineering, and long-context reasoning. It supports a 1 million token context window and outperforms GPT-4o and...","context_length":1047576,"architecture":{"modality":"text+image+file->text","input_modalities":["image","text","file"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":"0.0000020000","completion":"0.0000080000","web_search":"0.0100000000","input_cache_read":"0.0000005000"},"top_provider":{"context_length":1047576,"max_completion_tokens":null,"is_moderated":false},"per_request_limits":null,"supported_parameters":["max_completion_tokens","max_tokens","response_format","seed","structured_outputs","temperature","tool_choice","tools","top_p"],"default_parameters":{},"supported_voices":null,"knowledge_cutoff":"2024-06-30","expiration_date":null,"links":{"details":"/api/v1/models/openai/gpt-4.1-2025-04-14/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"3d","elo":928,"win_rate":30.9,"rank":95},{"arena":"models","category":"codecategories","elo":1076,"win_rate":50.9,"rank":84},{"arena":"models","category":"dataviz","elo":1147,"win_rate":59.5,"rank":65},{"arena":"models","category":"gamedev","elo":1147,"win_rate":59.1,"rank":65},{"arena":"models","category":"uicomponent","elo":1057,"win_rate":49.7,"rank":80},{"arena":"models","category":"website","elo":1082,"win_rate":52.3,"rank":86}]},"aliases":["openai/gpt-4.1-2025-04-14"]},{"id":"openai/gpt-4.1-mini","canonical_slug":"openai/gpt-4.1-mini-2025-04-14","hugging_face_id":"","name":"OpenAI: GPT-4.1 Mini","created":1744651381,"description":"GPT-4.1 Mini is a mid-sized model delivering performance competitive with GPT-4o at substantially lower latency and cost. It retains a 1 million token context window and scores 45.1% on hard...","context_length":1047576,"architecture":{"modality":"text+image+file->text","input_modalities":["image","text","file"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":"0.0000004000","completion":"0.0000016000","web_search":"0.0100000000","input_cache_read":"0.0000001000"},"top_provider":{"context_length":1047576,"max_completion_tokens":32768,"is_moderated":true},"per_request_limits":null,"supported_parameters":["max_completion_tokens","max_tokens","response_format","seed","structured_outputs","temperature","tool_choice","tools","top_p"],"default_parameters":{},"supported_voices":null,"knowledge_cutoff":"2024-06-30","expiration_date":null,"links":{"details":"/api/v1/models/openai/gpt-4.1-mini-2025-04-14/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"3d","elo":915,"win_rate":30.5,"rank":96},{"arena":"models","category":"codecategories","elo":1044,"win_rate":47.5,"rank":92},{"arena":"models","category":"dataviz","elo":1077,"win_rate":49.2,"rank":79},{"arena":"models","category":"gamedev","elo":1138,"win_rate":58.5,"rank":70},{"arena":"models","category":"uicomponent","elo":1017,"win_rate":45.4,"rank":88},{"arena":"models","category":"website","elo":1041,"win_rate":47.8,"rank":93}]},"aliases":["openai/gpt-4.1-mini-2025-04-14"]},{"id":"openai/gpt-4.1-nano","canonical_slug":"openai/gpt-4.1-nano-2025-04-14","hugging_face_id":"","name":"OpenAI: GPT-4.1 Nano","created":1744651369,"description":"For tasks that demand low latency, GPT‑4.1 nano is the fastest and cheapest model in the GPT-4.1 series. It delivers exceptional performance at a small size with its 1 million...","context_length":1047576,"architecture":{"modality":"text+image+file->text","input_modalities":["image","text","file"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":"0.0000001000","completion":"0.0000004000","web_search":"0.0100000000","input_cache_read":"0.0000000250"},"top_provider":{"context_length":1047576,"max_completion_tokens":32768,"is_moderated":true},"per_request_limits":null,"supported_parameters":["max_completion_tokens","max_tokens","response_format","seed","structured_outputs","temperature","tool_choice","tools","top_p"],"default_parameters":{},"supported_voices":null,"knowledge_cutoff":"2024-06-30","expiration_date":null,"links":{"details":"/api/v1/models/openai/gpt-4.1-nano-2025-04-14/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"3d","elo":1003,"win_rate":46,"rank":89},{"arena":"models","category":"codecategories","elo":1013,"win_rate":47.3,"rank":97},{"arena":"models","category":"dataviz","elo":935,"win_rate":41.1,"rank":98},{"arena":"models","category":"gamedev","elo":1037,"win_rate":49.6,"rank":90},{"arena":"models","category":"uicomponent","elo":972,"win_rate":43.9,"rank":92},{"arena":"models","category":"website","elo":1017,"win_rate":48.1,"rank":98}]},"aliases":["openai/gpt-4.1-nano-2025-04-14"]},{"id":"meta-llama/llama-4-maverick","canonical_slug":"meta-llama/llama-4-maverick-17b-128e-instruct","hugging_face_id":"meta-llama/Llama-4-Maverick-17B-128E-Instruct","name":"Meta: Llama 4 Maverick","created":1743881822,"description":"Llama 4 Maverick 17B Instruct (128E) is a high-capacity multimodal language model from Meta, built on a mixture-of-experts (MoE) architecture with 128 experts and 17 billion active parameters per forward...","context_length":1048576,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Llama4","instruct_type":null},"pricing":{"prompt":"0.0000001500","completion":"0.0000006000"},"top_provider":{"context_length":1048576,"max_completion_tokens":16384,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","logit_bias","logprobs","max_tokens","min_p","presence_penalty","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{},"supported_voices":null,"knowledge_cutoff":"2024-08-31","expiration_date":null,"links":{"details":"/api/v1/models/meta-llama/llama-4-maverick-17b-128e-instruct/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"3d","elo":978,"win_rate":40.2,"rank":91},{"arena":"models","category":"codecategories","elo":930,"win_rate":35.8,"rank":102},{"arena":"models","category":"dataviz","elo":926,"win_rate":38.4,"rank":100},{"arena":"models","category":"gamedev","elo":905,"win_rate":33.7,"rank":103},{"arena":"models","category":"uicomponent","elo":955,"win_rate":40.8,"rank":94},{"arena":"models","category":"website","elo":915,"win_rate":34.4,"rank":105}],"artificial_analysis":{"intelligence_index":14.3,"coding_index":16.3,"agentic_index":1.3}},"aliases":["meta-llama/llama-4-maverick-17b-128e-instruct"]},{"id":"meta-llama/llama-4-scout","canonical_slug":"meta-llama/llama-4-scout-17b-16e-instruct","hugging_face_id":"meta-llama/Llama-4-Scout-17B-16E-Instruct","name":"Meta: Llama 4 Scout","created":1743881519,"description":"Llama 4 Scout 17B Instruct (16E) is a mixture-of-experts (MoE) language model developed by Meta, activating 17 billion parameters out of a total of 109B. It supports native multimodal input...","context_length":10000000,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Llama4","instruct_type":null},"pricing":{"prompt":"0.0000001000","completion":"0.0000003000"},"top_provider":{"context_length":327680,"max_completion_tokens":16384,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","logit_bias","max_tokens","min_p","presence_penalty","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_p"],"default_parameters":{},"supported_voices":null,"knowledge_cutoff":"2024-08-31","expiration_date":null,"links":{"details":"/api/v1/models/meta-llama/llama-4-scout-17b-16e-instruct/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"codecategories","elo":840,"win_rate":26.6,"rank":106},{"arena":"models","category":"dataviz","elo":939,"win_rate":39.3,"rank":96},{"arena":"models","category":"gamedev","elo":840,"win_rate":27.4,"rank":105},{"arena":"models","category":"uicomponent","elo":824,"win_rate":25.5,"rank":100},{"arena":"models","category":"website","elo":794,"win_rate":22.7,"rank":112}],"artificial_analysis":{"intelligence_index":10,"coding_index":8.2,"agentic_index":1.1}},"aliases":["meta-llama/llama-4-scout-17b-16e-instruct"]},{"id":"deepseek/deepseek-chat-v3-0324","canonical_slug":"deepseek/deepseek-chat-v3-0324","hugging_face_id":"deepseek-ai/DeepSeek-V3-0324","name":"DeepSeek: DeepSeek V3 0324","created":1742824755,"description":"DeepSeek V3, a 685B-parameter, mixture-of-experts model, is the latest iteration of the flagship chat model family from the DeepSeek team. It succeeds the [DeepSeek V3](/deepseek/deepseek-chat-v3) model and performs really well...","context_length":163840,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"DeepSeek","instruct_type":null},"pricing":{"prompt":"0.0000002000","completion":"0.0000007700","input_cache_read":"0.0000001350"},"top_provider":{"context_length":163840,"max_completion_tokens":16384,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","logit_bias","max_tokens","min_p","presence_penalty","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_p"],"default_parameters":{},"supported_voices":null,"knowledge_cutoff":"2024-07-31","expiration_date":null,"links":{"details":"/api/v1/models/deepseek/deepseek-chat-v3-0324/endpoints"},"aliases":["deepseek/deepseek-chat-v3-0324"]},{"id":"google/gemma-3-12b-it","canonical_slug":"google/gemma-3-12b-it","hugging_face_id":"google/gemma-3-12b-it","name":"Google: Gemma 3 12B","created":1741902625,"description":"Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities,...","context_length":131072,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Gemini","instruct_type":"gemma"},"pricing":{"prompt":"0.0000000500","completion":"0.0000001500"},"top_provider":{"context_length":131072,"max_completion_tokens":16384,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","logit_bias","max_tokens","min_p","presence_penalty","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_p"],"default_parameters":{},"supported_voices":null,"knowledge_cutoff":"2024-08-31","expiration_date":null,"links":{"details":"/api/v1/models/google/gemma-3-12b-it/endpoints"},"aliases":["google/gemma-3-12b-it"]},{"id":"google/gemma-3-27b-it","canonical_slug":"google/gemma-3-27b-it","hugging_face_id":"google/gemma-3-27b-it","name":"Google: Gemma 3 27B","created":1741756359,"description":"Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities,...","context_length":131072,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Gemini","instruct_type":"gemma"},"pricing":{"prompt":"0.0000000800","completion":"0.0000001600"},"top_provider":{"context_length":131072,"max_completion_tokens":16384,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","logit_bias","logprobs","max_tokens","min_p","presence_penalty","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{"temperature":null,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":"2024-08-31","expiration_date":null,"links":{"details":"/api/v1/models/google/gemma-3-27b-it/endpoints"},"benchmarks":{"design_arena":[],"artificial_analysis":{"intelligence_index":7.4,"coding_index":10.1,"agentic_index":0.3}},"aliases":["google/gemma-3-27b-it"]},{"id":"mistralai/mistral-saba","canonical_slug":"mistralai/mistral-saba-2502","hugging_face_id":"","name":"Mistral: Saba","created":1739803239,"description":"Mistral Saba is a 24B-parameter language model specifically designed for the Middle East and South Asia, delivering accurate and contextually relevant responses while maintaining efficient performance. Trained on curated regional...","context_length":32768,"architecture":{"modality":"text+file->text","input_modalities":["text","file"],"output_modalities":["text"],"tokenizer":"Mistral","instruct_type":null},"pricing":{"prompt":"0.0000002000","completion":"0.0000006000","input_cache_read":"0.0000000200"},"top_provider":{"context_length":32768,"max_completion_tokens":null,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","max_tokens","presence_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_p"],"default_parameters":{"temperature":0.3},"supported_voices":null,"knowledge_cutoff":"2024-09-30","expiration_date":null,"links":{"details":"/api/v1/models/mistralai/mistral-saba-2502/endpoints"},"aliases":["mistralai/mistral-saba-2502"]},{"id":"openai/o3-mini-high","canonical_slug":"openai/o3-mini-high-2025-01-31","hugging_face_id":"","name":"OpenAI: o3 Mini High","created":1739372611,"description":"OpenAI o3-mini-high is the same model as [o3-mini](/openai/o3-mini) with reasoning_effort set to high. o3-mini is a cost-efficient language model optimized for STEM reasoning tasks, particularly excelling in science, mathematics, and...","context_length":200000,"architecture":{"modality":"text+file->text","input_modalities":["text","file"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":"0.0000011000","completion":"0.0000044000","web_search":"0.0100000000","input_cache_read":"0.0000005500"},"top_provider":{"context_length":200000,"max_completion_tokens":100000,"is_moderated":true},"per_request_limits":null,"supported_parameters":["include_reasoning","max_tokens","reasoning","response_format","seed","structured_outputs","tool_choice","tools"],"default_parameters":{"temperature":null,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":"2023-10-31","expiration_date":null,"links":{"details":"/api/v1/models/openai/o3-mini-high-2025-01-31/endpoints"},"benchmarks":{"design_arena":[],"artificial_analysis":{"intelligence_index":null,"coding_index":42.1,"agentic_index":null}},"reasoning":{"mandatory":true,"supported_efforts":["high"],"default_effort":"high"},"aliases":["openai/o3-mini-high-2025-01-31"]},{"id":"qwen/qwen-plus","canonical_slug":"qwen/qwen-plus-2025-01-25","hugging_face_id":"","name":"Qwen: Qwen-Plus","created":1738409840,"description":"Qwen-Plus, based on the Qwen2.5 foundation model, is a 131K context model with a balanced performance, speed, and cost combination.","context_length":1000000,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen","instruct_type":null},"pricing":{"prompt":"0.0000002600","completion":"0.0000007800","input_cache_read":"0.0000000520","input_cache_write":"0.0000003250"},"top_provider":{"context_length":1000000,"max_completion_tokens":32768,"is_moderated":false},"per_request_limits":null,"supported_parameters":["logprobs","max_tokens","presence_penalty","response_format","seed","structured_outputs","temperature","tool_choice","tools","top_logprobs","top_p"],"default_parameters":{},"supported_voices":null,"knowledge_cutoff":"2025-03-31","expiration_date":null,"links":{"details":"/api/v1/models/qwen/qwen-plus-2025-01-25/endpoints"},"aliases":["qwen/qwen-plus-2025-01-25"]},{"id":"openai/o3-mini","canonical_slug":"openai/o3-mini-2025-01-31","hugging_face_id":"","name":"OpenAI: o3 Mini","created":1738351721,"description":"OpenAI o3-mini is a cost-efficient language model optimized for STEM reasoning tasks, particularly excelling in science, mathematics, and coding. This model supports the `reasoning_effort` parameter, which can be set to...","context_length":200000,"architecture":{"modality":"text+file->text","input_modalities":["text","file"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":"0.0000011000","completion":"0.0000044000","web_search":"0.0100000000","input_cache_read":"0.0000005500"},"top_provider":{"context_length":200000,"max_completion_tokens":100000,"is_moderated":true},"per_request_limits":null,"supported_parameters":["include_reasoning","max_tokens","reasoning","response_format","seed","structured_outputs","tool_choice","tools"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":"2023-10-31","expiration_date":null,"links":{"details":"/api/v1/models/openai/o3-mini-2025-01-31/endpoints"},"reasoning":{"mandatory":false},"aliases":["openai/o3-mini-2025-01-31"]},{"id":"deepseek/deepseek-r1","canonical_slug":"deepseek/deepseek-r1","hugging_face_id":"deepseek-ai/DeepSeek-R1","name":"DeepSeek: R1","created":1737381095,"description":"DeepSeek R1 is here: Performance on par with [OpenAI o1](/openai/o1), but open-sourced and with fully open reasoning tokens. It's 671B parameters in size, with 37B active in an inference pass....","context_length":163840,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"DeepSeek","instruct_type":"deepseek-r1"},"pricing":{"prompt":"0.0000007000","completion":"0.0000025000"},"top_provider":{"context_length":64000,"max_completion_tokens":16000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","max_completion_tokens","max_tokens","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_p"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":"2024-07-31","expiration_date":null,"links":{"details":"/api/v1/models/deepseek/deepseek-r1/endpoints"},"reasoning":{"mandatory":true},"aliases":["deepseek/deepseek-r1"]},{"id":"deepseek/deepseek-chat","canonical_slug":"deepseek/deepseek-chat-v3","hugging_face_id":"deepseek-ai/DeepSeek-V3","name":"DeepSeek: DeepSeek V3","created":1735241320,"description":"DeepSeek-V3 is the latest model from the DeepSeek team, building upon the instruction following and coding abilities of the previous versions. Pre-trained on nearly 15 trillion tokens, the reported evaluations...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"DeepSeek","instruct_type":null},"pricing":{"prompt":"0.0000002002","completion":"0.0000008001"},"top_provider":{"context_length":128000,"max_completion_tokens":16000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","logit_bias","max_tokens","min_p","presence_penalty","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_p"],"default_parameters":{},"supported_voices":null,"knowledge_cutoff":"2024-07-31","expiration_date":null,"links":{"details":"/api/v1/models/deepseek/deepseek-chat-v3/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"3d","elo":1166,"win_rate":50.7,"rank":54},{"arena":"models","category":"codecategories","elo":1159,"win_rate":48.5,"rank":67},{"arena":"models","category":"dataviz","elo":1141,"win_rate":51.4,"rank":69},{"arena":"models","category":"gamedev","elo":1121,"win_rate":43.9,"rank":74},{"arena":"models","category":"svg","elo":1034,"win_rate":38.8,"rank":66},{"arena":"models","category":"uicomponent","elo":1150,"win_rate":52.8,"rank":62},{"arena":"models","category":"website","elo":1164,"win_rate":48.5,"rank":65}]},"aliases":["deepseek/deepseek-chat-v3"]},{"id":"openai/o1","canonical_slug":"openai/o1-2024-12-17","hugging_face_id":"","name":"OpenAI: o1","created":1734459999,"description":"The latest and strongest model family from OpenAI, o1 is designed to spend more time thinking before responding. The o1 model series is trained with large-scale reinforcement learning to reason...","context_length":200000,"architecture":{"modality":"text+image+file->text","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":"0.0000150000","completion":"0.0000600000","web_search":"0.0100000000","input_cache_read":"0.0000075000"},"top_provider":{"context_length":200000,"max_completion_tokens":100000,"is_moderated":true},"per_request_limits":null,"supported_parameters":["include_reasoning","max_tokens","reasoning","response_format","seed","structured_outputs","tool_choice","tools"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":"2023-10-31","expiration_date":null,"links":{"details":"/api/v1/models/openai/o1-2024-12-17/endpoints"},"benchmarks":{"design_arena":[],"artificial_analysis":{"intelligence_index":null,"coding_index":39.7,"agentic_index":null}},"reasoning":{"mandatory":false},"aliases":["openai/o1-2024-12-17"]},{"id":"meta-llama/llama-3.3-70b-instruct","canonical_slug":"meta-llama/llama-3.3-70b-instruct","hugging_face_id":"meta-llama/Llama-3.3-70B-Instruct","name":"Meta: Llama 3.3 70B Instruct","created":1733506137,"description":"The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Llama3","instruct_type":"llama3"},"pricing":{"prompt":"0.0000001000","completion":"0.0000003200"},"top_provider":{"context_length":131072,"max_completion_tokens":16384,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","logit_bias","logprobs","max_tokens","min_p","presence_penalty","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{},"supported_voices":null,"knowledge_cutoff":"2023-12-31","expiration_date":null,"links":{"details":"/api/v1/models/meta-llama/llama-3.3-70b-instruct/endpoints"},"aliases":["meta-llama/llama-3.3-70b-instruct"]},{"id":"amazon/nova-lite-v1","canonical_slug":"amazon/nova-lite-v1","hugging_face_id":"","name":"Amazon: Nova Lite 1.0","created":1733437363,"description":"Amazon Nova Lite 1.0 is a very low-cost multimodal model from Amazon that focused on fast processing of image, video, and text inputs to generate text output. Amazon Nova Lite...","context_length":300000,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Nova","instruct_type":null},"pricing":{"prompt":"0.0000000600","completion":"0.0000002400"},"top_provider":{"context_length":300000,"max_completion_tokens":5120,"is_moderated":true},"per_request_limits":null,"supported_parameters":["max_tokens","stop","temperature","tools","top_k","top_p"],"default_parameters":{},"supported_voices":null,"knowledge_cutoff":"2024-10-31","expiration_date":null,"links":{"details":"/api/v1/models/amazon/nova-lite-v1/endpoints"},"aliases":["amazon/nova-lite-v1"]},{"id":"amazon/nova-micro-v1","canonical_slug":"amazon/nova-micro-v1","hugging_face_id":"","name":"Amazon: Nova Micro 1.0","created":1733437237,"description":"Amazon Nova Micro 1.0 is a text-only model that delivers the lowest latency responses in the Amazon Nova family of models at a very low cost. With a context length...","context_length":128000,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Nova","instruct_type":null},"pricing":{"prompt":"0.0000000350","completion":"0.0000001400"},"top_provider":{"context_length":128000,"max_completion_tokens":5120,"is_moderated":true},"per_request_limits":null,"supported_parameters":["max_tokens","stop","temperature","tools","top_k","top_p"],"default_parameters":{},"supported_voices":null,"knowledge_cutoff":"2024-10-31","expiration_date":null,"links":{"details":"/api/v1/models/amazon/nova-micro-v1/endpoints"},"aliases":["amazon/nova-micro-v1"]},{"id":"amazon/nova-pro-v1","canonical_slug":"amazon/nova-pro-v1","hugging_face_id":"","name":"Amazon: Nova Pro 1.0","created":1733436303,"description":"Amazon Nova Pro 1.0 is a capable multimodal model from Amazon focused on providing a combination of accuracy, speed, and cost for a wide range of tasks. As of December...","context_length":300000,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Nova","instruct_type":null},"pricing":{"prompt":"0.0000008000","completion":"0.0000032000"},"top_provider":{"context_length":300000,"max_completion_tokens":5120,"is_moderated":true},"per_request_limits":null,"supported_parameters":["max_tokens","stop","temperature","tools","top_k","top_p"],"default_parameters":{},"supported_voices":null,"knowledge_cutoff":"2024-10-31","expiration_date":null,"links":{"details":"/api/v1/models/amazon/nova-pro-v1/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"website","elo":840,"win_rate":21.4,"rank":111}]},"aliases":["amazon/nova-pro-v1"]},{"id":"openai/gpt-4o-2024-11-20","canonical_slug":"openai/gpt-4o-2024-11-20","hugging_face_id":"","name":"OpenAI: GPT-4o (2024-11-20)","created":1732127594,"description":"The 2024-11-20 version of GPT-4o offers a leveled-up creative writing ability with more natural, engaging, and tailored writing to improve relevance & readability. It’s also better at working with uploaded...","context_length":128000,"architecture":{"modality":"text+image+file->text","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":"0.0000025000","completion":"0.0000100000","input_cache_read":"0.0000012500"},"top_provider":{"context_length":128000,"max_completion_tokens":16384,"is_moderated":true},"per_request_limits":null,"supported_parameters":["frequency_penalty","logit_bias","logprobs","max_tokens","presence_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_logprobs","top_p","web_search_options"],"default_parameters":{},"supported_voices":null,"knowledge_cutoff":"2023-10-31","expiration_date":null,"links":{"details":"/api/v1/models/openai/gpt-4o-2024-11-20/endpoints"},"aliases":["openai/gpt-4o-2024-11-20"]},{"id":"mistralai/mistral-large-2407","canonical_slug":"mistralai/mistral-large-2407","hugging_face_id":"","name":"Mistral Large 2407","created":1731978415,"description":"This is Mistral AI's flagship model, Mistral Large 2 (version mistral-large-2407). It's a proprietary weights-available model and excels at reasoning, code, JSON, chat, and more. Read the launch announcement [here](https://mistral.ai/news/mistral-large-2407/)....","context_length":131072,"architecture":{"modality":"text+file->text","input_modalities":["text","file"],"output_modalities":["text"],"tokenizer":"Mistral","instruct_type":null},"pricing":{"prompt":"0.0000020000","completion":"0.0000060000","input_cache_read":"0.0000002000"},"top_provider":{"context_length":131072,"max_completion_tokens":null,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","max_tokens","presence_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_p"],"default_parameters":{"temperature":0.3},"supported_voices":null,"knowledge_cutoff":"2024-03-31","expiration_date":null,"links":{"details":"/api/v1/models/mistralai/mistral-large-2407/endpoints"},"aliases":["mistralai/mistral-large-2407"]},{"id":"thedrummer/unslopnemo-12b","canonical_slug":"thedrummer/unslopnemo-12b","hugging_face_id":"TheDrummer/UnslopNemo-12B-v4.1","name":"TheDrummer: UnslopNemo 12B","created":1731103448,"description":"UnslopNemo v4.1 is the latest addition from the creator of Rocinante, designed for adventure writing and role-play scenarios.","context_length":32768,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Mistral","instruct_type":"mistral"},"pricing":{"prompt":"0.0000004000","completion":"0.0000004000"},"top_provider":{"context_length":32768,"max_completion_tokens":32768,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","logprobs","max_tokens","presence_penalty","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_logprobs","top_p"],"default_parameters":{},"supported_voices":null,"knowledge_cutoff":"2024-04-30","expiration_date":null,"links":{"details":"/api/v1/models/thedrummer/unslopnemo-12b/endpoints"},"aliases":["thedrummer/unslopnemo-12b"]},{"id":"qwen/qwen-2.5-7b-instruct","canonical_slug":"qwen/qwen-2.5-7b-instruct","hugging_face_id":"Qwen/Qwen2.5-7B-Instruct","name":"Qwen: Qwen2.5 7B Instruct","created":1729036800,"description":"Qwen2.5 7B is the latest series of Qwen large language models. Qwen2.5 brings the following improvements upon Qwen2: - Significantly more knowledge and has greatly improved capabilities in coding and...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen","instruct_type":"chatml"},"pricing":{"prompt":"0.0000000400","completion":"0.0000001000"},"top_provider":{"context_length":32768,"max_completion_tokens":32768,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","logit_bias","logprobs","max_tokens","min_p","presence_penalty","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{"temperature":null,"top_p":null,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":"2024-06-30","expiration_date":null,"links":{"details":"/api/v1/models/qwen/qwen-2.5-7b-instruct/endpoints"},"aliases":["qwen/qwen-2.5-7b-instruct"]},{"id":"qwen/qwen-2.5-72b-instruct","canonical_slug":"qwen/qwen-2.5-72b-instruct","hugging_face_id":"Qwen/Qwen2.5-72B-Instruct","name":"Qwen2.5 72B Instruct","created":1726704000,"description":"Qwen2.5 72B is the latest series of Qwen large language models. Qwen2.5 brings the following improvements upon Qwen2: - Significantly more knowledge and has greatly improved capabilities in coding and...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen","instruct_type":"chatml"},"pricing":{"prompt":"0.0000003600","completion":"0.0000004000"},"top_provider":{"context_length":32768,"max_completion_tokens":16384,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","logit_bias","max_tokens","min_p","presence_penalty","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_p"],"default_parameters":{},"supported_voices":null,"knowledge_cutoff":"2024-06-30","expiration_date":null,"links":{"details":"/api/v1/models/qwen/qwen-2.5-72b-instruct/endpoints"},"aliases":["qwen/qwen-2.5-72b-instruct"]},{"id":"cohere/command-r-08-2024","canonical_slug":"cohere/command-r-08-2024","hugging_face_id":null,"name":"Cohere: Command R (08-2024)","created":1724976000,"description":"command-r-08-2024 is an update of the [Command R](/models/cohere/command-r) with improved performance for multilingual retrieval-augmented generation (RAG) and tool use. More broadly, it is better at math, code and reasoning and...","context_length":128000,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Cohere","instruct_type":null},"pricing":{"prompt":"0.0000001500","completion":"0.0000006000"},"top_provider":{"context_length":128000,"max_completion_tokens":4000,"is_moderated":true},"per_request_limits":null,"supported_parameters":["frequency_penalty","max_tokens","presence_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_p"],"default_parameters":{},"supported_voices":null,"knowledge_cutoff":"2024-03-31","expiration_date":null,"links":{"details":"/api/v1/models/cohere/command-r-08-2024/endpoints"},"aliases":["cohere/command-r-08-2024"]},{"id":"cohere/command-r-plus-08-2024","canonical_slug":"cohere/command-r-plus-08-2024","hugging_face_id":null,"name":"Cohere: Command R+ (08-2024)","created":1724976000,"description":"command-r-plus-08-2024 is an update of the [Command R+](/models/cohere/command-r-plus) with roughly 50% higher throughput and 25% lower latencies as compared to the previous Command R+ version, while keeping the hardware footprint...","context_length":128000,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Cohere","instruct_type":null},"pricing":{"prompt":"0.0000025000","completion":"0.0000100000"},"top_provider":{"context_length":128000,"max_completion_tokens":4000,"is_moderated":true},"per_request_limits":null,"supported_parameters":["frequency_penalty","max_tokens","presence_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_p"],"default_parameters":{},"supported_voices":null,"knowledge_cutoff":"2024-03-31","expiration_date":null,"links":{"details":"/api/v1/models/cohere/command-r-plus-08-2024/endpoints"},"aliases":["cohere/command-r-plus-08-2024"]},{"id":"sao10k/l3.1-euryale-70b","canonical_slug":"sao10k/l3.1-euryale-70b","hugging_face_id":"Sao10K/L3.1-70B-Euryale-v2.2","name":"Sao10K: Llama 3.1 Euryale 70B v2.2","created":1724803200,"description":"Euryale L3.1 70B v2.2 is a model focused on creative roleplay from [Sao10k](https://ko-fi.com/sao10k). It is the successor of [Euryale L3 70B v2.1](/models/sao10k/l3-euryale-70b).","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Llama3","instruct_type":"llama3"},"pricing":{"prompt":"0.0000008500","completion":"0.0000008500"},"top_provider":{"context_length":131072,"max_completion_tokens":16384,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","logit_bias","max_tokens","min_p","presence_penalty","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_p"],"default_parameters":{},"supported_voices":null,"knowledge_cutoff":"2023-12-31","expiration_date":null,"links":{"details":"/api/v1/models/sao10k/l3.1-euryale-70b/endpoints"},"aliases":["sao10k/l3.1-euryale-70b"]},{"id":"openai/gpt-4o-2024-08-06","canonical_slug":"openai/gpt-4o-2024-08-06","hugging_face_id":null,"name":"OpenAI: GPT-4o (2024-08-06)","created":1722902400,"description":"The 2024-08-06 version of GPT-4o offers improved performance in structured outputs, with the ability to supply a JSON schema in the respone_format. Read more [here](https://openai.com/index/introducing-structured-outputs-in-the-api/). GPT-4o (\"o\" for \"omni\") is...","context_length":128000,"architecture":{"modality":"text+image+file->text","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":"0.0000025000","completion":"0.0000100000","input_cache_read":"0.0000012500"},"top_provider":{"context_length":128000,"max_completion_tokens":16384,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","logit_bias","logprobs","max_completion_tokens","max_tokens","presence_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_logprobs","top_p","web_search_options"],"default_parameters":{},"supported_voices":null,"knowledge_cutoff":"2023-10-31","expiration_date":null,"links":{"details":"/api/v1/models/openai/gpt-4o-2024-08-06/endpoints"},"aliases":["openai/gpt-4o-2024-08-06"]},{"id":"meta-llama/llama-3.1-8b-instruct","canonical_slug":"meta-llama/llama-3.1-8b-instruct","hugging_face_id":"meta-llama/Meta-Llama-3.1-8B-Instruct","name":"Meta: Llama 3.1 8B Instruct","created":1721692800,"description":"Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 8B instruct-tuned version is fast and efficient. It has demonstrated strong performance compared to...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Llama3","instruct_type":"llama3"},"pricing":{"prompt":"0.0000000200","completion":"0.0000000300"},"top_provider":{"context_length":131072,"max_completion_tokens":16384,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","logit_bias","logprobs","max_tokens","min_p","presence_penalty","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{},"supported_voices":null,"knowledge_cutoff":"2023-12-31","expiration_date":null,"links":{"details":"/api/v1/models/meta-llama/llama-3.1-8b-instruct/endpoints"},"aliases":["meta-llama/llama-3.1-8b-instruct"]},{"id":"meta-llama/llama-3.1-70b-instruct","canonical_slug":"meta-llama/llama-3.1-70b-instruct","hugging_face_id":"meta-llama/Meta-Llama-3.1-70B-Instruct","name":"Meta: Llama 3.1 70B Instruct","created":1721692800,"description":"Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 70B instruct-tuned version is optimized for high quality dialogue usecases. It has demonstrated strong...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Llama3","instruct_type":"llama3"},"pricing":{"prompt":"0.0000004000","completion":"0.0000004000"},"top_provider":{"context_length":131072,"max_completion_tokens":16384,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","logit_bias","logprobs","max_tokens","min_p","presence_penalty","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{},"supported_voices":null,"knowledge_cutoff":"2023-12-31","expiration_date":null,"links":{"details":"/api/v1/models/meta-llama/llama-3.1-70b-instruct/endpoints"},"aliases":["meta-llama/llama-3.1-70b-instruct"]},{"id":"mistralai/mistral-nemo","canonical_slug":"mistralai/mistral-nemo","hugging_face_id":"mistralai/Mistral-Nemo-Instruct-2407","name":"Mistral: Mistral Nemo","created":1721347200,"description":"A 12B parameter model with a 128k token context length built by Mistral in collaboration with NVIDIA. The model is multilingual, supporting English, French, German, Spanish, Italian, Portuguese, Chinese, Japanese,...","context_length":131072,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Mistral","instruct_type":"mistral"},"pricing":{"prompt":"0.0000000200","completion":"0.0000000300"},"top_provider":{"context_length":131072,"max_completion_tokens":null,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","logit_bias","logprobs","max_tokens","min_p","presence_penalty","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{"temperature":0.3},"supported_voices":null,"knowledge_cutoff":"2024-04-30","expiration_date":null,"links":{"details":"/api/v1/models/mistralai/mistral-nemo/endpoints"},"aliases":["mistralai/mistral-nemo"]},{"id":"openai/gpt-4o-mini","canonical_slug":"openai/gpt-4o-mini","hugging_face_id":null,"name":"OpenAI: GPT-4o-mini","created":1721260800,"description":"GPT-4o mini is OpenAI's newest model after [GPT-4 Omni](/models/openai/gpt-4o), supporting both text and image inputs with text outputs. As their most advanced small model, it is many multiples more affordable...","context_length":128000,"architecture":{"modality":"text+image+file->text","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":"0.0000001500","completion":"0.0000006000","input_cache_read":"0.0000000750"},"top_provider":{"context_length":128000,"max_completion_tokens":16384,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","logit_bias","logprobs","max_completion_tokens","max_tokens","presence_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_logprobs","top_p","web_search_options"],"default_parameters":{},"supported_voices":null,"knowledge_cutoff":"2023-10-31","expiration_date":null,"links":{"details":"/api/v1/models/openai/gpt-4o-mini/endpoints"},"aliases":["openai/gpt-4o-mini"]},{"id":"openai/gpt-4o-mini-2024-07-18","canonical_slug":"openai/gpt-4o-mini-2024-07-18","hugging_face_id":null,"name":"OpenAI: GPT-4o-mini (2024-07-18)","created":1721260800,"description":"GPT-4o mini is OpenAI's newest model after [GPT-4 Omni](/models/openai/gpt-4o), supporting both text and image inputs with text outputs. As their most advanced small model, it is many multiples more affordable...","context_length":128000,"architecture":{"modality":"text+image+file->text","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":"0.0000001500","completion":"0.0000006000","input_cache_read":"0.0000000750"},"top_provider":{"context_length":128000,"max_completion_tokens":16384,"is_moderated":true},"per_request_limits":null,"supported_parameters":["frequency_penalty","logit_bias","logprobs","max_tokens","presence_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_logprobs","top_p","web_search_options"],"default_parameters":{},"supported_voices":null,"knowledge_cutoff":"2023-10-31","expiration_date":null,"links":{"details":"/api/v1/models/openai/gpt-4o-mini-2024-07-18/endpoints"},"aliases":["openai/gpt-4o-mini-2024-07-18"]},{"id":"openai/gpt-4o-2024-05-13","canonical_slug":"openai/gpt-4o-2024-05-13","hugging_face_id":null,"name":"OpenAI: GPT-4o (2024-05-13)","created":1715558400,"description":"GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI model, supporting both text and image inputs with text outputs. It maintains the intelligence level of [GPT-4 Turbo](/models/openai/gpt-4-turbo) while being twice as...","context_length":128000,"architecture":{"modality":"text+image+file->text","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":"0.0000050000","completion":"0.0000150000"},"top_provider":{"context_length":128000,"max_completion_tokens":4096,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","logit_bias","logprobs","max_completion_tokens","max_tokens","presence_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_logprobs","top_p","web_search_options"],"default_parameters":{},"supported_voices":null,"knowledge_cutoff":"2023-10-31","expiration_date":null,"links":{"details":"/api/v1/models/openai/gpt-4o-2024-05-13/endpoints"},"benchmarks":{"design_arena":[],"artificial_analysis":{"intelligence_index":null,"coding_index":24.2,"agentic_index":null}},"aliases":["openai/gpt-4o-2024-05-13"]},{"id":"openai/gpt-4o","canonical_slug":"openai/gpt-4o","hugging_face_id":null,"name":"OpenAI: GPT-4o","created":1715558400,"description":"GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI model, supporting both text and image inputs with text outputs. It maintains the intelligence level of [GPT-4 Turbo](/models/openai/gpt-4-turbo) while being twice as...","context_length":128000,"architecture":{"modality":"text+image+file->text","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":"0.0000025000","completion":"0.0000100000"},"top_provider":{"context_length":128000,"max_completion_tokens":16384,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","logit_bias","logprobs","max_completion_tokens","max_tokens","presence_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_logprobs","top_p","web_search_options"],"default_parameters":{},"supported_voices":null,"knowledge_cutoff":"2023-10-31","expiration_date":null,"links":{"details":"/api/v1/models/openai/gpt-4o/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"3d","elo":947,"win_rate":39.2,"rank":92},{"arena":"models","category":"codecategories","elo":911,"win_rate":34.8,"rank":104},{"arena":"models","category":"dataviz","elo":900,"win_rate":36,"rank":101},{"arena":"models","category":"gamedev","elo":973,"win_rate":42.3,"rank":98},{"arena":"models","category":"uicomponent","elo":942,"win_rate":38.1,"rank":96},{"arena":"models","category":"website","elo":875,"win_rate":31.5,"rank":109}]},"aliases":["openai/gpt-4o"]},{"id":"mistralai/mixtral-8x22b-instruct","canonical_slug":"mistralai/mixtral-8x22b-instruct","hugging_face_id":"mistralai/Mixtral-8x22B-Instruct-v0.1","name":"Mistral: Mixtral 8x22B Instruct","created":1713312000,"description":"Mistral's official instruct fine-tuned version of [Mixtral 8x22B](/models/mistralai/mixtral-8x22b). It uses 39B active parameters out of 141B, offering unparalleled cost efficiency for its size. Its strengths include: - strong math, coding,...","context_length":65536,"architecture":{"modality":"text+file->text","input_modalities":["text","file"],"output_modalities":["text"],"tokenizer":"Mistral","instruct_type":"mistral"},"pricing":{"prompt":"0.0000020000","completion":"0.0000060000","input_cache_read":"0.0000002000"},"top_provider":{"context_length":65536,"max_completion_tokens":null,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","max_tokens","presence_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_p"],"default_parameters":{"temperature":0.3},"supported_voices":null,"knowledge_cutoff":"2024-01-31","expiration_date":null,"links":{"details":"/api/v1/models/mistralai/mixtral-8x22b-instruct/endpoints"},"aliases":["mistralai/mixtral-8x22b-instruct"]},{"id":"openai/gpt-4-turbo","canonical_slug":"openai/gpt-4-turbo","hugging_face_id":null,"name":"OpenAI: GPT-4 Turbo","created":1712620800,"description":"The latest GPT-4 Turbo model with vision capabilities. Vision requests can now use JSON mode and function calling.\n\nTraining data: up to December 2023.","context_length":128000,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":"0.0000100000","completion":"0.0000300000"},"top_provider":{"context_length":128000,"max_completion_tokens":4096,"is_moderated":true},"per_request_limits":null,"supported_parameters":["frequency_penalty","logit_bias","logprobs","max_tokens","presence_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_logprobs","top_p"],"default_parameters":{},"supported_voices":null,"knowledge_cutoff":"2023-12-31","expiration_date":null,"links":{"details":"/api/v1/models/openai/gpt-4-turbo/endpoints"},"benchmarks":{"design_arena":[],"artificial_analysis":{"intelligence_index":null,"coding_index":21.5,"agentic_index":null}},"aliases":["openai/gpt-4-turbo"]},{"id":"anthropic/claude-3-haiku","canonical_slug":"anthropic/claude-3-haiku","hugging_face_id":null,"name":"Anthropic: Claude 3 Haiku","created":1710288000,"description":"Claude 3 Haiku is Anthropic's fastest and most compact model for\nnear-instant responsiveness. Quick and accurate targeted performance.\n\nSee the launch announcement and benchmark results [here](https://www.anthropic.com/news/claude-3-haiku)\n\n#multimodal","context_length":200000,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Claude","instruct_type":null},"pricing":{"prompt":"0.0000002500","completion":"0.0000012500","web_search":"0.0100000000","input_cache_read":"0.0000000300","input_cache_write":"0.0000003000","input_cache_write_1h":"0.0000005"},"top_provider":{"context_length":200000,"max_completion_tokens":4096,"is_moderated":true},"per_request_limits":null,"supported_parameters":["max_tokens","stop","temperature","tool_choice","tools","top_k","top_p"],"default_parameters":{},"supported_voices":null,"knowledge_cutoff":"2023-08-31","expiration_date":null,"links":{"details":"/api/v1/models/anthropic/claude-3-haiku/endpoints"},"aliases":["anthropic/claude-3-haiku"]},{"id":"mistralai/mistral-large","canonical_slug":"mistralai/mistral-large","hugging_face_id":null,"name":"Mistral Large","created":1708905600,"description":"This is Mistral AI's flagship model, Mistral Large 2 (version `mistral-large-2407`). It's a proprietary weights-available model and excels at reasoning, code, JSON, chat, and more. Read the launch announcement [here](https://mistral.ai/news/mistral-large-2407/)....","context_length":128000,"architecture":{"modality":"text+file->text","input_modalities":["text","file"],"output_modalities":["text"],"tokenizer":"Mistral","instruct_type":null},"pricing":{"prompt":"0.0000020000","completion":"0.0000060000","input_cache_read":"0.0000002000"},"top_provider":{"context_length":128000,"max_completion_tokens":null,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","max_tokens","presence_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_p"],"default_parameters":{"temperature":0.3},"supported_voices":null,"knowledge_cutoff":"2024-11-30","expiration_date":null,"links":{"details":"/api/v1/models/mistralai/mistral-large/endpoints"},"aliases":["mistralai/mistral-large"]},{"id":"openai/gpt-4-turbo-preview","canonical_slug":"openai/gpt-4-turbo-preview","hugging_face_id":null,"name":"OpenAI: GPT-4 Turbo Preview","created":1706140800,"description":"The preview GPT-4 model with improved instruction following, JSON mode, reproducible outputs, parallel function calling, and more. Training data: up to Dec 2023. **Note:** heavily rate limited by OpenAI while...","context_length":128000,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":"0.0000100000","completion":"0.0000300000"},"top_provider":{"context_length":128000,"max_completion_tokens":4096,"is_moderated":true},"per_request_limits":null,"supported_parameters":["frequency_penalty","logit_bias","logprobs","max_tokens","presence_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_logprobs","top_p"],"default_parameters":{},"supported_voices":null,"knowledge_cutoff":"2023-12-31","expiration_date":null,"links":{"details":"/api/v1/models/openai/gpt-4-turbo-preview/endpoints"},"aliases":["openai/gpt-4-turbo-preview"]},{"id":"openai/gpt-3.5-turbo-0613","canonical_slug":"openai/gpt-3.5-turbo-0613","hugging_face_id":null,"name":"OpenAI: GPT-3.5 Turbo (older v0613)","created":1706140800,"description":"GPT-3.5 Turbo is OpenAI's fastest model. It can understand and generate natural language or code, and is optimized for chat and traditional completion tasks.\n\nTraining data up to Sep 2021.","context_length":4095,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":"0.0000010000","completion":"0.0000020000"},"top_provider":{"context_length":4095,"max_completion_tokens":4096,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","logit_bias","logprobs","max_completion_tokens","presence_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_logprobs","top_p"],"default_parameters":{},"supported_voices":null,"knowledge_cutoff":"2021-09-30","expiration_date":null,"links":{"details":"/api/v1/models/openai/gpt-3.5-turbo-0613/endpoints"},"aliases":["openai/gpt-3.5-turbo-0613"]},{"id":"openai/gpt-3.5-turbo-16k","canonical_slug":"openai/gpt-3.5-turbo-16k","hugging_face_id":null,"name":"OpenAI: GPT-3.5 Turbo 16k","created":1693180800,"description":"This model offers four times the context length of gpt-3.5-turbo, allowing it to support approximately 20 pages of text in a single request at a higher cost. Training data: up...","context_length":16385,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":"0.0000030000","completion":"0.0000040000"},"top_provider":{"context_length":16385,"max_completion_tokens":4096,"is_moderated":true},"per_request_limits":null,"supported_parameters":["frequency_penalty","logit_bias","logprobs","max_completion_tokens","max_tokens","presence_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_logprobs","top_p"],"default_parameters":{},"supported_voices":null,"knowledge_cutoff":"2021-09-30","expiration_date":null,"links":{"details":"/api/v1/models/openai/gpt-3.5-turbo-16k/endpoints"},"aliases":["openai/gpt-3.5-turbo-16k"]},{"id":"openai/gpt-3.5-turbo","canonical_slug":"openai/gpt-3.5-turbo","hugging_face_id":null,"name":"OpenAI: GPT-3.5 Turbo","created":1685232000,"description":"GPT-3.5 Turbo is OpenAI's fastest model. It can understand and generate natural language or code, and is optimized for chat and traditional completion tasks.\n\nTraining data up to Sep 2021.","context_length":16385,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":"0.0000005000","completion":"0.0000015000"},"top_provider":{"context_length":16385,"max_completion_tokens":4096,"is_moderated":true},"per_request_limits":null,"supported_parameters":["frequency_penalty","logit_bias","logprobs","max_tokens","presence_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_logprobs","top_p"],"default_parameters":{},"supported_voices":null,"knowledge_cutoff":"2021-09-30","expiration_date":null,"links":{"details":"/api/v1/models/openai/gpt-3.5-turbo/endpoints"},"benchmarks":{"design_arena":[],"artificial_analysis":{"intelligence_index":null,"coding_index":10.7,"agentic_index":null}},"aliases":["openai/gpt-3.5-turbo"]},{"id":"openai/gpt-4","canonical_slug":"openai/gpt-4","hugging_face_id":null,"name":"OpenAI: GPT-4","created":1685232000,"description":"OpenAI's flagship model, GPT-4 is a large-scale multimodal language model capable of solving difficult problems with greater accuracy than previous models due to its broader general knowledge and advanced reasoning...","context_length":8191,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":"0.0000300000","completion":"0.0000600000"},"top_provider":{"context_length":8191,"max_completion_tokens":4096,"is_moderated":true},"per_request_limits":null,"supported_parameters":["frequency_penalty","logit_bias","logprobs","max_completion_tokens","max_tokens","presence_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_logprobs","top_p"],"default_parameters":{},"supported_voices":null,"knowledge_cutoff":"2021-09-30","expiration_date":null,"links":{"details":"/api/v1/models/openai/gpt-4/endpoints"},"benchmarks":{"design_arena":[],"artificial_analysis":{"intelligence_index":null,"coding_index":13.1,"agentic_index":null}},"aliases":["openai/gpt-4"]},{"id":"stepfun/step-3.7-flash:free","canonical_slug":"stepfun/step-3.7-flash-20260528","hugging_face_id":"stepfun-ai/Step-3.7-Flash","name":"StepFun: Step 3.7 Flash","created":1779985069,"description":"Step 3.7 Flash is StepFun's latest high-efficiency multimodal Mixture-of-Experts model. It pairs a 196B-parameter language backbone with a vision encoder for native image and video understanding, activating roughly 11B parameters...","context_length":256000,"architecture":{"modality":"text+image+video->text","input_modalities":["text","image","video"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0","completion":"0","input_cache_read":"0"},"top_provider":{"context_length":256000,"max_completion_tokens":256000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_tokens","min_p","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/stepfun/step-3.7-flash-20260528/endpoints"},"benchmarks":{"design_arena":[{"arena":"models","category":"3d","elo":1195,"win_rate":42.9,"rank":47},{"arena":"models","category":"asciiart","elo":1222,"win_rate":52.1,"rank":15},{"arena":"models","category":"codecategories","elo":1216,"win_rate":45.7,"rank":47},{"arena":"models","category":"dataviz","elo":1210,"win_rate":46.8,"rank":42},{"arena":"models","category":"gamedev","elo":1210,"win_rate":41.9,"rank":43},{"arena":"models","category":"svg","elo":1123,"win_rate":40.3,"rank":47},{"arena":"models","category":"uicomponent","elo":1212,"win_rate":44.7,"rank":44},{"arena":"models","category":"website","elo":1225,"win_rate":47.2,"rank":43}],"artificial_analysis":{"intelligence_index":29.7,"coding_index":37.3,"agentic_index":21.5}},"reasoning":{"mandatory":true,"supported_efforts":["high","medium","low"],"default_effort":"medium"},"aliases":[],"synthesizedFreeVariant":true},{"id":"zai/GLM-5.1:US","canonical_slug":"zai/GLM-5.1:US","hugging_face_id":null,"name":"zai/GLM-5.1","created":0,"description":"Model by lukealonso - 436.8B parameters","context_length":202752,"architecture":{"modality":"text","tokenizer":"transformers","instruct_type":"","parameter_count":753910024032},"pricing":{"prompt":"0.0000012000","completion":"0.0000044000","input_cache_read":"0.0000002500","image":"0.0000000000"},"top_provider":{"context_length":202752,"max_completion_tokens":131072,"is_moderated":false},"supported_parameters":["chat_template_kwargs","frequency_penalty","logit_bias","logprobs","max_completion_tokens","max_tokens","min_p","parallel_tool_calls","presence_penalty","prompt_cache_key","reasoning_effort","repetition_penalty","response_format","safety_identifier","seed","service_tier","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p","user"],"default_parameters":{},"aliases":["zai/GLM-5.1:US"]},{"id":"qwen3.7-plus","canonical_slug":"qwen3.7-plus","hugging_face_id":null,"name":"qwen3.7-plus","created":1782136201,"description":"","context_length":1000000,"architecture":{},"pricing":{"prompt":"0.0000004000","completion":"0.0000016000","input_cache_read":"0.0000000400"},"top_provider":{"context_length":1000000},"supported_parameters":[],"default_parameters":{},"aliases":["qwen3.7-plus"]}]}