Spaces:
Running
Running
"Model Name" "Link" "Comment" "Group" "Med. Len." "Med. Resp. Len." "Parameter Size (B)" "Type" "Model Type" "Think" "Overall" "Content Generation" "Editing" "Data Analysis" "Reasoning" "Hallucination" "Safety" "Repetition" "Summarization" "Translation" "Multi-Turn" | |
"GPT-5 (Reasoning: medium)" "https://platform.openai.com/docs/models/gpt-5" "Reasoning: medium" "GPT" "" "" "" "Proprietary" "Think" "On" "70.73" "71.0" "74.38" "76.49" "79.75" "64.94" "56.2" "82.86" "80.16" "69.38" "54.36" | |
"o3-pro (Reasoning: medium)" "https://platform.openai.com/docs/models/o3-pro" "Reasoning: medium" "GPT" "" "" "" "Proprietary" "Think" "On" "66.47" "72.5" "70.31" "75.7" "83.88" "64.37" "33.88" "74.29" "65.48" "64.33" "48.32" | |
"Claude 4 Opus (20250514) (think)" "https://www.anthropic.com/claude/opus" "version: 20250514" "Claude" "" "" "" "Proprietary" "Hybrid" "On" "63.29" "60.75" "59.69" "73.31" "69.83" "78.74" "53.72" "55.71" "65.48" "65.45" "48.99" | |
"Claude 4.1 Opus (20250805) (think)" "https://www.anthropic.com/claude/opus" "version: 20250805" "Claude" "" "" "" "Proprietary" "Hybrid" "On" "63.24" "61.25" "60.0" "78.49" "72.73" "77.01" "56.2" "57.14" "61.9" "62.64" "46.98" | |
"GPT-5 mini (Reasoning: medium)" "https://platform.openai.com/docs/models/gpt-5-mini" "Reasoning: medium" "GPT" "" "" "" "Proprietary" "Think" "On" "62.56" "68.0" "62.5" "74.9" "76.86" "55.17" "47.93" "44.29" "74.6" "56.18" "45.3" | |
"Claude 4 Sonnet (20250514) (think)" "https://www.anthropic.com/claude/sonnet" "version: 20250514" "Claude" "" "" "" "Proprietary" "Hybrid" "On" "61.8" "58.0" "58.44" "76.49" "67.77" "79.31" "57.02" "44.29" "65.08" "62.92" "44.97" | |
"o3" "https://platform.openai.com/docs/models/o3" "" "GPT" "" "" "" "Proprietary" "Think" "On" "60.91" "68.75" "60.0" "73.31" "79.34" "54.02" "34.71" "64.29" "60.71" "55.06" "46.98" | |
"Gemini 2.5 Pro" "https://deepmind.google/models/gemini/pro/" "" "Gemini" "" "" "" "Proprietary" "Think" "On" "59.34" "54.0" "60.94" "78.88" "73.14" "63.22" "17.36" "52.86" "67.86" "53.93" "52.68" | |
"Grok-4" "https://x.ai/news/grok-4" "temperature: 0.6 | |
top-p: 0.95" "Grok" "" "" "" "Proprietary" "Think" "On" "58.74" "61.0" "66.25" "72.51" "63.22" "66.09" "16.53" "58.57" "66.27" "54.21" "44.3" | |
"Gemini 2.5 Flash" "https://deepmind.google/models/gemini/flash/" "" "Gemini" "" "" "" "Proprietary" "Hybrid" "On" "58.62" "57.25" "62.19" "70.52" "72.31" "56.9" "28.93" "47.14" "68.65" "55.06" "46.98" | |
"o4-mini" "https://platform.openai.com/docs/models/o4-mini" "" "GPT" "" "" "" "Proprietary" "Think" "On" "57.57" "67.25" "61.25" "71.71" "75.62" "45.4" "39.67" "44.29" "59.92" "47.19" "41.95" | |
"Qwen3 235B A22B Thinking 2507" "https://huggingface.co/Qwen/Qwen3-235B-A22B-Thinking-2507" "temperature: 0.6 | |
top-p: 0.95" "Qwen" "2404.5" "423.0" "235.0" "Open" "Think" "On" "55.48" "57.5" "53.12" "73.31" "75.21" "55.17" "25.62" "35.71" "55.56" "56.18" "40.27" | |
"GPT-5 nano (Reasoning: medium)" "https://platform.openai.com/docs/models/gpt-5-nano" "Reasoning: medium" "GPT" "" "" "" "Proprietary" "Think" "On" "55.39" "63.5" "47.19" "68.92" "75.21" "55.17" "52.07" "34.29" "63.49" "40.73" "42.95" | |
"GLM-4.5 FP8 (think)" "https://huggingface.co/zai-org/GLM-4.5-FP8" "temperature: 0.6 | |
top-p: 0.95" "GLM" "1442.0" "604.0" "355.0" "Open" "Hybrid" "On" "54.03" "60.75" "53.75" "68.92" "74.38" "47.13" "33.06" "41.43" "60.32" "46.07" "35.91" | |
"Qwen3 235B A22B Instruct 2507" "https://huggingface.co/Qwen/Qwen3-235B-A22B-Instruct-2507" "temperature: 0.7 | |
top-p: 0.8" "Qwen" "433.0" "433.0" "235.0" "Open" "Instruct" "Off" "52.94" "58.0" "49.69" "68.13" "73.97" "55.17" "45.45" "30.0" "55.95" "38.48" "41.61" | |
"DeepSeek V3.1 (think)" "https://huggingface.co/deepseek-ai/DeepSeek-V3.1" "temperature: 0.6 | |
top-p: 0.95" "DeepSeek" "710.5" "356.0" "671.0" "Open" "Hybrid" "On" "51.45" "52.0" "50.0" "67.33" "69.83" "50.0" "33.88" "35.71" "59.52" "41.85" "40.27" | |
"gpt-oss-120B (Reasoning: medium)" "https://huggingface.co/openai/gpt-oss-120b" "Reasoning: medium | |
temperature: 1.0 | |
top-p: 1.0" "GPT" "759.5" "370.5" "117.0" "Open" "Think" "On" "49.11" "58.5" "48.44" "68.92" "69.83" "41.38" "39.67" "25.71" "50.79" "35.67" "32.21" | |
"DeepSeek R1 (0528) (top_p: 0.95, temp:0.6)" "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528" "version: 0528 | |
temperature: 0.6 | |
top-p: 0.95" "DeepSeek" "1177.5" "554.0" "671.0" "Open" "Think" "On" "48.79" "49.75" "50.0" "65.34" "59.09" "48.85" "38.02" "32.86" "57.94" "36.52" "38.93" | |
"Gauss2.3 Hybrid" "" "" "Gauss" "546.0" "308.0" "" "Proprietary" "Hybrid" "On" "46.58" "52.0" "46.25" "59.76" "66.94" "41.95" "34.71" "25.71" "53.17" "34.55" "33.22" | |
"DeepSeek V3 (0324) (top_p: 0.95, temp:1.3)" "https://huggingface.co/deepseek-ai/DeepSeek-V3-0324" "version: 0324 | |
temperature: 1.3 | |
top-p: 0.95" "DeepSeek" "408.0" "408.0" "671.0" "Open" "Instruct" "Off" "45.09" "46.25" "45.0" "58.96" "60.33" "41.95" "21.49" "30.0" "55.95" "38.48" "33.22" | |
"Qwen3 32B (think)" "https://huggingface.co/Qwen/Qwen3-32B" "temperature: 0.6 | |
top-p: 0.95" "Qwen" "1113.0" "390.0" "32.8" "Open" "Hybrid" "On" "44.44" "52.25" "41.56" "68.92" "66.53" "35.06" "19.83" "25.71" "46.43" "30.9" "32.89" | |
"A.X 4.0" "https://huggingface.co/skt/A.X-4.0" "" "SKT" "412.5" "412.5" "71.9" "Open" "Instruct" "Off" "41.59" "56.0" "43.75" "43.43" "42.56" "40.23" "15.7" "24.29" "53.97" "33.43" "32.21" | |
"gpt-oss-20B (Reasoning: medium)" "https://huggingface.co/openai/gpt-oss-20b" "Reasoning: medium | |
temperature: 1.0 | |
top-p: 1.0" "GPT" "953.5" "326.0" "21.0" "Open" "Think" "On" "41.18" "52.0" "40.0" "61.35" "65.7" "43.1" "41.32" "22.86" "36.51" "20.51" "22.82" | |
"EXAONE 4.0 32B (think)" "https://huggingface.co/LGAI-EXAONE/EXAONE-4.0-32B" "temperature: 0.6 | |
top-p: 0.95" "Exaone" "1274.5" "503.0" "32.0" "Open" "Hybrid" "On" "33.82" "34.25" "29.38" "56.97" "57.44" "24.71" "27.27" "17.14" "38.49" "18.54" "25.5" | |
"HyperCLOVAX SEED Think 14B (think)" "https://huggingface.co/naver-hyperclovax/HyperCLOVAX-SEED-Think-14B" "temperature: 0.5 | |
top-p: 0.6" "HCX" "1444.0" "382.5" "14.7" "Open" "Hybrid" "On" "31.84" "35.0" "26.56" "53.78" "58.68" "27.59" "26.45" "17.14" "29.76" "17.13" "20.47" | |
"Solar Pro Preview (top_p:0.95, temp: 0.7)" "https://huggingface.co/upstage/solar-pro-preview-instruct" "temperature: 0.7 | |
top-p: 0.95" "Solar" "260.0" "260.0" "22.0" "Open" "Instruct" "Off" "20.73" "28.0" "24.69" "16.73" "19.42" "17.24" "28.1" "11.43" "31.35" "13.76" "11.74" | |
"Mi:dm 2.0 Base Instruct" "https://huggingface.co/K-intelligence/Midm-2.0-Base-Instruct" "temperature: 0.8 | |
top-p: 0.7" "kt" "316.0" "316.0" "11.5" "Open" "Instruct" "Off" "20.25" "21.75" "17.5" "16.73" "18.6" "27.59" "59.5" "14.29" "25.4" "12.64" "11.41" | |
"Kanana 1.5 15.7B A3B Instruct" "https://huggingface.co/kakaocorp/kanana-1.5-15.7b-a3b-instruct" "temperature: 1.0 | |
top-p: 0.95" "kakao" "414.0" "414.0" "15.7" "Open" "Instruct" "Off" "11.71" "14.25" "10.62" "13.55" "11.16" "22.41" "22.31" "4.29" "11.9" "6.74" "5.37" | |