File size: 7,239 Bytes
8a254d6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
"Model Name"	"Link"	"Comment"	"Group"	"Med. Len."	"Med. Resp. Len."	"Parameter Size (B)"	"Type"	"Model Type"	"Think"	"Overall"	"KO"	"EN"	"JA"	"ZH"	"PL"	"DE"	"PT"	"ES"	"FR"	"IT"	"RU"	"VI"
"GPT-5 (Reasoning: medium)"	"https://platform.openai.com/docs/models/gpt-5"	"Reasoning: medium"	"GPT"	""	""	""	"Proprietary"	"Think"	"On"	"70.73"	"64.72"	"65.83"	"71.69"	"67.68"	"72.78"	"71.27"	"73.74"	"75.68"	"72.83"	"77.05"	"70.79"	"75.61"
"o3-pro (Reasoning: medium)"	"https://platform.openai.com/docs/models/o3-pro"	"Reasoning: medium"	"GPT"	""	""	""	"Proprietary"	"Think"	"On"	"66.47"	"63.61"	"63.61"	"69.28"	"65.24"	"63.89"	"64.09"	"68.16"	"69.19"	"70.11"	"72.13"	"62.36"	"71.95"
"Claude 4 Opus (20250514) (think)"	"https://www.anthropic.com/claude/opus"	"version: 20250514"	"Claude"	""	""	""	"Proprietary"	"Hybrid"	"On"	"63.29"	"57.5"	"62.5"	"64.46"	"62.8"	"59.44"	"65.19"	"65.92"	"60.54"	"65.22"	"65.57"	"65.17"	"72.56"
"Claude 4.1 Opus (20250805) (think)"	"https://www.anthropic.com/claude/opus"	"version: 20250805"	"Claude"	""	""	""	"Proprietary"	"Hybrid"	"On"	"63.24"	"58.33"	"61.39"	"60.84"	"64.02"	"61.67"	"66.85"	"68.16"	"61.08"	"65.76"	"66.67"	"65.73"	"65.24"
"GPT-5 mini (Reasoning: medium)"	"https://platform.openai.com/docs/models/gpt-5-mini"	"Reasoning: medium"	"GPT"	""	""	""	"Proprietary"	"Think"	"On"	"62.56"	"57.5"	"56.39"	"62.65"	"62.2"	"63.89"	"60.22"	"66.48"	"67.03"	"70.11"	"67.76"	"66.29"	"60.98"
"Claude 4 Sonnet (20250514) (think)"	"https://www.anthropic.com/claude/sonnet"	"version: 20250514"	"Claude"	""	""	""	"Proprietary"	"Hybrid"	"On"	"61.8"	"54.17"	"59.17"	"63.86"	"64.63"	"59.44"	"61.33"	"64.8"	"62.16"	"65.22"	"67.21"	"66.29"	"64.02"
"o3"	"https://platform.openai.com/docs/models/o3"	""	"GPT"	""	""	""	"Proprietary"	"Think"	"On"	"60.91"	"57.5"	"59.17"	"61.45"	"58.54"	"61.11"	"64.09"	"60.89"	"62.16"	"63.59"	"65.03"	"54.49"	"68.29"
"Gemini 2.5 Pro"	"https://deepmind.google/models/gemini/pro/"	""	"Gemini"	""	""	""	"Proprietary"	"Think"	"On"	"59.34"	"53.61"	"57.78"	"59.04"	"57.93"	"57.22"	"56.91"	"60.89"	"63.24"	"67.93"	"62.3"	"61.24"	"60.98"
"Grok-4"	"https://x.ai/news/grok-4"	"temperature: 0.6
top-p: 0.95"	"Grok"	""	""	""	"Proprietary"	"Think"	"On"	"58.74"	"57.78"	"56.67"	"62.65"	"60.37"	"58.33"	"60.22"	"59.78"	"56.22"	"62.5"	"60.66"	"52.25"	"60.98"
"Gemini 2.5 Flash"	"https://deepmind.google/models/gemini/flash/"	""	"Gemini"	""	""	""	"Proprietary"	"Hybrid"	"On"	"58.62"	"51.11"	"56.39"	"62.05"	"56.71"	"62.78"	"60.77"	"61.45"	"60.0"	"63.04"	"57.92"	"64.04"	"56.71"
"o4-mini"	"https://platform.openai.com/docs/models/o4-mini"	""	"GPT"	""	""	""	"Proprietary"	"Think"	"On"	"57.57"	"54.17"	"55.0"	"62.05"	"59.76"	"52.78"	"58.56"	"63.69"	"55.68"	"57.61"	"60.66"	"56.74"	"60.98"
"Qwen3 235B A22B Thinking 2507"	"https://huggingface.co/Qwen/Qwen3-235B-A22B-Thinking-2507"	"temperature: 0.6
top-p: 0.95"	"Qwen"	"2404.5"	"423.0"	"235.0"	"Open"	"Think"	"On"	"55.48"	"49.17"	"53.33"	"56.02"	"58.54"	"50.56"	"62.43"	"60.89"	"52.97"	"56.52"	"60.11"	"53.93"	"60.37"
"GPT-5 nano (Reasoning: medium)"	"https://platform.openai.com/docs/models/gpt-5-nano"	"Reasoning: medium"	"GPT"	""	""	""	"Proprietary"	"Think"	"On"	"55.39"	"51.94"	"53.89"	"57.23"	"53.66"	"55.56"	"58.01"	"59.78"	"54.59"	"56.52"	"59.02"	"57.3"	"51.83"
"GLM-4.5 FP8 (think)"	"https://huggingface.co/zai-org/GLM-4.5-FP8"	"temperature: 0.6
top-p: 0.95"	"GLM"	"1442.0"	"604.0"	"355.0"	"Open"	"Hybrid"	"On"	"54.03"	"46.94"	"54.17"	"60.84"	"58.54"	"48.89"	"55.8"	"54.75"	"48.11"	"57.61"	"57.92"	"57.87"	"54.88"
"Qwen3 235B A22B Instruct 2507"	"https://huggingface.co/Qwen/Qwen3-235B-A22B-Instruct-2507"	"temperature: 0.7
top-p: 0.8"	"Qwen"	"433.0"	"433.0"	"235.0"	"Open"	"Instruct"	"Off"	"52.94"	"46.67"	"55.28"	"53.61"	"59.15"	"46.11"	"51.38"	"55.87"	"54.59"	"53.26"	"56.28"	"54.49"	"53.05"
"DeepSeek V3.1 (think)"	"https://huggingface.co/deepseek-ai/DeepSeek-V3.1"	"temperature: 0.6
top-p: 0.95"	"DeepSeek"	"710.5"	"356.0"	"671.0"	"Open"	"Hybrid"	"On"	"51.45"	"44.44"	"48.33"	"56.63"	"48.78"	"48.89"	"55.25"	"53.07"	"52.97"	"56.52"	"57.92"	"50.56"	"54.27"
"gpt-oss-120B (Reasoning: medium)"	"https://huggingface.co/openai/gpt-oss-120b"	"Reasoning: medium
temperature: 1.0
top-p: 1.0"	"GPT"	"759.5"	"370.5"	"117.0"	"Open"	"Think"	"On"	"49.11"	"46.67"	"51.39"	"51.81"	"47.56"	"45.0"	"51.38"	"54.75"	"50.27"	"51.63"	"47.54"	"46.07"	"45.12"
"DeepSeek R1 (0528) (top_p: 0.95, temp:0.6)"	"https://huggingface.co/deepseek-ai/DeepSeek-R1-0528"	"version: 0528
temperature: 0.6
top-p: 0.95"	"DeepSeek"	"1177.5"	"554.0"	"671.0"	"Open"	"Think"	"On"	"48.79"	"42.22"	"49.44"	"50.0"	"53.05"	"47.22"	"48.62"	"50.28"	"48.11"	"51.63"	"54.1"	"44.38"	"53.05"
"Gauss2.3 Hybrid"	""	""	"Gauss"	"546.0"	"308.0"	""	"Proprietary"	"Hybrid"	"On"	"46.58"	"39.72"	"45.56"	"48.8"	"48.17"	"45.0"	"44.2"	"53.63"	"45.41"	"52.17"	"51.91"	"44.94"	"47.56"
"DeepSeek V3 (0324) (top_p: 0.95, temp:1.3)"	"https://huggingface.co/deepseek-ai/DeepSeek-V3-0324"	"version: 0324
temperature: 1.3
top-p: 0.95"	"DeepSeek"	"408.0"	"408.0"	"671.0"	"Open"	"Instruct"	"Off"	"45.09"	"37.5"	"43.61"	"46.99"	"51.22"	"45.56"	"44.75"	"44.69"	"44.32"	"48.91"	"49.18"	"44.94"	"49.39"
"Qwen3 32B (think)"	"https://huggingface.co/Qwen/Qwen3-32B"	"temperature: 0.6
top-p: 0.95"	"Qwen"	"1113.0"	"390.0"	"32.8"	"Open"	"Hybrid"	"On"	"44.44"	"38.89"	"41.67"	"48.8"	"50.0"	"38.33"	"46.41"	"44.69"	"44.86"	"44.57"	"50.82"	"46.07"	"47.56"
"A.X 4.0"	"https://huggingface.co/skt/A.X-4.0"	""	"SKT"	"412.5"	"412.5"	"71.9"	"Open"	"Instruct"	"Off"	"41.59"	"38.89"	"41.11"	"43.98"	"49.39"	"36.11"	"45.86"	"43.58"	"44.32"	"39.67"	"43.17"	"39.89"	"36.59"
"gpt-oss-20B (Reasoning: medium)"	"https://huggingface.co/openai/gpt-oss-20b"	"Reasoning: medium
temperature: 1.0
top-p: 1.0"	"GPT"	"953.5"	"326.0"	"21.0"	"Open"	"Think"	"On"	"41.18"	"36.67"	"42.78"	"45.78"	"45.73"	"37.78"	"35.91"	"41.9"	"39.46"	"51.09"	"40.44"	"38.76"	"41.46"
"EXAONE 4.0 32B (think)"	"https://huggingface.co/LGAI-EXAONE/EXAONE-4.0-32B"	"temperature: 0.6
top-p: 0.95"	"Exaone"	"1274.5"	"503.0"	"32.0"	"Open"	"Hybrid"	"On"	"33.82"	"33.61"	"38.33"	"28.92"	"35.98"	"26.11"	"35.91"	"34.08"	"38.92"	"35.33"	"33.88"	"28.09"	"31.71"
"HyperCLOVAX SEED Think 14B (think)"	"https://huggingface.co/naver-hyperclovax/HyperCLOVAX-SEED-Think-14B"	"temperature: 0.5
top-p: 0.6"	"HCX"	"1444.0"	"382.5"	"14.7"	"Open"	"Hybrid"	"On"	"31.84"	"32.22"	"37.22"	"31.93"	"38.41"	"27.78"	"32.6"	"30.17"	"29.19"	"32.07"	"33.33"	"25.28"	"26.22"
"Solar Pro Preview (top_p:0.95, temp: 0.7)"	"https://huggingface.co/upstage/solar-pro-preview-instruct"	"temperature: 0.7
top-p: 0.95"	"Solar"	"260.0"	"260.0"	"22.0"	"Open"	"Instruct"	"Off"	"20.73"	"9.72"	"22.22"	"21.08"	"24.39"	"9.44"	"18.23"	"24.02"	"29.73"	"29.89"	"33.33"	"22.47"	"12.8"
"Mi:dm 2.0 Base Instruct"	"https://huggingface.co/K-intelligence/Midm-2.0-Base-Instruct"	"temperature: 0.8
top-p: 0.7"	"kt"	"316.0"	"316.0"	"11.5"	"Open"	"Instruct"	"Off"	"20.25"	"26.39"	"26.39"	"17.47"	"26.83"	"13.33"	"18.78"	"20.67"	"16.22"	"20.65"	"21.31"	"12.92"	"9.15"
"Kanana 1.5 15.7B A3B Instruct"	"https://huggingface.co/kakaocorp/kanana-1.5-15.7b-a3b-instruct"	"temperature: 1.0
top-p: 0.95"	"kakao"	"414.0"	"414.0"	"15.7"	"Open"	"Instruct"	"Off"	"11.71"	"21.11"	"20.28"	"10.84"	"15.24"	"5.56"	"7.73"	"8.94"	"9.19"	"8.15"	"5.46"	"5.06"	"4.88"