Spaces:
Running
Running
Update Dataset Groups and add Circumflex Dataset
#7
by
abrek
- opened
- data.py +3 -2
- results/zero-shot/CerebrumTech__cere-llama-3-8b-tr.json +6 -0
- results/zero-shot/Llama-3.3-70B-Instruct.json +6 -0
- results/zero-shot/Ministral-8B-Instruct.json +6 -0
- results/zero-shot/Mistral-7B-Instruct-v0.3.json +6 -0
- results/zero-shot/Mistral-7B-v0.3.json +6 -0
- results/zero-shot/Mixtral-8x7B-Instruct-v0.1.json +6 -0
- results/zero-shot/Qwen2.5-0.5B-Instruct.json +6 -0
- results/zero-shot/Qwen2.5-0.5B.json +6 -0
- results/zero-shot/Qwen2.5-1.5B-Instruct.json +6 -0
- results/zero-shot/Qwen2.5-1.5B.json +6 -0
- results/zero-shot/Qwen2.5-14B-Instruct.json +6 -0
- results/zero-shot/Qwen2.5-14B.json +6 -0
- results/zero-shot/Qwen2.5-3B-Instruct.json +6 -0
- results/zero-shot/Qwen2.5-3B.json +6 -0
- results/zero-shot/Qwen2.5-7B-Instruct.json +6 -0
- results/zero-shot/Qwen2.5-7B.json +6 -0
- results/zero-shot/aya-23-35B.json +6 -0
- results/zero-shot/aya-23-8b.json +6 -0
- results/zero-shot/aya-expanse-32b.json +6 -0
- results/zero-shot/aya-expanse-8b.json +6 -0
- results/zero-shot/aya101.json +6 -0
- results/zero-shot/commencis-7b.json +6 -0
- results/zero-shot/kanarya-2b.json +6 -0
- results/zero-shot/llama-3-8b-instruct.json +6 -0
- results/zero-shot/llama-3-8b.json +6 -0
- results/zero-shot/llama-3.1-8b-instruct.json +6 -0
- results/zero-shot/llama-3.1-8b.json +6 -0
- results/zero-shot/llama-3.2-1b.json +6 -0
- results/zero-shot/llama-3.2-3b-instruct.json +6 -0
- results/zero-shot/llama-3.2-3b.json +6 -0
- results/zero-shot/mistral-7b.json +6 -0
- results/zero-shot/trendyol-7b.json +6 -0
- results/zero-shot/turna.json +6 -0
data.py
CHANGED
@@ -52,6 +52,7 @@ DATASET_TASK_DICT = {
|
|
52 |
'turkce_atasozleri': Tasks.MULTIPLE_CHOICE,
|
53 |
'turkishmmlu':Tasks.MULTIPLE_CHOICE,
|
54 |
'bilmecebench':Tasks.MULTIPLE_CHOICE,
|
|
|
55 |
|
56 |
# fact-checking, not sure whether these are multi-choice
|
57 |
# 'trclaim19': Tasks.MULTIPLE_CHOICE,
|
@@ -93,8 +94,8 @@ DATASET_GROUPS = {
|
|
93 |
'description': 'Turkish splits of SQuAD-like datasets XQuAD and TQUAD.',
|
94 |
},
|
95 |
'MCQA': {
|
96 |
-
'datasets': ['xcopa_tr', 'exams_tr', 'belebele_tr', 'turkce_atasozleri'] + [x for x in DATASET_TASK_DICT.keys() if x.startswith('turkish_plu')],
|
97 |
-
'description': 'Multiple Choice Question Answering datasets: XCOPA, Exams, Belebele, Turkish PLU,
|
98 |
},
|
99 |
'TC': {
|
100 |
'datasets': ['sts_tr', 'offenseval_tr', 'news_cat', 'ironytr', ],
|
|
|
52 |
'turkce_atasozleri': Tasks.MULTIPLE_CHOICE,
|
53 |
'turkishmmlu':Tasks.MULTIPLE_CHOICE,
|
54 |
'bilmecebench':Tasks.MULTIPLE_CHOICE,
|
55 |
+
'circumflex_tr':Tasks.MULTIPLE_CHOICE,
|
56 |
|
57 |
# fact-checking, not sure whether these are multi-choice
|
58 |
# 'trclaim19': Tasks.MULTIPLE_CHOICE,
|
|
|
94 |
'description': 'Turkish splits of SQuAD-like datasets XQuAD and TQUAD.',
|
95 |
},
|
96 |
'MCQA': {
|
97 |
+
'datasets': ['xcopa_tr', 'exams_tr', 'belebele_tr', 'turkce_atasozleri', 'bilmecebench', 'turkishmmlu', 'circumflex_tr'] + [x for x in DATASET_TASK_DICT.keys() if x.startswith('turkish_plu')],
|
98 |
+
'description': 'Multiple Choice Question Answering datasets: XCOPA, Exams, Belebele, Turkish PLU, Turkce-Atasozleri, BilmeceBench, TurkishMMLU, and CircumflexTR'
|
99 |
},
|
100 |
'TC': {
|
101 |
'datasets': ['sts_tr', 'offenseval_tr', 'news_cat', 'ironytr', ],
|
results/zero-shot/CerebrumTech__cere-llama-3-8b-tr.json
CHANGED
@@ -186,6 +186,12 @@
|
|
186 |
"task": "multiple_choice",
|
187 |
"acc": 0.3393665158371041,
|
188 |
"acc_norm": 0.3393665158371041
|
|
|
|
|
|
|
|
|
|
|
|
|
189 |
}
|
190 |
]
|
191 |
}
|
|
|
186 |
"task": "multiple_choice",
|
187 |
"acc": 0.3393665158371041,
|
188 |
"acc_norm": 0.3393665158371041
|
189 |
+
},
|
190 |
+
{
|
191 |
+
"name": "circumflex_tr",
|
192 |
+
"task": "multiple_choice",
|
193 |
+
"acc": 0.6142857142857143,
|
194 |
+
"acc_norm": 0.6142857142857143
|
195 |
}
|
196 |
]
|
197 |
}
|
results/zero-shot/Llama-3.3-70B-Instruct.json
CHANGED
@@ -188,6 +188,12 @@
|
|
188 |
"task": "multiple_choice",
|
189 |
"acc": 0.7262443438914027,
|
190 |
"acc_norm": 0.7262443438914027
|
|
|
|
|
|
|
|
|
|
|
|
|
191 |
}
|
192 |
]
|
193 |
}
|
|
|
188 |
"task": "multiple_choice",
|
189 |
"acc": 0.7262443438914027,
|
190 |
"acc_norm": 0.7262443438914027
|
191 |
+
},
|
192 |
+
{
|
193 |
+
"name": "circumflex_tr",
|
194 |
+
"task": "multiple_choice",
|
195 |
+
"acc": 0.6,
|
196 |
+
"acc_norm": 0.6
|
197 |
}
|
198 |
]
|
199 |
}
|
results/zero-shot/Ministral-8B-Instruct.json
CHANGED
@@ -185,6 +185,12 @@
|
|
185 |
"task": "multiple_choice",
|
186 |
"acc": 0.248868778280543,
|
187 |
"acc_norm": 0.248868778280543
|
|
|
|
|
|
|
|
|
|
|
|
|
188 |
}
|
189 |
]
|
190 |
}
|
|
|
185 |
"task": "multiple_choice",
|
186 |
"acc": 0.248868778280543,
|
187 |
"acc_norm": 0.248868778280543
|
188 |
+
},
|
189 |
+
{
|
190 |
+
"name": "circumflex_tr",
|
191 |
+
"task": "multiple_choice",
|
192 |
+
"acc": 0.5571428571428572,
|
193 |
+
"acc_norm": 0.5571428571428572
|
194 |
}
|
195 |
]
|
196 |
}
|
results/zero-shot/Mistral-7B-Instruct-v0.3.json
CHANGED
@@ -185,6 +185,12 @@
|
|
185 |
"task": "multiple_choice",
|
186 |
"acc": 0.2149321266968326,
|
187 |
"acc_norm": 0.2149321266968326
|
|
|
|
|
|
|
|
|
|
|
|
|
188 |
}
|
189 |
]
|
190 |
}
|
|
|
185 |
"task": "multiple_choice",
|
186 |
"acc": 0.2149321266968326,
|
187 |
"acc_norm": 0.2149321266968326
|
188 |
+
},
|
189 |
+
{
|
190 |
+
"name": "circumflex_tr",
|
191 |
+
"task": "multiple_choice",
|
192 |
+
"acc": 0.5142857142857142,
|
193 |
+
"acc_norm": 0.5142857142857142
|
194 |
}
|
195 |
]
|
196 |
}
|
results/zero-shot/Mistral-7B-v0.3.json
CHANGED
@@ -185,6 +185,12 @@
|
|
185 |
"task": "multiple_choice",
|
186 |
"acc": 0.23529411764705882,
|
187 |
"acc_norm": 0.23529411764705882
|
|
|
|
|
|
|
|
|
|
|
|
|
188 |
}
|
189 |
]
|
190 |
}
|
|
|
185 |
"task": "multiple_choice",
|
186 |
"acc": 0.23529411764705882,
|
187 |
"acc_norm": 0.23529411764705882
|
188 |
+
},
|
189 |
+
{
|
190 |
+
"name": "circumflex_tr",
|
191 |
+
"task": "multiple_choice",
|
192 |
+
"acc": 0.5142857142857142,
|
193 |
+
"acc_norm": 0.5142857142857142
|
194 |
}
|
195 |
]
|
196 |
}
|
results/zero-shot/Mixtral-8x7B-Instruct-v0.1.json
CHANGED
@@ -187,6 +187,12 @@
|
|
187 |
"task": "multiple_choice",
|
188 |
"acc": 0.3416289592760181,
|
189 |
"acc_norm": 0.3416289592760181
|
|
|
|
|
|
|
|
|
|
|
|
|
190 |
}
|
191 |
]
|
192 |
}
|
|
|
187 |
"task": "multiple_choice",
|
188 |
"acc": 0.3416289592760181,
|
189 |
"acc_norm": 0.3416289592760181
|
190 |
+
},
|
191 |
+
{
|
192 |
+
"name": "circumflex_tr",
|
193 |
+
"task": "multiple_choice",
|
194 |
+
"acc": 0.5714285714285714,
|
195 |
+
"acc_norm": 0.5714285714285714
|
196 |
}
|
197 |
]
|
198 |
}
|
results/zero-shot/Qwen2.5-0.5B-Instruct.json
CHANGED
@@ -185,6 +185,12 @@
|
|
185 |
"task": "multiple_choice",
|
186 |
"acc": 0.2420814479638009,
|
187 |
"acc_norm": 0.2420814479638009
|
|
|
|
|
|
|
|
|
|
|
|
|
188 |
}
|
189 |
]
|
190 |
}
|
|
|
185 |
"task": "multiple_choice",
|
186 |
"acc": 0.2420814479638009,
|
187 |
"acc_norm": 0.2420814479638009
|
188 |
+
},
|
189 |
+
{
|
190 |
+
"name": "circumflex_tr",
|
191 |
+
"task": "multiple_choice",
|
192 |
+
"acc": 0.5857142857142857,
|
193 |
+
"acc_norm": 0.5857142857142857
|
194 |
}
|
195 |
]
|
196 |
}
|
results/zero-shot/Qwen2.5-0.5B.json
CHANGED
@@ -185,6 +185,12 @@
|
|
185 |
"task": "multiple_choice",
|
186 |
"acc": 0.251131221719457,
|
187 |
"acc_norm": 0.251131221719457
|
|
|
|
|
|
|
|
|
|
|
|
|
188 |
}
|
189 |
]
|
190 |
}
|
|
|
185 |
"task": "multiple_choice",
|
186 |
"acc": 0.251131221719457,
|
187 |
"acc_norm": 0.251131221719457
|
188 |
+
},
|
189 |
+
{
|
190 |
+
"name": "circumflex_tr",
|
191 |
+
"task": "multiple_choice",
|
192 |
+
"acc": 0.4714285714285714,
|
193 |
+
"acc_norm": 0.4714285714285714
|
194 |
}
|
195 |
]
|
196 |
}
|
results/zero-shot/Qwen2.5-1.5B-Instruct.json
CHANGED
@@ -185,6 +185,12 @@
|
|
185 |
"task": "multiple_choice",
|
186 |
"acc": 0.2918552036199095,
|
187 |
"acc_norm": 0.2918552036199095
|
|
|
|
|
|
|
|
|
|
|
|
|
188 |
}
|
189 |
]
|
190 |
}
|
|
|
185 |
"task": "multiple_choice",
|
186 |
"acc": 0.2918552036199095,
|
187 |
"acc_norm": 0.2918552036199095
|
188 |
+
},
|
189 |
+
{
|
190 |
+
"name": "circumflex_tr",
|
191 |
+
"task": "multiple_choice",
|
192 |
+
"acc": 0.5142857142857142,
|
193 |
+
"acc_norm": 0.5142857142857142
|
194 |
}
|
195 |
]
|
196 |
}
|
results/zero-shot/Qwen2.5-1.5B.json
CHANGED
@@ -185,6 +185,12 @@
|
|
185 |
"task": "multiple_choice",
|
186 |
"acc": 0.2986425339366516,
|
187 |
"acc_norm": 0.2986425339366516
|
|
|
|
|
|
|
|
|
|
|
|
|
188 |
}
|
189 |
]
|
190 |
}
|
|
|
185 |
"task": "multiple_choice",
|
186 |
"acc": 0.2986425339366516,
|
187 |
"acc_norm": 0.2986425339366516
|
188 |
+
},
|
189 |
+
{
|
190 |
+
"name": "circumflex_tr",
|
191 |
+
"task": "multiple_choice",
|
192 |
+
"acc": 0.5142857142857142,
|
193 |
+
"acc_norm": 0.5142857142857142
|
194 |
}
|
195 |
]
|
196 |
}
|
results/zero-shot/Qwen2.5-14B-Instruct.json
CHANGED
@@ -187,6 +187,12 @@
|
|
187 |
"task": "multiple_choice",
|
188 |
"acc": 0.5701357466063348,
|
189 |
"acc_norm": 0.5701357466063348
|
|
|
|
|
|
|
|
|
|
|
|
|
190 |
}
|
191 |
]
|
192 |
}
|
|
|
187 |
"task": "multiple_choice",
|
188 |
"acc": 0.5701357466063348,
|
189 |
"acc_norm": 0.5701357466063348
|
190 |
+
},
|
191 |
+
{
|
192 |
+
"name": "circumflex_tr",
|
193 |
+
"task": "multiple_choice",
|
194 |
+
"acc": 0.5428571428571428,
|
195 |
+
"acc_norm": 0.5428571428571428
|
196 |
}
|
197 |
]
|
198 |
}
|
results/zero-shot/Qwen2.5-14B.json
CHANGED
@@ -187,6 +187,12 @@
|
|
187 |
"task": "multiple_choice",
|
188 |
"acc": 0.4751131221719457,
|
189 |
"acc_norm": 0.4751131221719457
|
|
|
|
|
|
|
|
|
|
|
|
|
190 |
}
|
191 |
]
|
192 |
}
|
|
|
187 |
"task": "multiple_choice",
|
188 |
"acc": 0.4751131221719457,
|
189 |
"acc_norm": 0.4751131221719457
|
190 |
+
},
|
191 |
+
{
|
192 |
+
"name": "circumflex_tr",
|
193 |
+
"task": "multiple_choice",
|
194 |
+
"acc": 0.5714285714285714,
|
195 |
+
"acc_norm": 0.5714285714285714
|
196 |
}
|
197 |
]
|
198 |
}
|
results/zero-shot/Qwen2.5-3B-Instruct.json
CHANGED
@@ -185,6 +185,12 @@
|
|
185 |
"task": "multiple_choice",
|
186 |
"acc": 0.33031674208144796,
|
187 |
"acc_norm": 0.33031674208144796
|
|
|
|
|
|
|
|
|
|
|
|
|
188 |
}
|
189 |
]
|
190 |
}
|
|
|
185 |
"task": "multiple_choice",
|
186 |
"acc": 0.33031674208144796,
|
187 |
"acc_norm": 0.33031674208144796
|
188 |
+
},
|
189 |
+
{
|
190 |
+
"name": "circumflex_tr",
|
191 |
+
"task": "multiple_choice",
|
192 |
+
"acc": 0.6285714285714286,
|
193 |
+
"acc_norm": 0.6285714285714286
|
194 |
}
|
195 |
]
|
196 |
}
|
results/zero-shot/Qwen2.5-3B.json
CHANGED
@@ -185,6 +185,12 @@
|
|
185 |
"task": "multiple_choice",
|
186 |
"acc": 0.24434389140271492,
|
187 |
"acc_norm": 0.24434389140271492
|
|
|
|
|
|
|
|
|
|
|
|
|
188 |
}
|
189 |
]
|
190 |
}
|
|
|
185 |
"task": "multiple_choice",
|
186 |
"acc": 0.24434389140271492,
|
187 |
"acc_norm": 0.24434389140271492
|
188 |
+
},
|
189 |
+
{
|
190 |
+
"name": "circumflex_tr",
|
191 |
+
"task": "multiple_choice",
|
192 |
+
"acc": 0.6285714285714286,
|
193 |
+
"acc_norm": 0.6285714285714286
|
194 |
}
|
195 |
]
|
196 |
}
|
results/zero-shot/Qwen2.5-7B-Instruct.json
CHANGED
@@ -185,6 +185,12 @@
|
|
185 |
"task": "multiple_choice",
|
186 |
"acc": 0.5203619909502263,
|
187 |
"acc_norm": 0.5203619909502263
|
|
|
|
|
|
|
|
|
|
|
|
|
188 |
}
|
189 |
]
|
190 |
}
|
|
|
185 |
"task": "multiple_choice",
|
186 |
"acc": 0.5203619909502263,
|
187 |
"acc_norm": 0.5203619909502263
|
188 |
+
},
|
189 |
+
{
|
190 |
+
"name": "circumflex_tr",
|
191 |
+
"task": "multiple_choice",
|
192 |
+
"acc": 0.5142857142857142,
|
193 |
+
"acc_norm": 0.5142857142857142
|
194 |
}
|
195 |
]
|
196 |
}
|
results/zero-shot/Qwen2.5-7B.json
CHANGED
@@ -185,6 +185,12 @@
|
|
185 |
"task": "multiple_choice",
|
186 |
"acc": 0.4841628959276018,
|
187 |
"acc_norm": 0.4841628959276018
|
|
|
|
|
|
|
|
|
|
|
|
|
188 |
}
|
189 |
]
|
190 |
}
|
|
|
185 |
"task": "multiple_choice",
|
186 |
"acc": 0.4841628959276018,
|
187 |
"acc_norm": 0.4841628959276018
|
188 |
+
},
|
189 |
+
{
|
190 |
+
"name": "circumflex_tr",
|
191 |
+
"task": "multiple_choice",
|
192 |
+
"acc": 0.5428571428571428,
|
193 |
+
"acc_norm": 0.5428571428571428
|
194 |
}
|
195 |
]
|
196 |
}
|
results/zero-shot/aya-23-35B.json
CHANGED
@@ -187,6 +187,12 @@
|
|
187 |
"task": "multiple_choice",
|
188 |
"acc": 0.34841628959276016,
|
189 |
"acc_norm": 0.34841628959276016
|
|
|
|
|
|
|
|
|
|
|
|
|
190 |
}
|
191 |
]
|
192 |
}
|
|
|
187 |
"task": "multiple_choice",
|
188 |
"acc": 0.34841628959276016,
|
189 |
"acc_norm": 0.34841628959276016
|
190 |
+
},
|
191 |
+
{
|
192 |
+
"name": "circumflex_tr",
|
193 |
+
"task": "multiple_choice",
|
194 |
+
"acc": 0.5285714285714286,
|
195 |
+
"acc_norm": 0.5285714285714286
|
196 |
}
|
197 |
]
|
198 |
}
|
results/zero-shot/aya-23-8b.json
CHANGED
@@ -181,6 +181,12 @@
|
|
181 |
"task": "multiple_choice",
|
182 |
"acc": 0.3438914027149321,
|
183 |
"acc_norm": 0.3438914027149321
|
|
|
|
|
|
|
|
|
|
|
|
|
184 |
}
|
185 |
]
|
186 |
}
|
|
|
181 |
"task": "multiple_choice",
|
182 |
"acc": 0.3438914027149321,
|
183 |
"acc_norm": 0.3438914027149321
|
184 |
+
},
|
185 |
+
{
|
186 |
+
"name": "circumflex_tr",
|
187 |
+
"task": "multiple_choice",
|
188 |
+
"acc": 0.4857142857142857,
|
189 |
+
"acc_norm": 0.4857142857142857
|
190 |
}
|
191 |
]
|
192 |
}
|
results/zero-shot/aya-expanse-32b.json
CHANGED
@@ -186,6 +186,12 @@
|
|
186 |
"task": "multiple_choice",
|
187 |
"acc": 0.4117647058823529,
|
188 |
"acc_norm": 0.4117647058823529
|
|
|
|
|
|
|
|
|
|
|
|
|
189 |
}
|
190 |
]
|
191 |
}
|
|
|
186 |
"task": "multiple_choice",
|
187 |
"acc": 0.4117647058823529,
|
188 |
"acc_norm": 0.4117647058823529
|
189 |
+
},
|
190 |
+
{
|
191 |
+
"name": "circumflex_tr",
|
192 |
+
"task": "multiple_choice",
|
193 |
+
"acc": 0.6285714285714286,
|
194 |
+
"acc_norm": 0.6285714285714286
|
195 |
}
|
196 |
]
|
197 |
}
|
results/zero-shot/aya-expanse-8b.json
CHANGED
@@ -179,6 +179,12 @@
|
|
179 |
"task": "multiple_choice",
|
180 |
"acc": 0.48868778280542985,
|
181 |
"acc_norm": 0.48868778280542985
|
|
|
|
|
|
|
|
|
|
|
|
|
182 |
}
|
183 |
]
|
184 |
}
|
|
|
179 |
"task": "multiple_choice",
|
180 |
"acc": 0.48868778280542985,
|
181 |
"acc_norm": 0.48868778280542985
|
182 |
+
},
|
183 |
+
{
|
184 |
+
"name": "circumflex_tr",
|
185 |
+
"task": "multiple_choice",
|
186 |
+
"acc": 0.5571428571428572,
|
187 |
+
"acc_norm": 0.5571428571428572
|
188 |
}
|
189 |
]
|
190 |
}
|
results/zero-shot/aya101.json
CHANGED
@@ -185,6 +185,12 @@
|
|
185 |
"task": "multiple_choice",
|
186 |
"acc": 0.47058823529411764,
|
187 |
"acc_norm": 0.47058823529411764
|
|
|
|
|
|
|
|
|
|
|
|
|
188 |
}
|
189 |
]
|
190 |
}
|
|
|
185 |
"task": "multiple_choice",
|
186 |
"acc": 0.47058823529411764,
|
187 |
"acc_norm": 0.47058823529411764
|
188 |
+
},
|
189 |
+
{
|
190 |
+
"name": "circumflex_tr",
|
191 |
+
"task": "multiple_choice",
|
192 |
+
"acc": 0.5571428571428572,
|
193 |
+
"acc_norm": 0.5571428571428572
|
194 |
}
|
195 |
]
|
196 |
}
|
results/zero-shot/commencis-7b.json
CHANGED
@@ -185,6 +185,12 @@
|
|
185 |
"task": "multiple_choice",
|
186 |
"acc": 0.2420814479638009,
|
187 |
"acc_norm": 0.2420814479638009
|
|
|
|
|
|
|
|
|
|
|
|
|
188 |
}
|
189 |
]
|
190 |
}
|
|
|
185 |
"task": "multiple_choice",
|
186 |
"acc": 0.2420814479638009,
|
187 |
"acc_norm": 0.2420814479638009
|
188 |
+
},
|
189 |
+
{
|
190 |
+
"name": "circumflex_tr",
|
191 |
+
"task": "multiple_choice",
|
192 |
+
"acc": 0.44285714285714284,
|
193 |
+
"acc_norm": 0.44285714285714284
|
194 |
}
|
195 |
]
|
196 |
}
|
results/zero-shot/kanarya-2b.json
CHANGED
@@ -184,6 +184,12 @@
|
|
184 |
"task": "multiple_choice",
|
185 |
"acc": 0.27149321266968324,
|
186 |
"acc_norm": 0.27149321266968324
|
|
|
|
|
|
|
|
|
|
|
|
|
187 |
}
|
188 |
]
|
189 |
}
|
|
|
184 |
"task": "multiple_choice",
|
185 |
"acc": 0.27149321266968324,
|
186 |
"acc_norm": 0.27149321266968324
|
187 |
+
},
|
188 |
+
{
|
189 |
+
"name": "circumflex_tr",
|
190 |
+
"task": "multiple_choice",
|
191 |
+
"acc": 0.4857142857142857,
|
192 |
+
"acc_norm": 0.4857142857142857
|
193 |
}
|
194 |
]
|
195 |
}
|
results/zero-shot/llama-3-8b-instruct.json
CHANGED
@@ -180,6 +180,12 @@
|
|
180 |
"task": "multiple_choice",
|
181 |
"acc": 0.38461538461538464,
|
182 |
"acc_norm": 0.38461538461538464
|
|
|
|
|
|
|
|
|
|
|
|
|
183 |
}
|
184 |
]
|
185 |
}
|
|
|
180 |
"task": "multiple_choice",
|
181 |
"acc": 0.38461538461538464,
|
182 |
"acc_norm": 0.38461538461538464
|
183 |
+
},
|
184 |
+
{
|
185 |
+
"name": "circumflex_tr",
|
186 |
+
"task": "multiple_choice",
|
187 |
+
"acc": 0.5571428571428572,
|
188 |
+
"acc_norm": 0.5571428571428572
|
189 |
}
|
190 |
]
|
191 |
}
|
results/zero-shot/llama-3-8b.json
CHANGED
@@ -179,6 +179,12 @@
|
|
179 |
"task": "multiple_choice",
|
180 |
"acc": 0.29638009049773756,
|
181 |
"acc_norm": 0.29638009049773756
|
|
|
|
|
|
|
|
|
|
|
|
|
182 |
}
|
183 |
]
|
184 |
}
|
|
|
179 |
"task": "multiple_choice",
|
180 |
"acc": 0.29638009049773756,
|
181 |
"acc_norm": 0.29638009049773756
|
182 |
+
},
|
183 |
+
{
|
184 |
+
"name": "circumflex_tr",
|
185 |
+
"task": "multiple_choice",
|
186 |
+
"acc": 0.4857142857142857,
|
187 |
+
"acc_norm": 0.4857142857142857
|
188 |
}
|
189 |
]
|
190 |
}
|
results/zero-shot/llama-3.1-8b-instruct.json
CHANGED
@@ -179,6 +179,12 @@
|
|
179 |
"task": "multiple_choice",
|
180 |
"acc": 0.416289592760181,
|
181 |
"acc_norm": 0.416289592760181
|
|
|
|
|
|
|
|
|
|
|
|
|
182 |
}
|
183 |
]
|
184 |
}
|
|
|
179 |
"task": "multiple_choice",
|
180 |
"acc": 0.416289592760181,
|
181 |
"acc_norm": 0.416289592760181
|
182 |
+
},
|
183 |
+
{
|
184 |
+
"name": "circumflex_tr",
|
185 |
+
"task": "multiple_choice",
|
186 |
+
"acc": 0.5714285714285714,
|
187 |
+
"acc_norm": 0.5714285714285714
|
188 |
}
|
189 |
]
|
190 |
}
|
results/zero-shot/llama-3.1-8b.json
CHANGED
@@ -179,6 +179,12 @@
|
|
179 |
"task": "multiple_choice",
|
180 |
"acc": 0.3212669683257919,
|
181 |
"acc_norm": 0.3212669683257919
|
|
|
|
|
|
|
|
|
|
|
|
|
182 |
}
|
183 |
]
|
184 |
}
|
|
|
179 |
"task": "multiple_choice",
|
180 |
"acc": 0.3212669683257919,
|
181 |
"acc_norm": 0.3212669683257919
|
182 |
+
},
|
183 |
+
{
|
184 |
+
"name": "circumflex_tr",
|
185 |
+
"task": "multiple_choice",
|
186 |
+
"acc": 0.5571428571428572,
|
187 |
+
"acc_norm": 0.5571428571428572
|
188 |
}
|
189 |
]
|
190 |
}
|
results/zero-shot/llama-3.2-1b.json
CHANGED
@@ -211,6 +211,12 @@
|
|
211 |
"task": "multiple_choice",
|
212 |
"acc": 0.2239819004524887,
|
213 |
"acc_norm": 0.2239819004524887
|
|
|
|
|
|
|
|
|
|
|
|
|
214 |
}
|
215 |
]
|
216 |
}
|
|
|
211 |
"task": "multiple_choice",
|
212 |
"acc": 0.2239819004524887,
|
213 |
"acc_norm": 0.2239819004524887
|
214 |
+
},
|
215 |
+
{
|
216 |
+
"name": "circumflex_tr",
|
217 |
+
"task": "multiple_choice",
|
218 |
+
"acc": 0.5,
|
219 |
+
"acc_norm": 0.5
|
220 |
}
|
221 |
]
|
222 |
}
|
results/zero-shot/llama-3.2-3b-instruct.json
CHANGED
@@ -211,6 +211,12 @@
|
|
211 |
"task": "multiple_choice",
|
212 |
"acc": 0.30995475113122173,
|
213 |
"acc_norm": 0.30995475113122173
|
|
|
|
|
|
|
|
|
|
|
|
|
214 |
}
|
215 |
]
|
216 |
}
|
|
|
211 |
"task": "multiple_choice",
|
212 |
"acc": 0.30995475113122173,
|
213 |
"acc_norm": 0.30995475113122173
|
214 |
+
},
|
215 |
+
{
|
216 |
+
"name": "circumflex_tr",
|
217 |
+
"task": "multiple_choice",
|
218 |
+
"acc": 0.5142857142857142,
|
219 |
+
"acc_norm": 0.5142857142857142
|
220 |
}
|
221 |
]
|
222 |
}
|
results/zero-shot/llama-3.2-3b.json
CHANGED
@@ -179,6 +179,12 @@
|
|
179 |
"task": "multiple_choice",
|
180 |
"acc": 0.2895927601809955,
|
181 |
"acc_norm": 0.2895927601809955
|
|
|
|
|
|
|
|
|
|
|
|
|
182 |
}
|
183 |
]
|
184 |
}
|
|
|
179 |
"task": "multiple_choice",
|
180 |
"acc": 0.2895927601809955,
|
181 |
"acc_norm": 0.2895927601809955
|
182 |
+
},
|
183 |
+
{
|
184 |
+
"name": "circumflex_tr",
|
185 |
+
"task": "multiple_choice",
|
186 |
+
"acc": 0.5714285714285714,
|
187 |
+
"acc_norm": 0.5714285714285714
|
188 |
}
|
189 |
]
|
190 |
}
|
results/zero-shot/mistral-7b.json
CHANGED
@@ -185,6 +185,12 @@
|
|
185 |
"task": "multiple_choice",
|
186 |
"acc": 0.25339366515837103,
|
187 |
"acc_norm": 0.25339366515837103
|
|
|
|
|
|
|
|
|
|
|
|
|
188 |
}
|
189 |
]
|
190 |
}
|
|
|
185 |
"task": "multiple_choice",
|
186 |
"acc": 0.25339366515837103,
|
187 |
"acc_norm": 0.25339366515837103
|
188 |
+
},
|
189 |
+
{
|
190 |
+
"name": "circumflex_tr",
|
191 |
+
"task": "multiple_choice",
|
192 |
+
"acc": 0.5571428571428572,
|
193 |
+
"acc_norm": 0.5571428571428572
|
194 |
}
|
195 |
]
|
196 |
}
|
results/zero-shot/trendyol-7b.json
CHANGED
@@ -185,6 +185,12 @@
|
|
185 |
"task": "multiple_choice",
|
186 |
"acc": 0.23076923076923078,
|
187 |
"acc_norm": 0.23076923076923078
|
|
|
|
|
|
|
|
|
|
|
|
|
188 |
}
|
189 |
]
|
190 |
}
|
|
|
185 |
"task": "multiple_choice",
|
186 |
"acc": 0.23076923076923078,
|
187 |
"acc_norm": 0.23076923076923078
|
188 |
+
},
|
189 |
+
{
|
190 |
+
"name": "circumflex_tr",
|
191 |
+
"task": "multiple_choice",
|
192 |
+
"acc": 0.5428571428571428,
|
193 |
+
"acc_norm": 0.5428571428571428
|
194 |
}
|
195 |
]
|
196 |
}
|
results/zero-shot/turna.json
CHANGED
@@ -185,6 +185,12 @@
|
|
185 |
"task": "multiple_choice",
|
186 |
"acc": 0.2420814479638009,
|
187 |
"acc_norm": 0.2420814479638009
|
|
|
|
|
|
|
|
|
|
|
|
|
188 |
}
|
189 |
]
|
190 |
}
|
|
|
185 |
"task": "multiple_choice",
|
186 |
"acc": 0.2420814479638009,
|
187 |
"acc_norm": 0.2420814479638009
|
188 |
+
},
|
189 |
+
{
|
190 |
+
"name": "circumflex_tr",
|
191 |
+
"task": "multiple_choice",
|
192 |
+
"acc": 0.4714285714285714,
|
193 |
+
"acc_norm": 0.4714285714285714
|
194 |
}
|
195 |
]
|
196 |
}
|