Update Dataset Groups and add Circumflex Dataset

#7
Files changed (34)
  1. data.py +3 -2
  2. results/zero-shot/CerebrumTech__cere-llama-3-8b-tr.json +6 -0
  3. results/zero-shot/Llama-3.3-70B-Instruct.json +6 -0
  4. results/zero-shot/Ministral-8B-Instruct.json +6 -0
  5. results/zero-shot/Mistral-7B-Instruct-v0.3.json +6 -0
  6. results/zero-shot/Mistral-7B-v0.3.json +6 -0
  7. results/zero-shot/Mixtral-8x7B-Instruct-v0.1.json +6 -0
  8. results/zero-shot/Qwen2.5-0.5B-Instruct.json +6 -0
  9. results/zero-shot/Qwen2.5-0.5B.json +6 -0
  10. results/zero-shot/Qwen2.5-1.5B-Instruct.json +6 -0
  11. results/zero-shot/Qwen2.5-1.5B.json +6 -0
  12. results/zero-shot/Qwen2.5-14B-Instruct.json +6 -0
  13. results/zero-shot/Qwen2.5-14B.json +6 -0
  14. results/zero-shot/Qwen2.5-3B-Instruct.json +6 -0
  15. results/zero-shot/Qwen2.5-3B.json +6 -0
  16. results/zero-shot/Qwen2.5-7B-Instruct.json +6 -0
  17. results/zero-shot/Qwen2.5-7B.json +6 -0
  18. results/zero-shot/aya-23-35B.json +6 -0
  19. results/zero-shot/aya-23-8b.json +6 -0
  20. results/zero-shot/aya-expanse-32b.json +6 -0
  21. results/zero-shot/aya-expanse-8b.json +6 -0
  22. results/zero-shot/aya101.json +6 -0
  23. results/zero-shot/commencis-7b.json +6 -0
  24. results/zero-shot/kanarya-2b.json +6 -0
  25. results/zero-shot/llama-3-8b-instruct.json +6 -0
  26. results/zero-shot/llama-3-8b.json +6 -0
  27. results/zero-shot/llama-3.1-8b-instruct.json +6 -0
  28. results/zero-shot/llama-3.1-8b.json +6 -0
  29. results/zero-shot/llama-3.2-1b.json +6 -0
  30. results/zero-shot/llama-3.2-3b-instruct.json +6 -0
  31. results/zero-shot/llama-3.2-3b.json +6 -0
  32. results/zero-shot/mistral-7b.json +6 -0
  33. results/zero-shot/trendyol-7b.json +6 -0
  34. results/zero-shot/turna.json +6 -0
data.py CHANGED
@@ -52,6 +52,7 @@ DATASET_TASK_DICT = {
     'turkce_atasozleri': Tasks.MULTIPLE_CHOICE,
     'turkishmmlu':Tasks.MULTIPLE_CHOICE,
     'bilmecebench':Tasks.MULTIPLE_CHOICE,
+    'circumflex_tr':Tasks.MULTIPLE_CHOICE,
 
     # fact-checking, not sure whether these are multi-choice
     # 'trclaim19': Tasks.MULTIPLE_CHOICE,
@@ -93,8 +94,8 @@ DATASET_GROUPS = {
         'description': 'Turkish splits of SQuAD-like datasets XQuAD and TQUAD.',
     },
     'MCQA': {
-        'datasets': ['xcopa_tr', 'exams_tr', 'belebele_tr', 'turkce_atasozleri'] + [x for x in DATASET_TASK_DICT.keys() if x.startswith('turkish_plu')],
-        'description': 'Multiple Choice Question Answering datasets: XCOPA, Exams, Belebele, Turkish PLU, and Turkce-Atasozleri.'
+        'datasets': ['xcopa_tr', 'exams_tr', 'belebele_tr', 'turkce_atasozleri', 'bilmecebench', 'turkishmmlu', 'circumflex_tr'] + [x for x in DATASET_TASK_DICT.keys() if x.startswith('turkish_plu')],
+        'description': 'Multiple Choice Question Answering datasets: XCOPA, Exams, Belebele, Turkish PLU, Turkce-Atasozleri, BilmeceBench, TurkishMMLU, and CircumflexTR'
     },
     'TC': {
         'datasets': ['sts_tr', 'offenseval_tr', 'news_cat', 'ironytr', ],
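
Note (reviewer sketch, not part of the diff): after this change the MCQA group picks up the new datasets. A minimal check, assuming data.py stays importable from the repo root as shown above:

# Reviewer sketch only; assumes data.py defines DATASET_GROUPS as in the hunk above.
from data import DATASET_GROUPS

mcqa = DATASET_GROUPS['MCQA']['datasets']
for name in ('turkce_atasozleri', 'bilmecebench', 'turkishmmlu', 'circumflex_tr'):
    assert name in mcqa, f"{name} missing from the MCQA group"
print(len(mcqa), mcqa)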
results/zero-shot/CerebrumTech__cere-llama-3-8b-tr.json CHANGED
@@ -186,6 +186,12 @@
       "task": "multiple_choice",
       "acc": 0.3393665158371041,
       "acc_norm": 0.3393665158371041
+    },
+    {
+      "name": "circumflex_tr",
+      "task": "multiple_choice",
+      "acc": 0.6142857142857143,
+      "acc_norm": 0.6142857142857143
     }
   ]
 }
results/zero-shot/Llama-3.3-70B-Instruct.json CHANGED
@@ -188,6 +188,12 @@
       "task": "multiple_choice",
       "acc": 0.7262443438914027,
       "acc_norm": 0.7262443438914027
+    },
+    {
+      "name": "circumflex_tr",
+      "task": "multiple_choice",
+      "acc": 0.6,
+      "acc_norm": 0.6
     }
   ]
 }
results/zero-shot/Ministral-8B-Instruct.json CHANGED
@@ -185,6 +185,12 @@
       "task": "multiple_choice",
       "acc": 0.248868778280543,
       "acc_norm": 0.248868778280543
+    },
+    {
+      "name": "circumflex_tr",
+      "task": "multiple_choice",
+      "acc": 0.5571428571428572,
+      "acc_norm": 0.5571428571428572
     }
   ]
 }
results/zero-shot/Mistral-7B-Instruct-v0.3.json CHANGED
@@ -185,6 +185,12 @@
       "task": "multiple_choice",
       "acc": 0.2149321266968326,
       "acc_norm": 0.2149321266968326
+    },
+    {
+      "name": "circumflex_tr",
+      "task": "multiple_choice",
+      "acc": 0.5142857142857142,
+      "acc_norm": 0.5142857142857142
     }
   ]
 }
results/zero-shot/Mistral-7B-v0.3.json CHANGED
@@ -185,6 +185,12 @@
       "task": "multiple_choice",
       "acc": 0.23529411764705882,
       "acc_norm": 0.23529411764705882
+    },
+    {
+      "name": "circumflex_tr",
+      "task": "multiple_choice",
+      "acc": 0.5142857142857142,
+      "acc_norm": 0.5142857142857142
     }
   ]
 }
results/zero-shot/Mixtral-8x7B-Instruct-v0.1.json CHANGED
@@ -187,6 +187,12 @@
       "task": "multiple_choice",
       "acc": 0.3416289592760181,
       "acc_norm": 0.3416289592760181
+    },
+    {
+      "name": "circumflex_tr",
+      "task": "multiple_choice",
+      "acc": 0.5714285714285714,
+      "acc_norm": 0.5714285714285714
     }
   ]
 }
results/zero-shot/Qwen2.5-0.5B-Instruct.json CHANGED
@@ -185,6 +185,12 @@
       "task": "multiple_choice",
       "acc": 0.2420814479638009,
       "acc_norm": 0.2420814479638009
+    },
+    {
+      "name": "circumflex_tr",
+      "task": "multiple_choice",
+      "acc": 0.5857142857142857,
+      "acc_norm": 0.5857142857142857
     }
   ]
 }
results/zero-shot/Qwen2.5-0.5B.json CHANGED
@@ -185,6 +185,12 @@
       "task": "multiple_choice",
       "acc": 0.251131221719457,
       "acc_norm": 0.251131221719457
+    },
+    {
+      "name": "circumflex_tr",
+      "task": "multiple_choice",
+      "acc": 0.4714285714285714,
+      "acc_norm": 0.4714285714285714
     }
   ]
 }
results/zero-shot/Qwen2.5-1.5B-Instruct.json CHANGED
@@ -185,6 +185,12 @@
       "task": "multiple_choice",
       "acc": 0.2918552036199095,
       "acc_norm": 0.2918552036199095
+    },
+    {
+      "name": "circumflex_tr",
+      "task": "multiple_choice",
+      "acc": 0.5142857142857142,
+      "acc_norm": 0.5142857142857142
     }
   ]
 }
results/zero-shot/Qwen2.5-1.5B.json CHANGED
@@ -185,6 +185,12 @@
       "task": "multiple_choice",
       "acc": 0.2986425339366516,
       "acc_norm": 0.2986425339366516
+    },
+    {
+      "name": "circumflex_tr",
+      "task": "multiple_choice",
+      "acc": 0.5142857142857142,
+      "acc_norm": 0.5142857142857142
     }
   ]
 }
results/zero-shot/Qwen2.5-14B-Instruct.json CHANGED
@@ -187,6 +187,12 @@
       "task": "multiple_choice",
       "acc": 0.5701357466063348,
       "acc_norm": 0.5701357466063348
+    },
+    {
+      "name": "circumflex_tr",
+      "task": "multiple_choice",
+      "acc": 0.5428571428571428,
+      "acc_norm": 0.5428571428571428
     }
   ]
 }
results/zero-shot/Qwen2.5-14B.json CHANGED
@@ -187,6 +187,12 @@
       "task": "multiple_choice",
       "acc": 0.4751131221719457,
       "acc_norm": 0.4751131221719457
+    },
+    {
+      "name": "circumflex_tr",
+      "task": "multiple_choice",
+      "acc": 0.5714285714285714,
+      "acc_norm": 0.5714285714285714
     }
   ]
 }
results/zero-shot/Qwen2.5-3B-Instruct.json CHANGED
@@ -185,6 +185,12 @@
       "task": "multiple_choice",
       "acc": 0.33031674208144796,
       "acc_norm": 0.33031674208144796
+    },
+    {
+      "name": "circumflex_tr",
+      "task": "multiple_choice",
+      "acc": 0.6285714285714286,
+      "acc_norm": 0.6285714285714286
     }
   ]
 }
results/zero-shot/Qwen2.5-3B.json CHANGED
@@ -185,6 +185,12 @@
       "task": "multiple_choice",
       "acc": 0.24434389140271492,
       "acc_norm": 0.24434389140271492
+    },
+    {
+      "name": "circumflex_tr",
+      "task": "multiple_choice",
+      "acc": 0.6285714285714286,
+      "acc_norm": 0.6285714285714286
     }
   ]
 }
results/zero-shot/Qwen2.5-7B-Instruct.json CHANGED
@@ -185,6 +185,12 @@
       "task": "multiple_choice",
       "acc": 0.5203619909502263,
       "acc_norm": 0.5203619909502263
+    },
+    {
+      "name": "circumflex_tr",
+      "task": "multiple_choice",
+      "acc": 0.5142857142857142,
+      "acc_norm": 0.5142857142857142
     }
   ]
 }
results/zero-shot/Qwen2.5-7B.json CHANGED
@@ -185,6 +185,12 @@
       "task": "multiple_choice",
       "acc": 0.4841628959276018,
       "acc_norm": 0.4841628959276018
+    },
+    {
+      "name": "circumflex_tr",
+      "task": "multiple_choice",
+      "acc": 0.5428571428571428,
+      "acc_norm": 0.5428571428571428
     }
   ]
 }
results/zero-shot/aya-23-35B.json CHANGED
@@ -187,6 +187,12 @@
       "task": "multiple_choice",
       "acc": 0.34841628959276016,
       "acc_norm": 0.34841628959276016
+    },
+    {
+      "name": "circumflex_tr",
+      "task": "multiple_choice",
+      "acc": 0.5285714285714286,
+      "acc_norm": 0.5285714285714286
     }
   ]
 }
results/zero-shot/aya-23-8b.json CHANGED
@@ -181,6 +181,12 @@
       "task": "multiple_choice",
       "acc": 0.3438914027149321,
       "acc_norm": 0.3438914027149321
+    },
+    {
+      "name": "circumflex_tr",
+      "task": "multiple_choice",
+      "acc": 0.4857142857142857,
+      "acc_norm": 0.4857142857142857
     }
   ]
 }
results/zero-shot/aya-expanse-32b.json CHANGED
@@ -186,6 +186,12 @@
       "task": "multiple_choice",
       "acc": 0.4117647058823529,
       "acc_norm": 0.4117647058823529
+    },
+    {
+      "name": "circumflex_tr",
+      "task": "multiple_choice",
+      "acc": 0.6285714285714286,
+      "acc_norm": 0.6285714285714286
     }
   ]
 }
results/zero-shot/aya-expanse-8b.json CHANGED
@@ -179,6 +179,12 @@
       "task": "multiple_choice",
       "acc": 0.48868778280542985,
       "acc_norm": 0.48868778280542985
+    },
+    {
+      "name": "circumflex_tr",
+      "task": "multiple_choice",
+      "acc": 0.5571428571428572,
+      "acc_norm": 0.5571428571428572
     }
   ]
 }
results/zero-shot/aya101.json CHANGED
@@ -185,6 +185,12 @@
       "task": "multiple_choice",
       "acc": 0.47058823529411764,
       "acc_norm": 0.47058823529411764
+    },
+    {
+      "name": "circumflex_tr",
+      "task": "multiple_choice",
+      "acc": 0.5571428571428572,
+      "acc_norm": 0.5571428571428572
     }
   ]
 }
results/zero-shot/commencis-7b.json CHANGED
@@ -185,6 +185,12 @@
       "task": "multiple_choice",
       "acc": 0.2420814479638009,
       "acc_norm": 0.2420814479638009
+    },
+    {
+      "name": "circumflex_tr",
+      "task": "multiple_choice",
+      "acc": 0.44285714285714284,
+      "acc_norm": 0.44285714285714284
     }
   ]
 }
results/zero-shot/kanarya-2b.json CHANGED
@@ -184,6 +184,12 @@
       "task": "multiple_choice",
       "acc": 0.27149321266968324,
       "acc_norm": 0.27149321266968324
+    },
+    {
+      "name": "circumflex_tr",
+      "task": "multiple_choice",
+      "acc": 0.4857142857142857,
+      "acc_norm": 0.4857142857142857
     }
   ]
 }
results/zero-shot/llama-3-8b-instruct.json CHANGED
@@ -180,6 +180,12 @@
       "task": "multiple_choice",
       "acc": 0.38461538461538464,
       "acc_norm": 0.38461538461538464
+    },
+    {
+      "name": "circumflex_tr",
+      "task": "multiple_choice",
+      "acc": 0.5571428571428572,
+      "acc_norm": 0.5571428571428572
     }
   ]
 }
results/zero-shot/llama-3-8b.json CHANGED
@@ -179,6 +179,12 @@
       "task": "multiple_choice",
       "acc": 0.29638009049773756,
       "acc_norm": 0.29638009049773756
+    },
+    {
+      "name": "circumflex_tr",
+      "task": "multiple_choice",
+      "acc": 0.4857142857142857,
+      "acc_norm": 0.4857142857142857
     }
   ]
 }
results/zero-shot/llama-3.1-8b-instruct.json CHANGED
@@ -179,6 +179,12 @@
       "task": "multiple_choice",
       "acc": 0.416289592760181,
       "acc_norm": 0.416289592760181
+    },
+    {
+      "name": "circumflex_tr",
+      "task": "multiple_choice",
+      "acc": 0.5714285714285714,
+      "acc_norm": 0.5714285714285714
     }
   ]
 }
results/zero-shot/llama-3.1-8b.json CHANGED
@@ -179,6 +179,12 @@
       "task": "multiple_choice",
       "acc": 0.3212669683257919,
       "acc_norm": 0.3212669683257919
+    },
+    {
+      "name": "circumflex_tr",
+      "task": "multiple_choice",
+      "acc": 0.5571428571428572,
+      "acc_norm": 0.5571428571428572
     }
   ]
 }
results/zero-shot/llama-3.2-1b.json CHANGED
@@ -211,6 +211,12 @@
       "task": "multiple_choice",
       "acc": 0.2239819004524887,
       "acc_norm": 0.2239819004524887
+    },
+    {
+      "name": "circumflex_tr",
+      "task": "multiple_choice",
+      "acc": 0.5,
+      "acc_norm": 0.5
     }
   ]
 }
results/zero-shot/llama-3.2-3b-instruct.json CHANGED
@@ -211,6 +211,12 @@
       "task": "multiple_choice",
       "acc": 0.30995475113122173,
       "acc_norm": 0.30995475113122173
+    },
+    {
+      "name": "circumflex_tr",
+      "task": "multiple_choice",
+      "acc": 0.5142857142857142,
+      "acc_norm": 0.5142857142857142
     }
   ]
 }
results/zero-shot/llama-3.2-3b.json CHANGED
@@ -179,6 +179,12 @@
       "task": "multiple_choice",
       "acc": 0.2895927601809955,
       "acc_norm": 0.2895927601809955
+    },
+    {
+      "name": "circumflex_tr",
+      "task": "multiple_choice",
+      "acc": 0.5714285714285714,
+      "acc_norm": 0.5714285714285714
     }
   ]
 }
results/zero-shot/mistral-7b.json CHANGED
@@ -185,6 +185,12 @@
       "task": "multiple_choice",
       "acc": 0.25339366515837103,
       "acc_norm": 0.25339366515837103
+    },
+    {
+      "name": "circumflex_tr",
+      "task": "multiple_choice",
+      "acc": 0.5571428571428572,
+      "acc_norm": 0.5571428571428572
     }
   ]
 }
results/zero-shot/trendyol-7b.json CHANGED
@@ -185,6 +185,12 @@
       "task": "multiple_choice",
       "acc": 0.23076923076923078,
       "acc_norm": 0.23076923076923078
+    },
+    {
+      "name": "circumflex_tr",
+      "task": "multiple_choice",
+      "acc": 0.5428571428571428,
+      "acc_norm": 0.5428571428571428
     }
   ]
 }
results/zero-shot/turna.json CHANGED
@@ -185,6 +185,12 @@
       "task": "multiple_choice",
       "acc": 0.2420814479638009,
       "acc_norm": 0.2420814479638009
+    },
+    {
+      "name": "circumflex_tr",
+      "task": "multiple_choice",
+      "acc": 0.4714285714285714,
+      "acc_norm": 0.4714285714285714
     }
   ]
 }
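
For reference, a sketch of how the new circumflex_tr entries can be read back out of these result files. The per-entry field names match the additions above; the top-level "results" key and the exact nesting are assumptions, since the hunks only show the tail of each file:

# Sketch only: print each model's zero-shot circumflex_tr accuracy.
# Field names ("name", "task", "acc", "acc_norm") match the entries added in
# this PR; the top-level "results" key is an assumption based on the hunk
# context (only the closing brackets are visible in the diffs).
import glob
import json
import os

for path in sorted(glob.glob('results/zero-shot/*.json')):
    with open(path) as f:
        data = json.load(f)
    for entry in data.get('results', []):
        if entry.get('name') == 'circumflex_tr':
            print(f"{os.path.basename(path):45s} acc={entry['acc']:.3f}")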