diff --git "a/results.json" "b/results.json" --- "a/results.json" +++ "b/results.json" @@ -48,6 +48,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"bn", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"bn", @@ -90,6 +97,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"de", + "task":"mgsm", + "metric":"accuracy", + "score":0.3 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"de", @@ -132,6 +146,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"en", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"en", @@ -174,6 +195,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"es", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"es", @@ -216,6 +244,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"fr", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"fr", @@ -342,6 +377,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"ja", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ja", @@ -531,6 +573,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"ru", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ru", @@ -573,6 +622,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"sw", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"sw", @@ -615,6 +671,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"te", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"te", @@ -734,6 +797,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"zh", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"zh", @@ -770,12016 +840,10084 @@ "score":0.3243323503 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"ar", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", - "score":0.1806181405 + "score":0.2660357996 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", - "score":0.4614154717 + "score":0.5489237487 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", - "score":0.3184146682 + "score":0.3431691167 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", - "score":0.4885651648 + "score":0.5659025601 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"bn", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":1.0 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"bn", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", - "score":0.1707650511 + "score":0.2999810338 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", - "score":0.4191623205 + "score":0.5315905896 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", - "score":0.1747239771 + "score":0.3568113924 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", - "score":0.4138755715 + "score":0.5379558638 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"de", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", + "bcp_47":"de", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"deepseek\/deepseek-chat", "bcp_47":"de", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":1.0 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"de", "task":"translation_from", "metric":"bleu", - "score":0.214232349 + "score":0.3410327538 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"de", "task":"translation_from", "metric":"chrf", - "score":0.4760523398 + "score":0.5702449417 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"de", "task":"translation_to", "metric":"bleu", - "score":0.3694909412 + "score":0.4621824412 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"de", "task":"translation_to", "metric":"chrf", - "score":0.6379394313 + "score":0.6644677733 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"en", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", + "bcp_47":"en", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"deepseek\/deepseek-chat", "bcp_47":"en", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":1.0 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"en", "task":"translation_from", "metric":"bleu", - "score":0.3898301226 + "score":0.5530727537 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"en", "task":"translation_from", "metric":"chrf", - "score":0.6012929861 + "score":0.7075859327 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"en", "task":"translation_to", "metric":"bleu", - "score":0.488423591 + "score":0.6368037655 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"en", "task":"translation_to", "metric":"chrf", - "score":0.7529549619 + "score":0.8170495194 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"es", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", + "bcp_47":"es", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"deepseek\/deepseek-chat", "bcp_47":"es", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"es", "task":"translation_from", "metric":"bleu", - "score":0.2337674235 + "score":0.3558328881 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"es", "task":"translation_from", "metric":"chrf", - "score":0.4951544114 + "score":0.5816613686 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"es", "task":"translation_to", "metric":"bleu", - "score":0.341066665 + "score":0.3018683161 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"es", "task":"translation_to", "metric":"chrf", - "score":0.6161709719 + "score":0.5862734644 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"fr", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", + "bcp_47":"fr", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"deepseek\/deepseek-chat", "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.8 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", - "score":0.199697024 + "score":0.3482763208 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", - "score":0.4970728001 + "score":0.5982327963 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", - "score":0.548967737 + "score":0.452269216 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", - "score":0.7095016995 + "score":0.6590312745 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"hi", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":1.0 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", - "score":0.367723046 + "score":0.3747394322 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", - "score":0.5881358889 + "score":0.6079633657 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", - "score":0.3278391831 + "score":0.4420465241 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", - "score":0.5487381665 + "score":0.6372857982 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"id", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"id", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"id", "task":"translation_from", "metric":"bleu", - "score":0.2012670144 + "score":0.3120299438 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"id", "task":"translation_from", "metric":"chrf", - "score":0.4903423287 + "score":0.5421647326 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"id", "task":"translation_to", "metric":"bleu", - "score":0.2590870164 + "score":0.3139800299 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"id", "task":"translation_to", "metric":"chrf", - "score":0.5931106206 + "score":0.6305869448 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"ja", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", + "bcp_47":"ja", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"deepseek\/deepseek-chat", "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", - "score":0.1789581673 + "score":0.2121435327 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", - "score":0.4444827041 + "score":0.5165098934 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", - "score":0.175930196 + "score":0.3965805608 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", - "score":0.329462033 + "score":0.5197322727 }, { - "model":"anthropic\/claude-3-haiku", - "bcp_47":"mr", + "model":"deepseek\/deepseek-chat", + "bcp_47":"jv", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"anthropic\/claude-3-haiku", - "bcp_47":"mr", + "model":"deepseek\/deepseek-chat", + "bcp_47":"jv", "task":"translation_from", "metric":"bleu", - "score":0.1834187267 + "score":0.2594796679 }, { - "model":"anthropic\/claude-3-haiku", - "bcp_47":"mr", + "model":"deepseek\/deepseek-chat", + "bcp_47":"jv", "task":"translation_from", "metric":"chrf", - "score":0.470878386 + "score":0.4780086047 }, { - "model":"anthropic\/claude-3-haiku", - "bcp_47":"mr", + "model":"deepseek\/deepseek-chat", + "bcp_47":"jv", "task":"translation_to", "metric":"bleu", - "score":0.1744972384 + "score":0.3009966401 }, { - "model":"anthropic\/claude-3-haiku", - "bcp_47":"mr", + "model":"deepseek\/deepseek-chat", + "bcp_47":"jv", "task":"translation_to", "metric":"chrf", - "score":0.4213302564 + "score":0.5761529867 }, { - "model":"anthropic\/claude-3-haiku", - "bcp_47":"pa", + "model":"deepseek\/deepseek-chat", + "bcp_47":"mr", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":1.0 }, { - "model":"anthropic\/claude-3-haiku", - "bcp_47":"pa", + "model":"deepseek\/deepseek-chat", + "bcp_47":"mr", "task":"translation_from", "metric":"bleu", - "score":0.3488186237 + "score":0.2536168105 }, { - "model":"anthropic\/claude-3-haiku", - "bcp_47":"pa", + "model":"deepseek\/deepseek-chat", + "bcp_47":"mr", "task":"translation_from", "metric":"chrf", - "score":0.5904301709 + "score":0.5193737639 }, { - "model":"anthropic\/claude-3-haiku", - "bcp_47":"pa", + "model":"deepseek\/deepseek-chat", + "bcp_47":"mr", "task":"translation_to", "metric":"bleu", - "score":0.3621810085 + "score":0.2743904396 }, { - "model":"anthropic\/claude-3-haiku", - "bcp_47":"pa", + "model":"deepseek\/deepseek-chat", + "bcp_47":"mr", "task":"translation_to", "metric":"chrf", - "score":0.51265991 + "score":0.4918283752 }, { - "model":"anthropic\/claude-3-haiku", - "bcp_47":"pt", + "model":"deepseek\/deepseek-chat", + "bcp_47":"pa", "task":"classification", "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3-haiku", - "bcp_47":"pt", - "task":"mmlu", - "metric":"accuracy", "score":1.0 }, { - "model":"anthropic\/claude-3-haiku", - "bcp_47":"pt", + "model":"deepseek\/deepseek-chat", + "bcp_47":"pa", "task":"translation_from", "metric":"bleu", - "score":0.2014349032 + "score":0.4307107958 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"pa", + "task":"translation_from", + "metric":"chrf", + "score":0.6547934264 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"pa", + "task":"translation_to", + "metric":"bleu", + "score":0.3789232965 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"pa", + "task":"translation_to", + "metric":"chrf", + "score":0.5490387026 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"pt", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"pt", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"pt", + "task":"translation_from", + "metric":"bleu", + "score":0.3491027097 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", - "score":0.458360538 + "score":0.5771246685 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", - "score":0.4537177243 + "score":0.3737440632 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", - "score":0.6780859486 + "score":0.6547073078 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"ru", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"ru", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", - "score":0.2192993586 + "score":0.2584319121 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", - "score":0.5037493911 + "score":0.5177022275 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", - "score":0.3365435366 + "score":0.371218137 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", - "score":0.5803139095 + "score":0.5948906549 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"sw", "task":"classification", "metric":"accuracy", + "score":0.9 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"sw", + "task":"mgsm", + "metric":"accuracy", "score":0.8 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", - "score":0.1629528612 + "score":0.2583542451 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", - "score":0.3934230515 + "score":0.5157482031 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", - "score":0.2569355275 + "score":0.3458513734 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", - "score":0.5756052088 + "score":0.634706105 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"te", "task":"classification", "metric":"accuracy", + "score":1.0 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"te", + "task":"mgsm", + "metric":"accuracy", "score":0.8 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"te", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.6 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"te", "task":"translation_from", "metric":"bleu", - "score":0.3030045526 + "score":0.3887890595 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"te", "task":"translation_from", "metric":"chrf", - "score":0.5524023581 + "score":0.6100201392 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"te", "task":"translation_to", "metric":"bleu", - "score":0.197839972 + "score":0.3146346509 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"te", "task":"translation_to", "metric":"chrf", - "score":0.4780950203 + "score":0.5476489231 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"ur", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", - "score":0.1997540464 + "score":0.2324983634 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", - "score":0.4750614684 + "score":0.497221173 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", - "score":0.1234051854 + "score":0.2976387517 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", - "score":0.2799574404 + "score":0.4761547661 }, { - "model":"anthropic\/claude-3-haiku", - "bcp_47":"zh", + "model":"deepseek\/deepseek-chat", + "bcp_47":"vi", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", + "bcp_47":"vi", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"vi", + "task":"translation_from", + "metric":"bleu", + "score":0.3027505857 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"vi", + "task":"translation_from", + "metric":"chrf", + "score":0.5506378818 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"vi", + "task":"translation_to", + "metric":"bleu", + "score":0.3708866541 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"vi", + "task":"translation_to", + "metric":"chrf", + "score":0.5846851624 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"zh", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"zh", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"deepseek\/deepseek-chat", "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", "score":1.0 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", - "score":0.1922766267 + "score":0.2377253988 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", - "score":0.4644593408 + "score":0.5405656214 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", - "score":0.2227195824 + "score":0.2660263348 }, { - "model":"anthropic\/claude-3-haiku", + "model":"deepseek\/deepseek-chat", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", - "score":0.2687577774 + "score":0.3459946232 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ar", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.8 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", - "score":0.2126059737 + "score":0.2993048546 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", - "score":0.4462765689 + "score":0.5651988199 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", - "score":0.1836371582 + "score":0.3853695566 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", - "score":0.3778102006 + "score":0.602672086 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"bn", "task":"classification", "metric":"accuracy", + "score":1.0 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"bn", + "task":"mgsm", + "metric":"accuracy", "score":0.8 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.7 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", - "score":0.0953670352 + "score":0.2798537803 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", - "score":0.3310320034 + "score":0.5394779979 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", - "score":0.024360011 + "score":0.3719179468 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", - "score":0.2108567474 + "score":0.539682577 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"de", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"de", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"de", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"de", "task":"translation_from", "metric":"bleu", - "score":0.2306746115 + "score":0.3207762021 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"de", "task":"translation_from", "metric":"chrf", - "score":0.4506487927 + "score":0.5555389401 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"de", "task":"translation_to", "metric":"bleu", - "score":0.2707214129 + "score":0.4227752207 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"de", "task":"translation_to", "metric":"chrf", - "score":0.4862384022 + "score":0.6601015066 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"en", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"en", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"en", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"en", "task":"translation_from", "metric":"bleu", - "score":0.2371132106 + "score":0.5548034204 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"en", "task":"translation_from", "metric":"chrf", - "score":0.4776904622 + "score":0.7071106777 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"en", "task":"translation_to", "metric":"bleu", - "score":0.2328322481 + "score":0.6432470265 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"en", "task":"translation_to", "metric":"chrf", - "score":0.5236467405 + "score":0.8278285651 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"es", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"es", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"es", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"es", "task":"translation_from", "metric":"bleu", - "score":0.2281708898 + "score":0.3223436235 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"es", "task":"translation_from", "metric":"chrf", - "score":0.4658048059 + "score":0.5379000389 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"es", "task":"translation_to", "metric":"bleu", - "score":0.302206669 + "score":0.3061941236 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"es", "task":"translation_to", "metric":"chrf", - "score":0.5004953235 + "score":0.5918382188 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"fr", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"fr", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", - "score":0.2485056339 + "score":0.3297556296 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", - "score":0.4746922491 + "score":0.5925809306 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", - "score":0.3589308099 + "score":0.4619761505 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", - "score":0.5387167593 + "score":0.6734036273 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"hi", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":1.0 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", - "score":0.176310372 + "score":0.3412169553 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", - "score":0.3718450169 + "score":0.598919602 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", - "score":0.1638320264 + "score":0.3937102354 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", - "score":0.4001947091 + "score":0.6229439454 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"id", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"id", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"id", "task":"translation_from", "metric":"bleu", - "score":0.2256160181 + "score":0.3020610187 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"id", "task":"translation_from", "metric":"chrf", - "score":0.4634218276 + "score":0.5462026627 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"id", "task":"translation_to", "metric":"bleu", - "score":0.2436068148 + "score":0.299038365 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"id", "task":"translation_to", "metric":"chrf", - "score":0.5064853457 + "score":0.625735911 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ja", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"ja", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", - "score":0.2136903951 + "score":0.1875636541 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", - "score":0.4243463352 + "score":0.5136106256 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", - "score":0.0970742193 + "score":0.3333377273 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", - "score":0.2293867344 + "score":0.4709407515 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"jv", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"jv", + "task":"translation_from", + "metric":"bleu", + "score":0.2789250445 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"jv", + "task":"translation_from", + "metric":"chrf", + "score":0.52614288 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"jv", + "task":"translation_to", + "metric":"bleu", + "score":0.2580648249 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"jv", + "task":"translation_to", + "metric":"chrf", + "score":0.574708573 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"mr", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":1.0 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", - "score":0.0799150208 + "score":0.2487969868 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", - "score":0.2893419537 + "score":0.518708582 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", - "score":0.0581409044 + "score":0.2238578938 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", - "score":0.2268165833 + "score":0.4748109447 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"pa", "task":"classification", "metric":"accuracy", - "score":0.1 + "score":1.0 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", - "score":0.1097931752 + "score":0.3823229705 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", - "score":0.2693246745 + "score":0.6453681393 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", - "score":0.056288688 + "score":0.3548387061 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", - "score":0.1832547974 + "score":0.5492435889 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"pt", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", - "score":0.2273037816 + "score":0.3091555064 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", - "score":0.4505835688 + "score":0.5528775735 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", - "score":0.3286476013 + "score":0.3860807525 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", - "score":0.5091531561 + "score":0.6710753294 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ru", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"ru", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", - "score":0.1855609032 + "score":0.2464304597 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", - "score":0.4153884364 + "score":0.5343201712 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", - "score":0.277880372 + "score":0.3592456339 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", - "score":0.4652042594 + "score":0.5816925415 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"sw", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"sw", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.7 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", - "score":0.0369694992 + "score":0.2438930348 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", - "score":0.240137357 + "score":0.5213646779 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", - "score":0.0127960718 + "score":0.2864351463 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", - "score":0.2182424065 + "score":0.6249321785 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"te", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":1.0 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"te", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"te", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"te", "task":"translation_from", "metric":"bleu", - "score":0.0940291711 + "score":0.3464968589 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"te", "task":"translation_from", "metric":"chrf", - "score":0.2872804994 + "score":0.5970733128 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"te", "task":"translation_to", "metric":"bleu", - "score":0.0398689493 + "score":0.2948086539 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"te", "task":"translation_to", "metric":"chrf", - "score":0.2075817756 + "score":0.5417485172 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ur", "task":"classification", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", - "score":0.117904807 + "score":0.2290327476 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", - "score":0.3035503387 + "score":0.5229250115 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", - "score":0.0043914583 + "score":0.3122409611 }, { - "model":"cohere\/command-r", + "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", - "score":0.1973427325 + "score":0.4921734247 }, { - "model":"cohere\/command-r", - "bcp_47":"zh", + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"vi", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"cohere\/command-r", - "bcp_47":"zh", + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { - "model":"cohere\/command-r", - "bcp_47":"zh", + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"vi", "task":"translation_from", "metric":"bleu", - "score":0.1818080543 + "score":0.2150236607 }, { - "model":"cohere\/command-r", - "bcp_47":"zh", + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"vi", "task":"translation_from", "metric":"chrf", - "score":0.4402625664 + "score":0.4970978512 }, { - "model":"cohere\/command-r", - "bcp_47":"zh", + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"vi", "task":"translation_to", "metric":"bleu", - "score":0.1788431133 + "score":0.3603191861 }, { - "model":"cohere\/command-r", - "bcp_47":"zh", + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"vi", "task":"translation_to", "metric":"chrf", - "score":0.2242902764 + "score":0.5938509481 }, { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ar", + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"zh", "task":"classification", "metric":"accuracy", + "score":1.0 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"zh", + "task":"mgsm", + "metric":"accuracy", "score":0.9 }, { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ar", + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ar", + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"zh", "task":"translation_from", "metric":"bleu", - "score":0.2660357996 + "score":0.2350235637 }, { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ar", + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"zh", "task":"translation_from", "metric":"chrf", - "score":0.5489237487 + "score":0.528086246 }, { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ar", + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"zh", "task":"translation_to", "metric":"bleu", - "score":0.3431691167 + "score":0.2524231151 }, { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ar", + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"zh", "task":"translation_to", "metric":"chrf", - "score":0.5659025601 + "score":0.3456705882 }, { - "model":"deepseek\/deepseek-chat", - "bcp_47":"bn", + "model":"deepseek\/deepseek-r1", + "bcp_47":"ar", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.1 }, { - "model":"deepseek\/deepseek-chat", - "bcp_47":"bn", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"bn", - "task":"translation_from", - "metric":"bleu", - "score":0.2999810338 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"bn", - "task":"translation_from", - "metric":"chrf", - "score":0.5315905896 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"bn", - "task":"translation_to", - "metric":"bleu", - "score":0.3568113924 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"bn", - "task":"translation_to", - "metric":"chrf", - "score":0.5379558638 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"de", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"de", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"de", - "task":"translation_from", - "metric":"bleu", - "score":0.3410327538 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"de", - "task":"translation_from", - "metric":"chrf", - "score":0.5702449417 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"de", - "task":"translation_to", - "metric":"bleu", - "score":0.4621824412 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"de", - "task":"translation_to", - "metric":"chrf", - "score":0.6644677733 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"en", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"en", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"en", - "task":"translation_from", - "metric":"bleu", - "score":0.5530727537 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"en", - "task":"translation_from", - "metric":"chrf", - "score":0.7075859327 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"en", - "task":"translation_to", - "metric":"bleu", - "score":0.6422296046 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"en", - "task":"translation_to", - "metric":"chrf", - "score":0.81811999 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"es", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"es", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"es", - "task":"translation_from", - "metric":"bleu", - "score":0.3558328881 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"es", - "task":"translation_from", - "metric":"chrf", - "score":0.5816613686 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"es", - "task":"translation_to", - "metric":"bleu", - "score":0.3018683161 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"es", - "task":"translation_to", - "metric":"chrf", - "score":0.5862734644 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"fr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"fr", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"fr", - "task":"translation_from", - "metric":"bleu", - "score":0.3482763208 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"fr", - "task":"translation_from", - "metric":"chrf", - "score":0.5982327963 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"fr", - "task":"translation_to", - "metric":"bleu", - "score":0.452269216 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"fr", - "task":"translation_to", - "metric":"chrf", - "score":0.6590312745 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"hi", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"hi", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"hi", - "task":"translation_from", - "metric":"bleu", - "score":0.3747394322 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"hi", - "task":"translation_from", - "metric":"chrf", - "score":0.6079633657 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"hi", - "task":"translation_to", - "metric":"bleu", - "score":0.4420465241 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"hi", - "task":"translation_to", - "metric":"chrf", - "score":0.6372857982 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"id", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"id", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"id", - "task":"translation_from", - "metric":"bleu", - "score":0.3120299438 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"id", - "task":"translation_from", - "metric":"chrf", - "score":0.5421647326 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"id", - "task":"translation_to", - "metric":"bleu", - "score":0.3139800299 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"id", - "task":"translation_to", - "metric":"chrf", - "score":0.6305869448 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ja", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ja", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ja", - "task":"translation_from", - "metric":"bleu", - "score":0.2121435327 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ja", - "task":"translation_from", - "metric":"chrf", - "score":0.5165098934 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ja", - "task":"translation_to", - "metric":"bleu", - "score":0.3965805608 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ja", - "task":"translation_to", - "metric":"chrf", - "score":0.5197322727 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"jv", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"jv", - "task":"translation_from", - "metric":"bleu", - "score":0.2594796679 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"jv", - "task":"translation_from", - "metric":"chrf", - "score":0.4780086047 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"jv", - "task":"translation_to", - "metric":"bleu", - "score":0.3009966401 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"jv", - "task":"translation_to", - "metric":"chrf", - "score":0.5761529867 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"mr", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"mr", - "task":"translation_from", - "metric":"bleu", - "score":0.2536168105 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"mr", - "task":"translation_from", - "metric":"chrf", - "score":0.5193737639 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"mr", - "task":"translation_to", - "metric":"bleu", - "score":0.2743904396 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"mr", - "task":"translation_to", - "metric":"chrf", - "score":0.4918283752 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"pa", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"pa", - "task":"translation_from", - "metric":"bleu", - "score":0.4307107958 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"pa", - "task":"translation_from", - "metric":"chrf", - "score":0.6547934264 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"pa", - "task":"translation_to", - "metric":"bleu", - "score":0.3789232965 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"pa", - "task":"translation_to", - "metric":"chrf", - "score":0.5490387026 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"pt", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"pt", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"pt", - "task":"translation_from", - "metric":"bleu", - "score":0.3491027097 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"pt", - "task":"translation_from", - "metric":"chrf", - "score":0.5771246685 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"pt", - "task":"translation_to", - "metric":"bleu", - "score":0.3737440632 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"pt", - "task":"translation_to", - "metric":"chrf", - "score":0.6547073078 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ru", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ru", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ru", - "task":"translation_from", - "metric":"bleu", - "score":0.2584319121 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ru", - "task":"translation_from", - "metric":"chrf", - "score":0.5177022275 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ru", - "task":"translation_to", - "metric":"bleu", - "score":0.371218137 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ru", - "task":"translation_to", - "metric":"chrf", - "score":0.5948906549 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"sw", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"sw", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"sw", - "task":"translation_from", - "metric":"bleu", - "score":0.2583542451 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"sw", - "task":"translation_from", - "metric":"chrf", - "score":0.5157482031 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"sw", - "task":"translation_to", - "metric":"bleu", - "score":0.3458513734 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"sw", - "task":"translation_to", - "metric":"chrf", - "score":0.634706105 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"te", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"te", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"te", - "task":"translation_from", - "metric":"bleu", - "score":0.3887890595 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"te", - "task":"translation_from", - "metric":"chrf", - "score":0.6100201392 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"te", - "task":"translation_to", - "metric":"bleu", - "score":0.3146346509 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"te", - "task":"translation_to", - "metric":"chrf", - "score":0.5476489231 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ur", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ur", - "task":"translation_from", - "metric":"bleu", - "score":0.2324983634 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ur", - "task":"translation_from", - "metric":"chrf", - "score":0.497221173 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ur", - "task":"translation_to", - "metric":"bleu", - "score":0.2976387517 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ur", - "task":"translation_to", - "metric":"chrf", - "score":0.4761547661 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"vi", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"vi", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"vi", - "task":"translation_from", - "metric":"bleu", - "score":0.3027505857 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"vi", - "task":"translation_from", - "metric":"chrf", - "score":0.5506378818 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"vi", - "task":"translation_to", - "metric":"bleu", - "score":0.3708866541 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"vi", - "task":"translation_to", - "metric":"chrf", - "score":0.5846851624 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"zh", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"zh", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"zh", - "task":"translation_from", - "metric":"bleu", - "score":0.2377253988 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"zh", - "task":"translation_from", - "metric":"chrf", - "score":0.5405656214 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"zh", - "task":"translation_to", - "metric":"bleu", - "score":0.2660263348 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"zh", - "task":"translation_to", - "metric":"chrf", - "score":0.3459946232 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ar", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ar", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ar", - "task":"translation_from", - "metric":"bleu", - "score":0.2993048546 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ar", - "task":"translation_from", - "metric":"chrf", - "score":0.5651988199 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ar", - "task":"translation_to", - "metric":"bleu", - "score":0.3853695566 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ar", - "task":"translation_to", - "metric":"chrf", - "score":0.602672086 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"bn", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"bn", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"bn", - "task":"translation_from", - "metric":"bleu", - "score":0.2798537803 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"bn", - "task":"translation_from", - "metric":"chrf", - "score":0.5394779979 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"bn", - "task":"translation_to", - "metric":"bleu", - "score":0.3719179468 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"bn", - "task":"translation_to", - "metric":"chrf", - "score":0.539682577 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"de", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"de", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"de", - "task":"translation_from", - "metric":"bleu", - "score":0.3207762021 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"de", - "task":"translation_from", - "metric":"chrf", - "score":0.5555389401 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"de", - "task":"translation_to", - "metric":"bleu", - "score":0.4227752207 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"de", - "task":"translation_to", - "metric":"chrf", - "score":0.6601015066 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"en", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"en", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"en", - "task":"translation_from", - "metric":"bleu", - "score":0.569880154 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"en", - "task":"translation_from", - "metric":"chrf", - "score":0.7141874172 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"en", - "task":"translation_to", - "metric":"bleu", - "score":0.6432470265 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"en", - "task":"translation_to", - "metric":"chrf", - "score":0.8278285651 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"es", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"es", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"es", - "task":"translation_from", - "metric":"bleu", - "score":0.3223436235 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"es", - "task":"translation_from", - "metric":"chrf", - "score":0.5379000389 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"es", - "task":"translation_to", - "metric":"bleu", - "score":0.3061941236 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"es", - "task":"translation_to", - "metric":"chrf", - "score":0.5918382188 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"fr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"fr", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"fr", - "task":"translation_from", - "metric":"bleu", - "score":0.3297556296 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"fr", - "task":"translation_from", - "metric":"chrf", - "score":0.5925809306 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"fr", - "task":"translation_to", - "metric":"bleu", - "score":0.4619761505 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"fr", - "task":"translation_to", - "metric":"chrf", - "score":0.6734036273 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"hi", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"hi", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"hi", - "task":"translation_from", - "metric":"bleu", - "score":0.3670345278 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"hi", - "task":"translation_from", - "metric":"chrf", - "score":0.6162795112 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"hi", - "task":"translation_to", - "metric":"bleu", - "score":0.3937102354 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"hi", - "task":"translation_to", - "metric":"chrf", - "score":0.6229439454 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"id", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"id", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"id", - "task":"translation_from", - "metric":"bleu", - "score":0.3020610187 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"id", - "task":"translation_from", - "metric":"chrf", - "score":0.5462026627 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"id", - "task":"translation_to", - "metric":"bleu", - "score":0.299038365 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"id", - "task":"translation_to", - "metric":"chrf", - "score":0.625735911 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ja", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ja", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ja", - "task":"translation_from", - "metric":"bleu", - "score":0.1875636541 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ja", - "task":"translation_from", - "metric":"chrf", - "score":0.5136106256 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ja", - "task":"translation_to", - "metric":"bleu", - "score":0.3333377273 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ja", - "task":"translation_to", - "metric":"chrf", - "score":0.4709407515 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"jv", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"jv", - "task":"translation_from", - "metric":"bleu", - "score":0.2789250445 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"jv", - "task":"translation_from", - "metric":"chrf", - "score":0.52614288 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"jv", - "task":"translation_to", - "metric":"bleu", - "score":0.2580648249 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"jv", - "task":"translation_to", - "metric":"chrf", - "score":0.574708573 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"mr", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"mr", - "task":"translation_from", - "metric":"bleu", - "score":0.2487969868 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"mr", - "task":"translation_from", - "metric":"chrf", - "score":0.518708582 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"mr", - "task":"translation_to", - "metric":"bleu", - "score":0.2238578938 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"mr", - "task":"translation_to", - "metric":"chrf", - "score":0.4748109447 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"pa", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"pa", - "task":"translation_from", - "metric":"bleu", - "score":0.4358296524 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"pa", - "task":"translation_from", - "metric":"chrf", - "score":0.6732700357 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"pa", - "task":"translation_to", - "metric":"bleu", - "score":0.3548387061 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"pa", - "task":"translation_to", - "metric":"chrf", - "score":0.5492435889 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"pt", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"pt", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"pt", - "task":"translation_from", - "metric":"bleu", - "score":0.3091555064 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"pt", - "task":"translation_from", - "metric":"chrf", - "score":0.5528775735 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"pt", - "task":"translation_to", - "metric":"bleu", - "score":0.3860807525 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"pt", - "task":"translation_to", - "metric":"chrf", - "score":0.6710753294 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ru", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ru", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ru", - "task":"translation_from", - "metric":"bleu", - "score":0.2464304597 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ru", - "task":"translation_from", - "metric":"chrf", - "score":0.5343201712 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ru", - "task":"translation_to", - "metric":"bleu", - "score":0.3592456339 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ru", - "task":"translation_to", - "metric":"chrf", - "score":0.5816925415 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"sw", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"sw", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"sw", - "task":"translation_from", - "metric":"bleu", - "score":0.2438930348 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"sw", - "task":"translation_from", - "metric":"chrf", - "score":0.5213646779 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"sw", - "task":"translation_to", - "metric":"bleu", - "score":0.2864351463 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"sw", - "task":"translation_to", - "metric":"chrf", - "score":0.6249321785 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"te", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"te", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"te", - "task":"translation_from", - "metric":"bleu", - "score":0.3465514125 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"te", - "task":"translation_from", - "metric":"chrf", - "score":0.588783736 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"te", - "task":"translation_to", - "metric":"bleu", - "score":0.2948086539 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"te", - "task":"translation_to", - "metric":"chrf", - "score":0.5417485172 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ur", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ur", - "task":"translation_from", - "metric":"bleu", - "score":0.2290327476 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ur", - "task":"translation_from", - "metric":"chrf", - "score":0.5229250115 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ur", - "task":"translation_to", - "metric":"bleu", - "score":0.3122409611 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ur", - "task":"translation_to", - "metric":"chrf", - "score":0.4921734247 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"vi", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"vi", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"vi", - "task":"translation_from", - "metric":"bleu", - "score":0.2150236607 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"vi", - "task":"translation_from", - "metric":"chrf", - "score":0.4970978512 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"vi", - "task":"translation_to", - "metric":"bleu", - "score":0.3603191861 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"vi", - "task":"translation_to", - "metric":"chrf", - "score":0.5938509481 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"zh", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"zh", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"zh", - "task":"translation_from", - "metric":"bleu", - "score":0.2350235637 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"zh", - "task":"translation_from", - "metric":"chrf", - "score":0.528086246 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"zh", - "task":"translation_to", - "metric":"bleu", - "score":0.2524231151 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"zh", - "task":"translation_to", - "metric":"chrf", - "score":0.3456705882 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ar", - "task":"classification", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ar", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ar", - "task":"translation_from", - "metric":"bleu", - "score":0.2238237549 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ar", - "task":"translation_from", - "metric":"chrf", - "score":0.4534366926 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ar", - "task":"translation_to", - "metric":"bleu", - "score":0.2689055687 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ar", - "task":"translation_to", - "metric":"chrf", - "score":0.4149416248 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"bn", - "task":"classification", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"bn", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"bn", - "task":"translation_from", - "metric":"bleu", - "score":0.2443635406 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"bn", - "task":"translation_from", - "metric":"chrf", - "score":0.4676159664 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"bn", - "task":"translation_to", - "metric":"bleu", - "score":0.2223182846 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"bn", - "task":"translation_to", - "metric":"chrf", - "score":0.3685961254 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"de", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"de", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"de", - "task":"translation_from", - "metric":"bleu", - "score":0.2633055293 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"de", - "task":"translation_from", - "metric":"chrf", - "score":0.451378667 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"de", - "task":"translation_to", - "metric":"bleu", - "score":0.2576565152 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"de", - "task":"translation_to", - "metric":"chrf", - "score":0.3874723625 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"en", - "task":"classification", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"en", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"en", - "task":"translation_from", - "metric":"bleu", - "score":0.3516668412 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"en", - "task":"translation_from", - "metric":"chrf", - "score":0.4533116867 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"en", - "task":"translation_to", - "metric":"bleu", - "score":0.5868227988 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"en", - "task":"translation_to", - "metric":"chrf", - "score":0.7574063883 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"es", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"es", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"es", - "task":"translation_from", - "metric":"bleu", - "score":0.2163623393 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"es", - "task":"translation_from", - "metric":"chrf", - "score":0.4705179867 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"es", - "task":"translation_to", - "metric":"bleu", - "score":0.2458237388 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"es", - "task":"translation_to", - "metric":"chrf", - "score":0.426538099 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"fr", - "task":"classification", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"fr", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"fr", - "task":"translation_from", - "metric":"bleu", - "score":0.3182982487 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"fr", - "task":"translation_from", - "metric":"chrf", - "score":0.5389072956 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"fr", - "task":"translation_to", - "metric":"bleu", - "score":0.47314841 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"fr", - "task":"translation_to", - "metric":"chrf", - "score":0.6884710951 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"hi", - "task":"classification", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"hi", - "task":"mmlu", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"hi", - "task":"translation_from", - "metric":"bleu", - "score":0.2751556219 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"hi", - "task":"translation_from", - "metric":"chrf", - "score":0.4216960004 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"hi", - "task":"translation_to", - "metric":"bleu", - "score":0.2827365983 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"hi", - "task":"translation_to", - "metric":"chrf", - "score":0.5128198247 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"id", - "task":"classification", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"id", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"id", - "task":"translation_from", - "metric":"bleu", - "score":0.223787985 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"id", - "task":"translation_from", - "metric":"chrf", - "score":0.485186041 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"id", - "task":"translation_to", - "metric":"bleu", - "score":0.3571004344 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"id", - "task":"translation_to", - "metric":"chrf", - "score":0.6398491182 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ja", - "task":"classification", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ja", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ja", - "task":"translation_from", - "metric":"bleu", - "score":0.1711796281 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ja", - "task":"translation_from", - "metric":"chrf", - "score":0.4108641598 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ja", - "task":"translation_to", - "metric":"bleu", - "score":0.2194500975 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ja", - "task":"translation_to", - "metric":"chrf", - "score":0.3589526769 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"jv", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"jv", - "task":"translation_from", - "metric":"bleu", - "score":0.1994883012 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"jv", - "task":"translation_from", - "metric":"chrf", - "score":0.3942042616 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"jv", - "task":"translation_to", - "metric":"bleu", - "score":0.2570478693 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"jv", - "task":"translation_to", - "metric":"chrf", - "score":0.5215463463 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"mr", - "task":"classification", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"mr", - "task":"translation_from", - "metric":"bleu", - "score":0.1828389227 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"mr", - "task":"translation_from", - "metric":"chrf", - "score":0.3179756072 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"mr", - "task":"translation_to", - "metric":"bleu", - "score":0.2129586558 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"mr", - "task":"translation_to", - "metric":"chrf", - "score":0.4314516197 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"pa", - "task":"classification", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"pa", - "task":"translation_from", - "metric":"bleu", - "score":0.252814761 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"pa", - "task":"translation_from", - "metric":"chrf", - "score":0.4131775231 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"pa", - "task":"translation_to", - "metric":"bleu", - "score":0.2671823746 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"pa", - "task":"translation_to", - "metric":"chrf", - "score":0.3966391033 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"pt", - "task":"classification", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"pt", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"pt", - "task":"translation_from", - "metric":"bleu", - "score":0.2348823133 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"pt", - "task":"translation_from", - "metric":"chrf", - "score":0.4500923911 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"pt", - "task":"translation_to", - "metric":"bleu", - "score":0.4182790857 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"pt", - "task":"translation_to", - "metric":"chrf", - "score":0.649967582 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ru", - "task":"classification", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ru", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ru", - "task":"translation_from", - "metric":"bleu", - "score":0.1839314203 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ru", - "task":"translation_from", - "metric":"chrf", - "score":0.4234191674 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ru", - "task":"translation_to", - "metric":"bleu", - "score":0.3489148579 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ru", - "task":"translation_to", - "metric":"chrf", - "score":0.5567945257 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"sw", - "task":"classification", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"sw", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"sw", - "task":"translation_from", - "metric":"bleu", - "score":0.2041105012 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"sw", - "task":"translation_from", - "metric":"chrf", - "score":0.3657796945 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"sw", - "task":"translation_to", - "metric":"bleu", - "score":0.3130978532 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"sw", - "task":"translation_to", - "metric":"chrf", - "score":0.5950962977 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"te", - "task":"classification", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"te", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"te", - "task":"translation_from", - "metric":"bleu", - "score":0.1200373123 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"te", - "task":"translation_from", - "metric":"chrf", - "score":0.3196364935 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"te", - "task":"translation_to", - "metric":"bleu", - "score":0.1814754432 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"te", - "task":"translation_to", - "metric":"chrf", - "score":0.3157059838 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ur", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ur", - "task":"translation_from", - "metric":"bleu", - "score":0.181272453 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ur", - "task":"translation_from", - "metric":"chrf", - "score":0.386776605 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ur", - "task":"translation_to", - "metric":"bleu", - "score":0.2603415771 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ur", - "task":"translation_to", - "metric":"chrf", - "score":0.4241396601 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"vi", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"vi", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"vi", - "task":"translation_from", - "metric":"bleu", - "score":0.1705385375 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"vi", - "task":"translation_from", - "metric":"chrf", - "score":0.3747437419 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"vi", - "task":"translation_to", - "metric":"bleu", - "score":0.257036702 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"vi", - "task":"translation_to", - "metric":"chrf", - "score":0.4598854693 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"zh", - "task":"classification", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"zh", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"zh", - "task":"translation_from", - "metric":"bleu", - "score":0.2137844239 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"zh", - "task":"translation_from", - "metric":"chrf", - "score":0.5028557922 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"zh", - "task":"translation_to", - "metric":"bleu", - "score":0.3129908127 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"zh", - "task":"translation_to", - "metric":"chrf", - "score":0.3710290799 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ar", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ar", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ar", - "task":"translation_from", - "metric":"bleu", - "score":0.3412745465 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ar", - "task":"translation_from", - "metric":"chrf", - "score":0.5826925715 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ar", - "task":"translation_to", - "metric":"bleu", - "score":0.390058702 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ar", - "task":"translation_to", - "metric":"chrf", - "score":0.597901367 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"bn", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"bn", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"bn", - "task":"translation_from", - "metric":"bleu", - "score":0.2988263355 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"bn", - "task":"translation_from", - "metric":"chrf", - "score":0.5680625724 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"bn", - "task":"translation_to", - "metric":"bleu", - "score":0.3895386095 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"bn", - "task":"translation_to", - "metric":"chrf", - "score":0.5693830083 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"de", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"de", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"de", - "task":"translation_from", - "metric":"bleu", - "score":0.4245142301 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"de", - "task":"translation_from", - "metric":"chrf", - "score":0.6390046108 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"de", - "task":"translation_to", - "metric":"bleu", - "score":0.4937713215 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"de", - "task":"translation_to", - "metric":"chrf", - "score":0.6856946146 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"en", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"en", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"en", - "task":"translation_from", - "metric":"bleu", - "score":0.5700014681 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"en", - "task":"translation_from", - "metric":"chrf", - "score":0.7211557733 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"en", - "task":"translation_to", - "metric":"bleu", - "score":0.6711231356 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"en", - "task":"translation_to", - "metric":"chrf", - "score":0.8279616884 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"es", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"es", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"es", - "task":"translation_from", - "metric":"bleu", - "score":0.3344072959 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"es", - "task":"translation_from", - "metric":"chrf", - "score":0.5751886204 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"es", - "task":"translation_to", - "metric":"bleu", - "score":0.3905772718 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"es", - "task":"translation_to", - "metric":"chrf", - "score":0.6166676981 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"fr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"fr", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"fr", - "task":"translation_from", - "metric":"bleu", - "score":0.3411643102 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"fr", - "task":"translation_from", - "metric":"chrf", - "score":0.5823324237 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"fr", - "task":"translation_to", - "metric":"bleu", - "score":0.549646027 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"fr", - "task":"translation_to", - "metric":"chrf", - "score":0.7334440232 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"hi", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"hi", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"hi", - "task":"translation_from", - "metric":"bleu", - "score":0.4434404142 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"hi", - "task":"translation_from", - "metric":"chrf", - "score":0.6607832834 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"hi", - "task":"translation_to", - "metric":"bleu", - "score":0.4308263411 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"hi", - "task":"translation_to", - "metric":"chrf", - "score":0.6353494719 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"id", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"id", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"id", - "task":"translation_from", - "metric":"bleu", - "score":0.3626330278 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"id", - "task":"translation_from", - "metric":"chrf", - "score":0.5869542517 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"id", - "task":"translation_to", - "metric":"bleu", - "score":0.4368014195 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"id", - "task":"translation_to", - "metric":"chrf", - "score":0.6964617832 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ja", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ja", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ja", - "task":"translation_from", - "metric":"bleu", - "score":0.3331002174 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ja", - "task":"translation_from", - "metric":"chrf", - "score":0.579659503 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ja", - "task":"translation_to", - "metric":"bleu", - "score":0.336969824 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ja", - "task":"translation_to", - "metric":"chrf", - "score":0.4913494739 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"jv", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"jv", - "task":"translation_from", - "metric":"bleu", - "score":0.3700621486 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"jv", - "task":"translation_from", - "metric":"chrf", - "score":0.5837249923 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"jv", - "task":"translation_to", - "metric":"bleu", - "score":0.3088767184 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"jv", - "task":"translation_to", - "metric":"chrf", - "score":0.5884562937 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"mr", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"mr", - "task":"translation_from", - "metric":"bleu", - "score":0.3338185652 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"mr", - "task":"translation_from", - "metric":"chrf", - "score":0.5787491818 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"mr", - "task":"translation_to", - "metric":"bleu", - "score":0.2806196555 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"mr", - "task":"translation_to", - "metric":"chrf", - "score":0.4516145469 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"pa", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"pa", - "task":"translation_from", - "metric":"bleu", - "score":0.4372599799 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"pa", - "task":"translation_from", - "metric":"chrf", - "score":0.658993109 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"pa", - "task":"translation_to", - "metric":"bleu", - "score":0.4174871385 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"pa", - "task":"translation_to", - "metric":"chrf", - "score":0.5931595705 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"pt", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"pt", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"pt", - "task":"translation_from", - "metric":"bleu", - "score":0.3198419121 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"pt", - "task":"translation_from", - "metric":"chrf", - "score":0.578067895 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"pt", - "task":"translation_to", - "metric":"bleu", - "score":0.5056809967 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"pt", - "task":"translation_to", - "metric":"chrf", - "score":0.6966172136 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ru", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ru", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ru", - "task":"translation_from", - "metric":"bleu", - "score":0.2839962776 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ru", - "task":"translation_from", - "metric":"chrf", - "score":0.5461994333 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ru", - "task":"translation_to", - "metric":"bleu", - "score":0.4589390255 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ru", - "task":"translation_to", - "metric":"chrf", - "score":0.6469214364 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"sw", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"sw", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"sw", - "task":"translation_from", - "metric":"bleu", - "score":0.4043048116 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"sw", - "task":"translation_from", - "metric":"chrf", - "score":0.5944297519 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"sw", - "task":"translation_to", - "metric":"bleu", - "score":0.499303533 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"sw", - "task":"translation_to", - "metric":"chrf", - "score":0.7212747243 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"te", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"te", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"te", - "task":"translation_from", - "metric":"bleu", - "score":0.4243816819 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"te", - "task":"translation_from", - "metric":"chrf", - "score":0.6336752528 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"te", - "task":"translation_to", - "metric":"bleu", - "score":0.3644641049 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"te", - "task":"translation_to", - "metric":"chrf", - "score":0.5789291672 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ur", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ur", - "task":"translation_from", - "metric":"bleu", - "score":0.31244285 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ur", - "task":"translation_from", - "metric":"chrf", - "score":0.5630479496 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ur", - "task":"translation_to", - "metric":"bleu", - "score":0.2911146353 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ur", - "task":"translation_to", - "metric":"chrf", - "score":0.4826415387 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"vi", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"vi", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"vi", - "task":"translation_from", - "metric":"bleu", - "score":0.3436662566 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"vi", - "task":"translation_from", - "metric":"chrf", - "score":0.59026429 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"vi", - "task":"translation_to", - "metric":"bleu", - "score":0.447007323 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"vi", - "task":"translation_to", - "metric":"chrf", - "score":0.6624838094 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"zh", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"zh", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"zh", - "task":"translation_from", - "metric":"bleu", - "score":0.3075560511 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"zh", - "task":"translation_from", - "metric":"chrf", - "score":0.5721434339 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"zh", - "task":"translation_to", - "metric":"bleu", - "score":0.3628421643 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"zh", - "task":"translation_to", - "metric":"chrf", - "score":0.3921364269 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ar", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", - "score":0.2723646085 + "score":0.2238237549 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", - "score":0.523730035 + "score":0.4534366926 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", - "score":0.3878873306 + "score":0.2689055687 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", - "score":0.592059186 + "score":0.4149416248 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"bn", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.2 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", + "bcp_47":"bn", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"deepseek\/deepseek-r1", "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", - "score":0.2672489266 + "score":0.2443635406 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", - "score":0.4952818431 + "score":0.4676159664 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", - "score":0.3639249804 + "score":0.2223182846 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", - "score":0.5538942616 + "score":0.3685961254 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"de", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"de", - "task":"mmlu", + "task":"mgsm", "metric":"accuracy", "score":0.7 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", + "bcp_47":"de", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"deepseek\/deepseek-r1", "bcp_47":"de", "task":"translation_from", "metric":"bleu", - "score":0.3382313883 + "score":0.2633055293 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"de", "task":"translation_from", "metric":"chrf", - "score":0.5705047774 + "score":0.451378667 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"de", "task":"translation_to", "metric":"bleu", - "score":0.5354923841 + "score":0.2576565152 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"de", "task":"translation_to", "metric":"chrf", - "score":0.7125417889 + "score":0.3874723625 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"en", "task":"classification", "metric":"accuracy", + "score":0.1 + }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"en", + "task":"mgsm", + "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"en", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"en", "task":"translation_from", "metric":"bleu", - "score":0.4617498931 + "score":0.3974650186 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"en", "task":"translation_from", "metric":"chrf", - "score":0.6321211549 + "score":0.5373829936 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"en", "task":"translation_to", "metric":"bleu", - "score":0.5951204739 + "score":0.5868227988 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"en", "task":"translation_to", "metric":"chrf", - "score":0.7613397345 + "score":0.7574063883 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"es", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", + "bcp_47":"es", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"deepseek\/deepseek-r1", "bcp_47":"es", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"es", "task":"translation_from", "metric":"bleu", - "score":0.3547349164 + "score":0.2163623393 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"es", "task":"translation_from", "metric":"chrf", - "score":0.5728186386 + "score":0.4705179867 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"es", "task":"translation_to", "metric":"bleu", - "score":0.3576086442 + "score":0.2458237388 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"es", "task":"translation_to", "metric":"chrf", - "score":0.6061019948 + "score":0.426538099 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"fr", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.1 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"fr", - "task":"mmlu", + "task":"mgsm", "metric":"accuracy", "score":0.7 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", + "bcp_47":"fr", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"deepseek\/deepseek-r1", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", - "score":0.2978444639 + "score":0.3182982487 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", - "score":0.5625976718 + "score":0.5389072956 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", - "score":0.5244850062 + "score":0.47314841 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", - "score":0.7081676298 + "score":0.6884710951 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"hi", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.2 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.1 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", - "score":0.4286739426 + "score":0.2684001499 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", - "score":0.6359453866 + "score":0.416230929 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", - "score":0.4344801023 + "score":0.2827365983 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", - "score":0.6474630132 + "score":0.5128198247 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"id", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.2 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"id", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"id", "task":"translation_from", "metric":"bleu", - "score":0.3126497326 + "score":0.223787985 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"id", "task":"translation_from", "metric":"chrf", - "score":0.5733116043 + "score":0.485186041 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"id", "task":"translation_to", "metric":"bleu", - "score":0.3882402101 + "score":0.3571004344 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"id", "task":"translation_to", "metric":"chrf", - "score":0.6624914478 + "score":0.6398491182 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"ja", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.2 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"ja", - "task":"mmlu", + "task":"mgsm", "metric":"accuracy", "score":0.8 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", + "bcp_47":"ja", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"deepseek\/deepseek-r1", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", - "score":0.289402493 + "score":0.1711796281 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", - "score":0.5452343766 + "score":0.4108641598 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", - "score":0.1879084772 + "score":0.2194500975 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", - "score":0.3787664659 + "score":0.3589526769 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"jv", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"jv", "task":"translation_from", "metric":"bleu", - "score":0.3561823456 + "score":0.1994883012 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"jv", "task":"translation_from", "metric":"chrf", - "score":0.5619277442 + "score":0.3942042616 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"jv", "task":"translation_to", "metric":"bleu", - "score":0.2609272138 + "score":0.2570478693 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"jv", "task":"translation_to", "metric":"chrf", - "score":0.5344454302 + "score":0.5215463463 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"mr", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.1 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", - "score":0.2906692433 + "score":0.1828389227 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", - "score":0.5058194876 + "score":0.3179756072 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", - "score":0.2450137243 + "score":0.2129586558 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", - "score":0.4736891146 + "score":0.4314516197 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"pa", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.1 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", - "score":0.3792889323 + "score":0.252814761 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", - "score":0.6001893022 + "score":0.4131775231 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", - "score":0.4267930078 + "score":0.2671823746 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", - "score":0.583867208 + "score":0.3966391033 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"pt", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.1 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", - "score":0.3275326122 + "score":0.2348823133 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", - "score":0.5672682921 + "score":0.4500923911 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", - "score":0.4390630519 + "score":0.4182790857 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", - "score":0.6588223005 + "score":0.649967582 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"ru", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.1 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", + "bcp_47":"ru", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"deepseek\/deepseek-r1", "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", - "score":0.266439807 + "score":0.1839314203 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", - "score":0.5101737799 + "score":0.4234191674 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", - "score":0.4085478092 + "score":0.3489148579 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", - "score":0.5658655698 + "score":0.5567945257 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"sw", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.1 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"sw", - "task":"mmlu", + "task":"mgsm", "metric":"accuracy", "score":0.8 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", + "bcp_47":"sw", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"deepseek\/deepseek-r1", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", - "score":0.2763965367 + "score":0.2041105012 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", - "score":0.520411275 + "score":0.3657796945 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", - "score":0.4350912598 + "score":0.3130978532 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", - "score":0.6853284539 + "score":0.5950962977 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"te", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.1 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", + "bcp_47":"te", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"deepseek\/deepseek-r1", "bcp_47":"te", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"te", "task":"translation_from", "metric":"bleu", - "score":0.3588325355 + "score":0.1200373123 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"te", "task":"translation_from", "metric":"chrf", - "score":0.5889091872 + "score":0.3196364935 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"te", "task":"translation_to", "metric":"bleu", - "score":0.3231474889 + "score":0.1814754432 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"te", "task":"translation_to", "metric":"chrf", - "score":0.5470305035 + "score":0.3157059838 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"ur", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", - "score":0.2859928961 + "score":0.181272453 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", - "score":0.5295533045 + "score":0.386776605 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", - "score":0.3010091175 + "score":0.2603415771 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", - "score":0.493353195 + "score":0.4241396601 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"vi", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"vi", "task":"translation_from", "metric":"bleu", - "score":0.3170290484 + "score":0.1705385375 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"vi", "task":"translation_from", "metric":"chrf", - "score":0.5491010642 + "score":0.3747437419 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"vi", "task":"translation_to", "metric":"bleu", - "score":0.4032471641 + "score":0.257036702 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"vi", "task":"translation_to", "metric":"chrf", - "score":0.6394566771 + "score":0.4598854693 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"zh", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.1 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"zh", - "task":"mmlu", + "task":"mgsm", "metric":"accuracy", "score":0.8 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", + "bcp_47":"zh", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"deepseek\/deepseek-r1", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", - "score":0.2557280993 + "score":0.2137844239 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", - "score":0.5212143675 + "score":0.5028557922 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", - "score":0.2610248692 + "score":0.3129908127 }, { - "model":"google\/gemini-2.0-flash-lite-001", + "model":"deepseek\/deepseek-r1", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", - "score":0.314329989 + "score":0.3710290799 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"ar", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", - "score":0.3462011916 + "score":0.3412745465 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", - "score":0.5813984303 + "score":0.5826925715 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", - "score":0.4319399394 + "score":0.390058702 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", - "score":0.6269389132 + "score":0.597901367 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"bn", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":1.0 + }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"bn", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", - "score":0.3332105938 + "score":0.2988263355 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", - "score":0.5788748413 + "score":0.5680625724 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", - "score":0.3925309816 + "score":0.3895386095 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", - "score":0.5671456285 + "score":0.5693830083 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"de", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"de", - "task":"mmlu", + "task":"mgsm", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"de", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"google\/gemini-2.0-flash-001", "bcp_47":"de", "task":"translation_from", "metric":"bleu", - "score":0.4080538745 + "score":0.4245142301 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"de", "task":"translation_from", "metric":"chrf", - "score":0.6278026445 + "score":0.6390046108 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"de", "task":"translation_to", "metric":"bleu", - "score":0.4940498755 + "score":0.4937713215 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"de", "task":"translation_to", "metric":"chrf", - "score":0.6915113583 + "score":0.6856946146 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"en", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"en", - "task":"mmlu", + "task":"mgsm", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"en", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"google\/gemini-2.0-flash-001", "bcp_47":"en", "task":"translation_from", "metric":"bleu", - "score":0.5780592594 + "score":0.5700014681 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"en", "task":"translation_from", "metric":"chrf", - "score":0.727062887 + "score":0.7211557733 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"en", "task":"translation_to", "metric":"bleu", - "score":0.725042796 + "score":0.6711231356 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"en", "task":"translation_to", "metric":"chrf", - "score":0.868453404 + "score":0.8279616884 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"es", "task":"classification", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"es", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"google\/gemini-2.0-flash-001", "bcp_47":"es", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"es", "task":"translation_from", "metric":"bleu", - "score":0.3228577771 + "score":0.3344072959 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"es", "task":"translation_from", "metric":"chrf", - "score":0.5691176688 + "score":0.5751886204 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"es", "task":"translation_to", "metric":"bleu", - "score":0.4211069488 + "score":0.3905772718 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"es", "task":"translation_to", "metric":"chrf", - "score":0.6329026504 + "score":0.6166676981 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"fr", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"fr", - "task":"mmlu", + "task":"mgsm", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"fr", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"google\/gemini-2.0-flash-001", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", - "score":0.3683003934 + "score":0.3411643102 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", - "score":0.5961800274 + "score":0.5823324237 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", - "score":0.5899042948 + "score":0.549646027 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", - "score":0.7506595836 + "score":0.7334440232 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"hi", "task":"classification", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", - "score":0.4661333335 + "score":0.4434404142 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", - "score":0.6599650679 + "score":0.6607832834 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", - "score":0.4319599559 + "score":0.4308263411 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", - "score":0.6493733048 + "score":0.6353494719 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"id", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"id", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"id", "task":"translation_from", "metric":"bleu", - "score":0.3507436052 + "score":0.3626330278 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"id", "task":"translation_from", "metric":"chrf", - "score":0.5856872774 + "score":0.5869542517 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"id", "task":"translation_to", "metric":"bleu", - "score":0.387557065 + "score":0.4368014195 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"id", "task":"translation_to", "metric":"chrf", - "score":0.6730969345 + "score":0.6964617832 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"ja", "task":"classification", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"ja", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-2.0-flash-001", "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", - "score":0.3590987818 + "score":0.3331002174 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", - "score":0.5958999814 + "score":0.579659503 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", - "score":0.4151803591 + "score":0.336969824 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", - "score":0.5319028037 + "score":0.4913494739 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"jv", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"jv", "task":"translation_from", "metric":"bleu", - "score":0.3793672083 + "score":0.3700621486 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"jv", "task":"translation_from", "metric":"chrf", - "score":0.6054760312 + "score":0.5837249923 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"jv", "task":"translation_to", "metric":"bleu", - "score":0.3161702144 + "score":0.3088767184 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"jv", "task":"translation_to", "metric":"chrf", - "score":0.5956895972 + "score":0.5884562937 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"mr", "task":"classification", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", - "score":0.2888720948 + "score":0.3338185652 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", - "score":0.5411662197 + "score":0.5787491818 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", - "score":0.2820351359 + "score":0.2806196555 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", - "score":0.5017455952 + "score":0.4516145469 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"pa", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", - "score":0.4833653569 + "score":0.4372599799 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", - "score":0.6877252787 + "score":0.658993109 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", - "score":0.4959043786 + "score":0.4174871385 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", - "score":0.631939007 + "score":0.5931595705 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"pt", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.8 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", - "score":0.4185968559 + "score":0.3198419121 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", - "score":0.6194298291 + "score":0.578067895 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", - "score":0.5066913819 + "score":0.5056809967 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", - "score":0.7045563325 + "score":0.6966172136 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"ru", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"ru", - "task":"mmlu", + "task":"mgsm", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"ru", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"google\/gemini-2.0-flash-001", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", - "score":0.2835606416 + "score":0.2839962776 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", - "score":0.5525332546 + "score":0.5461994333 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", - "score":0.5294387768 + "score":0.4589390255 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", - "score":0.6814959444 + "score":0.6469214364 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"sw", "task":"classification", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"sw", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"google\/gemini-2.0-flash-001", "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", - "score":0.4452506057 + "score":0.4043048116 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", - "score":0.6299322033 + "score":0.5944297519 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", - "score":0.4773237879 + "score":0.499303533 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", - "score":0.6983325706 + "score":0.7212747243 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"te", "task":"classification", "metric":"accuracy", + "score":1.0 + }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"te", + "task":"mgsm", + "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"te", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"te", "task":"translation_from", "metric":"bleu", - "score":0.4838983303 + "score":0.4243816819 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"te", "task":"translation_from", "metric":"chrf", - "score":0.6878651593 + "score":0.6336752528 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"te", "task":"translation_to", "metric":"bleu", - "score":0.4225689448 + "score":0.3644641049 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"te", "task":"translation_to", "metric":"chrf", - "score":0.6171822766 + "score":0.5789291672 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"ur", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", - "score":0.3194163699 + "score":0.31244285 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", - "score":0.5758075077 + "score":0.5630479496 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", - "score":0.2609063103 + "score":0.2911146353 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", - "score":0.4351160146 + "score":0.4826415387 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"vi", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"vi", "task":"translation_from", "metric":"bleu", - "score":0.4227968054 + "score":0.3436662566 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"vi", "task":"translation_from", "metric":"chrf", - "score":0.6326522416 + "score":0.59026429 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"vi", "task":"translation_to", "metric":"bleu", - "score":0.4693432911 + "score":0.447007323 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"vi", "task":"translation_to", "metric":"chrf", - "score":0.6659480306 + "score":0.6624838094 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"zh", "task":"classification", "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"zh", + "task":"mgsm", + "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", - "score":0.2632355471 + "score":0.3075560511 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", - "score":0.5527183044 + "score":0.5721434339 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", - "score":0.3128970722 + "score":0.3628421643 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-2.0-flash-001", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", - "score":0.3699956681 + "score":0.3921364269 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ar", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", - "score":0.3509446122 + "score":0.2723646085 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", - "score":0.5872861306 + "score":0.523730035 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", - "score":0.4369226272 + "score":0.3878873306 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", - "score":0.6357392299 + "score":0.592059186 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"bn", "task":"classification", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"bn", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", - "score":0.2928560114 + "score":0.2672489266 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", - "score":0.5373877759 + "score":0.4952818431 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", - "score":0.371713483 + "score":0.3639249804 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", - "score":0.5467648432 + "score":0.5538942616 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"de", "task":"classification", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"de", - "task":"mmlu", + "task":"mgsm", "metric":"accuracy", "score":0.8 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"de", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"de", "task":"translation_from", "metric":"bleu", - "score":0.3827187056 + "score":0.3382313883 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"de", "task":"translation_from", "metric":"chrf", - "score":0.6092943987 + "score":0.5705047774 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"de", "task":"translation_to", "metric":"bleu", - "score":0.5181328066 + "score":0.5354923841 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"de", "task":"translation_to", "metric":"chrf", - "score":0.6981190119 + "score":0.7125417889 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"en", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"en", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"en", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.7 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"en", "task":"translation_from", "metric":"bleu", - "score":0.5942868447 + "score":0.4617498931 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"en", "task":"translation_from", "metric":"chrf", - "score":0.7298379077 + "score":0.6321211549 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"en", "task":"translation_to", "metric":"bleu", - "score":0.7309888475 + "score":0.5951204739 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"en", "task":"translation_to", "metric":"chrf", - "score":0.8534239972 + "score":0.7613397345 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"es", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"es", - "task":"mmlu", + "task":"mgsm", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"es", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"es", "task":"translation_from", "metric":"bleu", - "score":0.3497891701 + "score":0.3547349164 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"es", "task":"translation_from", "metric":"chrf", - "score":0.5889817614 + "score":0.5728186386 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"es", "task":"translation_to", "metric":"bleu", - "score":0.395885417 + "score":0.3576086442 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"es", "task":"translation_to", "metric":"chrf", - "score":0.6273835234 + "score":0.6061019948 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"fr", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"fr", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", - "score":0.3737227872 + "score":0.2978444639 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", - "score":0.6039484183 + "score":0.5625976718 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", - "score":0.5555743798 + "score":0.5244850062 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", - "score":0.7229519037 + "score":0.7081676298 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"hi", "task":"classification", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", - "score":0.4387215588 + "score":0.4286739426 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", - "score":0.6443095169 + "score":0.6359453866 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", - "score":0.4255607348 + "score":0.4344801023 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", - "score":0.6457270128 + "score":0.6474630132 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"id", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"id", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"id", "task":"translation_from", "metric":"bleu", - "score":0.3789722313 + "score":0.3126497326 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"id", "task":"translation_from", "metric":"chrf", - "score":0.5973074889 + "score":0.5733116043 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"id", "task":"translation_to", "metric":"bleu", - "score":0.406320567 + "score":0.3882402101 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"id", "task":"translation_to", "metric":"chrf", - "score":0.6828299565 + "score":0.6624914478 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ja", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"ja", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", - "score":0.3423388267 + "score":0.289402493 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", - "score":0.5884893314 + "score":0.5452343766 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", - "score":0.3740472584 + "score":0.1879084772 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", - "score":0.4832664839 + "score":0.3787664659 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"jv", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"jv", "task":"translation_from", "metric":"bleu", - "score":0.3540174328 + "score":0.3561823456 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"jv", "task":"translation_from", "metric":"chrf", - "score":0.5513315973 + "score":0.5619277442 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"jv", "task":"translation_to", "metric":"bleu", - "score":0.329885986 + "score":0.2609272138 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"jv", "task":"translation_to", "metric":"chrf", - "score":0.5805485408 + "score":0.5344454302 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"mr", "task":"classification", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", - "score":0.3055522766 + "score":0.2906692433 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", - "score":0.5435968357 + "score":0.5058194876 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", - "score":0.257460039 + "score":0.2450137243 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", - "score":0.4845716024 + "score":0.4736891146 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"pa", "task":"classification", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", - "score":0.4182659606 + "score":0.3792889323 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", - "score":0.6696655487 + "score":0.6001893022 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", - "score":0.4665320226 + "score":0.4267930078 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", - "score":0.6280330147 + "score":0.583867208 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"pt", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", - "score":0.3485890022 + "score":0.3275326122 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", - "score":0.574915747 + "score":0.5672682921 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", - "score":0.4760104707 + "score":0.4390630519 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", - "score":0.6891420805 + "score":0.6588223005 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ru", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"ru", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.8 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", - "score":0.2893557086 + "score":0.266439807 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", - "score":0.5514512546 + "score":0.5101737799 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", - "score":0.4910352633 + "score":0.4085478092 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", - "score":0.6636379715 + "score":0.5658655698 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"sw", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"sw", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", - "score":0.397583037 + "score":0.2763965367 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", - "score":0.6014699575 + "score":0.520411275 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", - "score":0.4540021563 + "score":0.4350912598 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", - "score":0.7002830052 + "score":0.6853284539 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"te", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"te", - "task":"mmlu", + "task":"mgsm", "metric":"accuracy", "score":0.8 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"te", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"te", "task":"translation_from", "metric":"bleu", - "score":0.4025505277 + "score":0.3528600199 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"te", "task":"translation_from", "metric":"chrf", - "score":0.6339036498 + "score":0.5850279702 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"te", "task":"translation_to", "metric":"bleu", - "score":0.4166086163 + "score":0.3231474889 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"te", "task":"translation_to", "metric":"chrf", - "score":0.6171953291 + "score":0.5470305035 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ur", "task":"classification", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", - "score":0.3212777323 + "score":0.2859928961 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", - "score":0.5735084601 + "score":0.5295533045 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", - "score":0.3027958989 + "score":0.3010091175 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", - "score":0.4912965943 + "score":0.493353195 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"vi", "task":"classification", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"vi", "task":"translation_from", "metric":"bleu", - "score":0.3471784526 + "score":0.3170290484 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"vi", "task":"translation_from", "metric":"chrf", - "score":0.5719753053 + "score":0.5491010642 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"vi", "task":"translation_to", "metric":"bleu", - "score":0.4601231177 + "score":0.4032471641 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"vi", "task":"translation_to", "metric":"chrf", - "score":0.6644848789 + "score":0.6394566771 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"zh", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"zh", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", - "score":0.2989639846 + "score":0.2557280993 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", - "score":0.5646599732 + "score":0.5212143675 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", - "score":0.2915310914 + "score":0.2610248692 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", - "score":0.3358004159 + "score":0.314329989 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"ar", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", - "score":0.2747843596 + "score":0.3462011916 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", - "score":0.5519960681 + "score":0.5813984303 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", - "score":0.2863967069 + "score":0.4319399394 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", - "score":0.5318173199 + "score":0.6269389132 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"bn", "task":"classification", "metric":"accuracy", + "score":1.0 + }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"bn", + "task":"mgsm", + "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.9 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", - "score":0.271237739 + "score":0.3332105938 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", - "score":0.5173954387 + "score":0.5788748413 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", - "score":0.3067537945 + "score":0.3925309816 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", - "score":0.5194482945 + "score":0.5671456285 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"de", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"de", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"de", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.9 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"de", "task":"translation_from", "metric":"bleu", - "score":0.3189602129 + "score":0.4080538745 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"de", "task":"translation_from", "metric":"chrf", - "score":0.5548503533 + "score":0.6278026445 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"de", "task":"translation_to", "metric":"bleu", - "score":0.5229096392 + "score":0.4940498755 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"de", "task":"translation_to", "metric":"chrf", - "score":0.7023434262 + "score":0.6915113583 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"en", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"en", - "task":"mmlu", + "task":"mgsm", "metric":"accuracy", "score":0.8 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"en", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"en", "task":"translation_from", "metric":"bleu", - "score":0.3889146477 + "score":0.5780592594 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"en", "task":"translation_from", "metric":"chrf", - "score":0.628092835 + "score":0.727062887 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"en", "task":"translation_to", "metric":"bleu", - "score":0.4660772497 + "score":0.725042796 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"en", "task":"translation_to", "metric":"chrf", - "score":0.7280386297 + "score":0.868453404 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"es", "task":"classification", "metric":"accuracy", + "score":1.0 + }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"es", + "task":"mgsm", + "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"es", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"es", "task":"translation_from", "metric":"bleu", - "score":0.3593767686 + "score":0.3228577771 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"es", "task":"translation_from", "metric":"chrf", - "score":0.5668073679 + "score":0.5691176688 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"es", "task":"translation_to", "metric":"bleu", - "score":0.3662275621 + "score":0.4211069488 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"es", "task":"translation_to", "metric":"chrf", - "score":0.6102640711 + "score":0.6329026504 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"fr", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"fr", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", - "score":0.3180384008 + "score":0.3683003934 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", - "score":0.5571267732 + "score":0.5961800274 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", - "score":0.5778354146 + "score":0.5899042948 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", - "score":0.7490356238 + "score":0.7506595836 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"hi", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":1.0 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", - "score":0.3722751955 + "score":0.4661333335 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", - "score":0.6207213131 + "score":0.6599650679 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", - "score":0.3378499277 + "score":0.4319599559 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", - "score":0.554090013 + "score":0.6493733048 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"id", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"id", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"id", "task":"translation_from", "metric":"bleu", - "score":0.3583744222 + "score":0.3507436052 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"id", "task":"translation_from", "metric":"chrf", - "score":0.5732194975 + "score":0.5856872774 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"id", "task":"translation_to", "metric":"bleu", - "score":0.3364664006 + "score":0.387557065 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"id", "task":"translation_to", "metric":"chrf", - "score":0.6438910651 + "score":0.6730969345 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"ja", "task":"classification", "metric":"accuracy", + "score":1.0 + }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"ja", + "task":"mgsm", + "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", - "score":0.2581140706 + "score":0.3590987818 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", - "score":0.5395853617 + "score":0.5958999814 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", - "score":0.3298839393 + "score":0.4151803591 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", - "score":0.4471547552 + "score":0.5319028037 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"jv", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"jv", "task":"translation_from", "metric":"bleu", - "score":0.3177915441 + "score":0.3793672083 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"jv", "task":"translation_from", "metric":"chrf", - "score":0.5387853038 + "score":0.6054760312 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"jv", "task":"translation_to", "metric":"bleu", - "score":0.2549228547 + "score":0.3161702144 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"jv", "task":"translation_to", "metric":"chrf", - "score":0.5322440265 + "score":0.5956895972 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"mr", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", - "score":0.2370074805 + "score":0.2888720948 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", - "score":0.4804215458 + "score":0.5411662197 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", - "score":0.2399769139 + "score":0.2820351359 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", - "score":0.4726429935 + "score":0.5017455952 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"pa", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", - "score":0.4180718844 + "score":0.4833653569 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", - "score":0.6426219278 + "score":0.6877252787 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", - "score":0.3938693136 + "score":0.4959043786 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", - "score":0.5573992167 + "score":0.631939007 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"pt", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", - "score":0.3537745123 + "score":0.4185968559 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", - "score":0.5904429929 + "score":0.6194298291 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", - "score":0.4588664196 + "score":0.5066913819 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", - "score":0.6844540285 + "score":0.7045563325 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"ru", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"ru", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", - "score":0.2449777422 + "score":0.2835606416 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", - "score":0.5268764903 + "score":0.5525332546 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", - "score":0.3789708434 + "score":0.5294387768 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", - "score":0.5790333031 + "score":0.6814959444 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"sw", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"sw", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", - "score":0.3005035588 + "score":0.4452506057 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", - "score":0.5210660172 + "score":0.6299322033 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", - "score":0.3401968092 + "score":0.4773237879 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", - "score":0.6217197146 + "score":0.6983325706 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"te", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"te", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"te", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.8 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"te", "task":"translation_from", "metric":"bleu", - "score":0.3330093484 + "score":0.4838983303 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"te", "task":"translation_from", "metric":"chrf", - "score":0.5694168709 + "score":0.6878651593 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"te", "task":"translation_to", "metric":"bleu", - "score":0.2667299931 + "score":0.4225689448 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"te", "task":"translation_to", "metric":"chrf", - "score":0.5208033902 + "score":0.6171822766 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"ur", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", - "score":0.322448107 + "score":0.3194163699 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", - "score":0.5887654616 + "score":0.5758075077 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", - "score":0.2148139783 + "score":0.2609063103 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", - "score":0.4226865444 + "score":0.4351160146 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"vi", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"vi", "task":"translation_from", "metric":"bleu", - "score":0.2745000434 + "score":0.4227968054 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"vi", "task":"translation_from", "metric":"chrf", - "score":0.5206422805 + "score":0.6326522416 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"vi", "task":"translation_to", "metric":"bleu", - "score":0.4261790941 + "score":0.4693432911 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"vi", "task":"translation_to", "metric":"chrf", - "score":0.6358462464 + "score":0.6659480306 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"zh", "task":"classification", "metric":"accuracy", + "score":1.0 + }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"zh", + "task":"mgsm", + "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", - "score":0.2201641871 + "score":0.2632355471 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", - "score":0.5051068628 + "score":0.5527183044 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", - "score":0.2508351517 + "score":0.3128970722 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/gemini-2.5-flash-preview", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", - "score":0.3110461024 + "score":0.3699956681 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"ar", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", - "score":0.260683336 + "score":0.3509446122 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", - "score":0.5383651277 + "score":0.5872861306 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", - "score":0.2962406565 + "score":0.4369226272 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", - "score":0.5190026627 + "score":0.6357392299 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"bn", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"bn", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.8 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", - "score":0.2183929994 + "score":0.2928560114 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", - "score":0.4877941086 + "score":0.5373877759 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", - "score":0.2700916391 + "score":0.371713483 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", - "score":0.5150577414 + "score":0.5467648432 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"de", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"de", + "task":"mgsm", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"de", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"de", "task":"translation_from", "metric":"bleu", - "score":0.3440655166 + "score":0.3827187056 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"de", "task":"translation_from", "metric":"chrf", - "score":0.563902418 + "score":0.6092943987 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"de", "task":"translation_to", "metric":"bleu", - "score":0.4305522274 + "score":0.5181328066 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"de", "task":"translation_to", "metric":"chrf", - "score":0.6477508732 + "score":0.6981190119 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"en", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"en", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"en", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"en", "task":"translation_from", "metric":"bleu", - "score":0.4113463435 + "score":0.5942868447 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"en", "task":"translation_from", "metric":"chrf", - "score":0.6260248317 + "score":0.7298379077 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"en", "task":"translation_to", "metric":"bleu", - "score":0.5310035709 + "score":0.7309888475 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"en", "task":"translation_to", "metric":"chrf", - "score":0.7595845064 + "score":0.8534239972 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"es", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"es", + "task":"mgsm", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"es", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.9 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"es", "task":"translation_from", "metric":"bleu", - "score":0.3031284355 + "score":0.3497891701 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"es", "task":"translation_from", "metric":"chrf", - "score":0.5241309352 + "score":0.5889817614 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"es", "task":"translation_to", "metric":"bleu", - "score":0.3289699508 + "score":0.395885417 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"es", "task":"translation_to", "metric":"chrf", - "score":0.5811203167 + "score":0.6273835234 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"fr", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"fr", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", - "score":0.3349110908 + "score":0.3737227872 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", - "score":0.564806297 + "score":0.6039484183 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", - "score":0.4574014191 + "score":0.5555743798 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", - "score":0.6626552528 + "score":0.7229519037 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"hi", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.9 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", - "score":0.3184042229 + "score":0.4387215588 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", - "score":0.5712698408 + "score":0.6443095169 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", - "score":0.3659029431 + "score":0.4255607348 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", - "score":0.5796195236 + "score":0.6457270128 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"id", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"id", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.8 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"id", "task":"translation_from", "metric":"bleu", - "score":0.2301748885 + "score":0.3789722313 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"id", "task":"translation_from", "metric":"chrf", - "score":0.4682741896 + "score":0.5973074889 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"id", "task":"translation_to", "metric":"bleu", - "score":0.3346592082 + "score":0.406320567 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"id", "task":"translation_to", "metric":"chrf", - "score":0.6514874668 + "score":0.6828299565 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"ja", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"ja", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", - "score":0.2664966821 + "score":0.3423388267 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", - "score":0.5386982677 + "score":0.5884893314 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", - "score":0.2651365589 + "score":0.3740472584 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", - "score":0.409095006 + "score":0.4832664839 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"jv", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"jv", "task":"translation_from", "metric":"bleu", - "score":0.2326358655 + "score":0.3540174328 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"jv", "task":"translation_from", "metric":"chrf", - "score":0.4815897231 + "score":0.5513315973 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"jv", "task":"translation_to", "metric":"bleu", - "score":0.2187928356 + "score":0.329885986 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"jv", "task":"translation_to", "metric":"chrf", - "score":0.4896578943 + "score":0.5805485408 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"mr", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", - "score":0.2256246926 + "score":0.3055522766 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", - "score":0.4703189943 + "score":0.5435968357 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", - "score":0.2254492518 + "score":0.257460039 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", - "score":0.447826525 + "score":0.4845716024 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"pa", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", - "score":0.3706063992 + "score":0.4182659606 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", - "score":0.6167676482 + "score":0.6696655487 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", - "score":0.4234596823 + "score":0.4665320226 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", - "score":0.5629443923 + "score":0.6280330147 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"pt", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", - "score":0.2971403532 + "score":0.3485890022 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", - "score":0.5321068893 + "score":0.574915747 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", - "score":0.4364286549 + "score":0.4760104707 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", - "score":0.6509885745 + "score":0.6891420805 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"ru", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"ru", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":1.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", - "score":0.2300270544 + "score":0.2893557086 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", - "score":0.4839384065 + "score":0.5514512546 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", - "score":0.4236492288 + "score":0.4910352633 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", - "score":0.6116207052 + "score":0.6636379715 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"sw", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"sw", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", - "score":0.2501435914 + "score":0.397583037 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", - "score":0.5088299265 + "score":0.6014699575 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", - "score":0.262372343 + "score":0.4540021563 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", - "score":0.5806899403 + "score":0.7002830052 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"te", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"te", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"te", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.8 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"te", "task":"translation_from", "metric":"bleu", - "score":0.3568851036 + "score":0.4025505277 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"te", "task":"translation_from", "metric":"chrf", - "score":0.5825326367 + "score":0.6339036498 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"te", "task":"translation_to", "metric":"bleu", - "score":0.3112091725 + "score":0.4166086163 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"te", "task":"translation_to", "metric":"chrf", - "score":0.5431414206 + "score":0.6171953291 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"ur", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", - "score":0.2402951661 + "score":0.3212777323 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", - "score":0.5033005385 + "score":0.5735084601 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", - "score":0.2302239803 + "score":0.3027958989 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", - "score":0.4066956434 + "score":0.4912965943 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"vi", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.8 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"vi", "task":"translation_from", "metric":"bleu", - "score":0.2537752957 + "score":0.3471784526 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"vi", "task":"translation_from", "metric":"chrf", - "score":0.5073147534 + "score":0.5719753053 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"vi", "task":"translation_to", "metric":"bleu", - "score":0.3583753747 + "score":0.4601231177 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"vi", "task":"translation_to", "metric":"chrf", - "score":0.6253917282 + "score":0.6644848789 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"zh", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"zh", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", - "score":0.249810194 + "score":0.2989639846 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", - "score":0.5151255506 + "score":0.5646599732 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", - "score":0.1854861198 + "score":0.2915310914 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", - "score":0.2463326959 + "score":0.3358004159 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"ar", "task":"classification", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", - "score":0.0 + "score":1.0 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", - "score":0.2170512634 + "score":0.2747843596 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", - "score":0.4780493622 + "score":0.5519960681 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", - "score":0.2844387275 + "score":0.2863967069 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", - "score":0.4979841291 + "score":0.5318173199 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"bn", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"bn", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", - "score":0.0 + "score":0.6 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", - "score":0.1842659234 + "score":0.271237739 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", - "score":0.4331372452 + "score":0.5173954387 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", - "score":0.2644024965 + "score":0.3067537945 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", - "score":0.462996411 + "score":0.5194482945 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"de", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"de", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"de", "task":"mmlu", "metric":"accuracy", - "score":0.0 + "score":0.6 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"de", "task":"translation_from", "metric":"bleu", - "score":0.2470808909 + "score":0.3189602129 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"de", "task":"translation_from", "metric":"chrf", - "score":0.4959472303 + "score":0.5548503533 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"de", "task":"translation_to", "metric":"bleu", - "score":0.3105509871 + "score":0.5229096392 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"de", "task":"translation_to", "metric":"chrf", - "score":0.5118330277 + "score":0.7023434262 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"en", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", + "bcp_47":"en", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5", "bcp_47":"en", "task":"mmlu", "metric":"accuracy", - "score":0.0 + "score":0.8 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"en", "task":"translation_from", "metric":"bleu", - "score":0.4810009804 + "score":0.3889146477 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"en", "task":"translation_from", "metric":"chrf", - "score":0.6244199222 + "score":0.628092835 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"en", "task":"translation_to", "metric":"bleu", - "score":0.6112171119 + "score":0.4660772497 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"en", "task":"translation_to", "metric":"chrf", - "score":0.8046069838 + "score":0.7280386297 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"es", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"es", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"es", "task":"mmlu", "metric":"accuracy", - "score":0.1 + "score":0.8 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"es", "task":"translation_from", "metric":"bleu", - "score":0.1990314823 + "score":0.3593767686 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"es", "task":"translation_from", "metric":"chrf", - "score":0.4557184175 + "score":0.5668073679 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"es", "task":"translation_to", "metric":"bleu", - "score":0.3019092795 + "score":0.3662275621 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"es", "task":"translation_to", "metric":"chrf", - "score":0.5373780713 + "score":0.6102640711 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"fr", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", + "bcp_47":"fr", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"google\/gemini-flash-1.5", "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", - "score":0.0 + "score":0.8 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", - "score":0.1741359314 + "score":0.3180384008 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", - "score":0.4175910256 + "score":0.5571267732 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", - "score":0.4029915732 + "score":0.5778354146 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", - "score":0.618701934 + "score":0.7490356238 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"hi", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", - "score":0.1 + "score":1.0 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", - "score":0.3016091966 + "score":0.3722751955 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", - "score":0.5281403748 + "score":0.6207213131 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", - "score":0.2736270122 + "score":0.3378499277 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", - "score":0.5159089396 + "score":0.554090013 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"id", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"id", "task":"mmlu", "metric":"accuracy", - "score":0.0 + "score":0.9 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"id", "task":"translation_from", "metric":"bleu", - "score":0.1930481943 + "score":0.3583744222 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"id", "task":"translation_from", "metric":"chrf", - "score":0.420733839 + "score":0.5732194975 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"id", "task":"translation_to", "metric":"bleu", - "score":0.344885086 + "score":0.3364664006 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"id", "task":"translation_to", "metric":"chrf", - "score":0.6497564532 + "score":0.6438910651 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"ja", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"ja", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", - "score":0.0 + "score":0.8 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", - "score":0.2254647544 + "score":0.2581140706 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", - "score":0.4810564434 + "score":0.5395853617 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"ja", + "task":"translation_to", + "metric":"bleu", + "score":0.3298839393 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"ja", "task":"translation_to", + "metric":"chrf", + "score":0.4471547552 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"jv", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"jv", + "task":"translation_from", + "metric":"bleu", + "score":0.3177915441 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"jv", + "task":"translation_from", + "metric":"chrf", + "score":0.5387853038 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"jv", + "task":"translation_to", "metric":"bleu", - "score":0.1874016498 + "score":0.2549228547 }, { - "model":"google\/gemma-2-9b-it", - "bcp_47":"ja", + "model":"google\/gemini-flash-1.5", + "bcp_47":"jv", "task":"translation_to", "metric":"chrf", - "score":0.2916976073 + "score":0.5322440265 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"mr", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", - "score":0.1616328428 + "score":0.2370074805 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", - "score":0.414450627 + "score":0.4804215458 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", - "score":0.1123956753 + "score":0.2399769139 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", - "score":0.3542849129 + "score":0.4726429935 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"pa", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", - "score":0.34113337 + "score":0.4180718844 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", - "score":0.5187033084 + "score":0.6426219278 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", - "score":0.3611338165 + "score":0.3938693136 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", - "score":0.5059192583 + "score":0.5573992167 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"pt", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", - "score":0.1 + "score":0.9 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", - "score":0.2429635608 + "score":0.3537745123 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", - "score":0.473106986 + "score":0.5904429929 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", - "score":0.2843181347 + "score":0.4588664196 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", - "score":0.5735245279 + "score":0.6844540285 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"ru", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", + "bcp_47":"ru", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5", "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", - "score":0.0 + "score":1.0 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", - "score":0.1697780011 + "score":0.2449777422 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", - "score":0.4361163813 + "score":0.5268764903 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", - "score":0.3672218817 + "score":0.3789708434 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", - "score":0.5564712622 + "score":0.5790333031 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"sw", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"sw", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", - "score":0.0 + "score":0.8 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", - "score":0.188207488 + "score":0.3005035588 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", - "score":0.4728711823 + "score":0.5210660172 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", - "score":0.27482714 + "score":0.3401968092 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", - "score":0.5603409861 + "score":0.6217197146 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"te", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", + "bcp_47":"te", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5", "bcp_47":"te", "task":"mmlu", "metric":"accuracy", - "score":0.0 + "score":0.6 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"te", "task":"translation_from", "metric":"bleu", - "score":0.3095013641 + "score":0.3330093484 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"te", "task":"translation_from", "metric":"chrf", - "score":0.5252619118 + "score":0.5694168709 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"te", "task":"translation_to", "metric":"bleu", - "score":0.1798656271 + "score":0.257812168 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"te", "task":"translation_to", "metric":"chrf", - "score":0.4011450412 + "score":0.5167002436 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"ur", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", - "score":0.137296648 + "score":0.322448107 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", - "score":0.341408254 + "score":0.5887654616 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", - "score":0.0595669677 + "score":0.2148139783 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", - "score":0.201487497 + "score":0.4226865444 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"vi", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"vi", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"vi", + "task":"translation_from", + "metric":"bleu", + "score":0.2745000434 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"vi", + "task":"translation_from", + "metric":"chrf", + "score":0.5206422805 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"vi", + "task":"translation_to", + "metric":"bleu", + "score":0.4261790941 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"vi", + "task":"translation_to", + "metric":"chrf", + "score":0.6358462464 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"zh", "task":"classification", "metric":"accuracy", - "score":0.7 + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"zh", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", - "score":0.1 + "score":0.8 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", - "score":0.1922618764 + "score":0.2201641871 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", - "score":0.4517742384 + "score":0.5051068628 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", - "score":0.2125812397 + "score":0.2508351517 }, { - "model":"google\/gemma-2-9b-it", + "model":"google\/gemini-flash-1.5", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", - "score":0.2689557345 + "score":0.3110461024 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ar", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.7 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", - "score":0.2891206499 + "score":0.260683336 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", - "score":0.5438550217 + "score":0.5383651277 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", - "score":0.3184721364 + "score":0.2962406565 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", - "score":0.5483731849 + "score":0.5190026627 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"bn", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"bn", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.6 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", - "score":0.2907230812 + "score":0.2183929994 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", - "score":0.5148223626 + "score":0.4877941086 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", - "score":0.3473636391 + "score":0.2700916391 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", - "score":0.5442574441 + "score":0.5150577414 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"de", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"de", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"de", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"de", "task":"translation_from", "metric":"bleu", - "score":0.3577876868 + "score":0.3440655166 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"de", "task":"translation_from", "metric":"chrf", - "score":0.5943423055 + "score":0.563902418 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"de", "task":"translation_to", "metric":"bleu", - "score":0.5344280565 + "score":0.4305522274 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"de", "task":"translation_to", "metric":"chrf", - "score":0.7084649844 + "score":0.6477508732 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"en", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"en", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"en", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.9 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"en", "task":"translation_from", "metric":"bleu", - "score":0.452427177 + "score":0.4113463435 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"en", "task":"translation_from", "metric":"chrf", - "score":0.6525566656 + "score":0.6260248317 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"en", "task":"translation_to", "metric":"bleu", - "score":0.5467976399 + "score":0.5310035709 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"en", "task":"translation_to", "metric":"chrf", - "score":0.7780833183 + "score":0.7595845064 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"es", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"es", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"es", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"es", "task":"translation_from", "metric":"bleu", - "score":0.3554331718 + "score":0.3031284355 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"es", "task":"translation_from", "metric":"chrf", - "score":0.5997743406 + "score":0.5241309352 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"es", "task":"translation_to", "metric":"bleu", - "score":0.3847830842 + "score":0.3289699508 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"es", "task":"translation_to", "metric":"chrf", - "score":0.6191109047 + "score":0.5811203167 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"fr", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"fr", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.8 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", - "score":0.2846092378 + "score":0.3349110908 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", - "score":0.5655970541 + "score":0.564806297 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", - "score":0.5192984544 + "score":0.4574014191 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", - "score":0.7020040834 + "score":0.6626552528 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"hi", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.6 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", - "score":0.3520691191 + "score":0.3184042229 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", - "score":0.6035990708 + "score":0.5712698408 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", - "score":0.3987037224 + "score":0.3659029431 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", - "score":0.6195037668 + "score":0.5796195236 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"id", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"id", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.5 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"id", "task":"translation_from", "metric":"bleu", - "score":0.3377417704 + "score":0.2301748885 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"id", "task":"translation_from", "metric":"chrf", - "score":0.5674360496 + "score":0.4682741896 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"id", "task":"translation_to", "metric":"bleu", - "score":0.3534620252 + "score":0.3346592082 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"id", "task":"translation_to", "metric":"chrf", - "score":0.6680177029 + "score":0.6514874668 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ja", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"ja", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.7 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", - "score":0.2481856237 + "score":0.2664966821 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", - "score":0.5180749152 + "score":0.5386982677 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", - "score":0.242508046 + "score":0.2651365589 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", - "score":0.4046420215 + "score":0.409095006 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"jv", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"jv", "task":"translation_from", "metric":"bleu", - "score":0.3019627022 + "score":0.2326358655 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"jv", "task":"translation_from", "metric":"chrf", - "score":0.5133980923 + "score":0.4815897231 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"jv", "task":"translation_to", "metric":"bleu", - "score":0.2316517545 + "score":0.2187928356 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"jv", "task":"translation_to", "metric":"chrf", - "score":0.5189963647 + "score":0.4896578943 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"mr", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", - "score":0.3395095603 + "score":0.2256246926 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", - "score":0.5877742809 + "score":0.4703189943 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", - "score":0.2222923122 + "score":0.2254492518 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", - "score":0.4572688692 + "score":0.447826525 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"pa", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", - "score":0.4164890636 + "score":0.3706063992 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", - "score":0.6375470445 + "score":0.6167676482 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", - "score":0.4166823661 + "score":0.4234596823 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", - "score":0.575314128 + "score":0.5629443923 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"pt", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.9 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", - "score":0.340245547 + "score":0.2971403532 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", - "score":0.5681284927 + "score":0.5321068893 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", - "score":0.4702737577 + "score":0.4364286549 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", - "score":0.6903236014 + "score":0.6509885745 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ru", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"ru", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.8 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", - "score":0.2862936285 + "score":0.2300270544 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", - "score":0.5264436928 + "score":0.4839384065 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", - "score":0.4800957551 + "score":0.4236492288 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", - "score":0.6618495803 + "score":0.6116207052 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sw", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"sw", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.8 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", - "score":0.268709657 + "score":0.2501435914 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", - "score":0.5306834056 + "score":0.5088299265 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", - "score":0.3395981599 + "score":0.262372343 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", - "score":0.6130756934 + "score":0.5806899403 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"te", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"te", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"te", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.6 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"te", "task":"translation_from", "metric":"bleu", - "score":0.279412176 + "score":0.3568851036 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"te", "task":"translation_from", "metric":"chrf", - "score":0.5419647491 + "score":0.5825326367 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"te", "task":"translation_to", "metric":"bleu", - "score":0.259866454 + "score":0.3112091725 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"te", "task":"translation_to", "metric":"chrf", - "score":0.48543634 + "score":0.5431414206 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ur", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", - "score":0.2595582459 + "score":0.2402951661 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", - "score":0.5043992681 + "score":0.5033005385 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", - "score":0.2142625601 + "score":0.2302239803 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", - "score":0.429749938 + "score":0.4066956434 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"vi", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.6 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"vi", "task":"translation_from", "metric":"bleu", - "score":0.2279880384 + "score":0.2537752957 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"vi", "task":"translation_from", "metric":"chrf", - "score":0.4835933272 + "score":0.5073147534 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"vi", "task":"translation_to", "metric":"bleu", - "score":0.3478085621 + "score":0.3583753747 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"vi", "task":"translation_to", "metric":"chrf", - "score":0.5968604742 + "score":0.6253917282 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"zh", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"zh", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.9 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", - "score":0.1900086584 + "score":0.249810194 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", - "score":0.4895930442 + "score":0.5151255506 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", - "score":0.2395565562 + "score":0.1854861198 }, { - "model":"google\/gemma-3-27b-it", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", - "score":0.3237759485 + "score":0.2463326959 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"ar", "task":"classification", "metric":"accuracy", - "score":0.5 + "score":1.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", - "score":0.0837265107 + "score":0.2891206499 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", - "score":0.3012065838 + "score":0.5438550217 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", - "score":0.1120670716 + "score":0.3184721364 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", - "score":0.2771296913 + "score":0.5483731849 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"bn", "task":"classification", "metric":"accuracy", - "score":0.4 + "score":1.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", + "bcp_47":"bn", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"google\/gemma-3-27b-it", "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", - "score":0.021858254 + "score":0.2907230812 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", - "score":0.2327499821 + "score":0.5148223626 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", - "score":0.0253088472 + "score":0.3473636391 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", - "score":0.1918662187 + "score":0.5442574441 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"de", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", + "bcp_47":"de", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"google\/gemma-3-27b-it", "bcp_47":"de", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.7 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"de", "task":"translation_from", "metric":"bleu", - "score":0.1784955678 + "score":0.3577876868 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"de", "task":"translation_from", "metric":"chrf", - "score":0.4008799371 + "score":0.5943423055 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"de", "task":"translation_to", "metric":"bleu", - "score":0.1782663616 + "score":0.5344280565 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"de", "task":"translation_to", "metric":"chrf", - "score":0.4070825897 + "score":0.7084649844 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"en", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":1.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", + "bcp_47":"en", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"google\/gemma-3-27b-it", "bcp_47":"en", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.6 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"en", "task":"translation_from", "metric":"bleu", - "score":0.2797104835 + "score":0.452427177 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"en", "task":"translation_from", "metric":"chrf", - "score":0.4601471921 + "score":0.6525566656 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"en", "task":"translation_to", "metric":"bleu", - "score":0.3392404298 + "score":0.5467976399 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"en", "task":"translation_to", "metric":"chrf", - "score":0.5518674496 + "score":0.7780833183 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"es", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":1.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", + "bcp_47":"es", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"google\/gemma-3-27b-it", "bcp_47":"es", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.6 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"es", "task":"translation_from", "metric":"bleu", - "score":0.1682956348 + "score":0.3554331718 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"es", "task":"translation_from", "metric":"chrf", - "score":0.3705606944 + "score":0.5997743406 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"es", "task":"translation_to", "metric":"bleu", - "score":0.1595479626 + "score":0.3847830842 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"es", "task":"translation_to", "metric":"chrf", - "score":0.4040956812 + "score":0.6191109047 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"fr", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", + "bcp_47":"fr", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"google\/gemma-3-27b-it", "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.3 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", - "score":0.1362099506 + "score":0.2846092378 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", - "score":0.4039646029 + "score":0.5655970541 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", - "score":0.2151785904 + "score":0.5192984544 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", - "score":0.4309035319 + "score":0.7020040834 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"hi", "task":"classification", "metric":"accuracy", - "score":0.6 + "score":1.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.5 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", - "score":0.1658307051 + "score":0.3520691191 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", - "score":0.3676480008 + "score":0.6035990708 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", - "score":0.1711676323 + "score":0.3987037224 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", - "score":0.360371738 + "score":0.6195037668 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"id", "task":"classification", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"id", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"id", "task":"translation_from", "metric":"bleu", - "score":0.1399603895 + "score":0.3377417704 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"id", "task":"translation_from", "metric":"chrf", - "score":0.3604113675 + "score":0.5674360496 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"id", "task":"translation_to", "metric":"bleu", - "score":0.1312450274 + "score":0.3534620252 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"id", "task":"translation_to", "metric":"chrf", - "score":0.3988827234 + "score":0.6680177029 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"ja", "task":"classification", "metric":"accuracy", - "score":0.7 + "score":1.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ja", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemma-3-27b-it", "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", - "score":0.1 + "score":0.4 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", - "score":0.1222242366 + "score":0.2481856237 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", - "score":0.3415186168 + "score":0.5180749152 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", - "score":0.1607198845 + "score":0.242508046 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", - "score":0.2706371796 + "score":0.4046420215 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"jv", "task":"classification", "metric":"accuracy", - "score":0.6 + "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"jv", "task":"translation_from", "metric":"bleu", - "score":0.0550016522 + "score":0.3019627022 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"jv", "task":"translation_from", "metric":"chrf", - "score":0.2468917982 + "score":0.5133980923 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"jv", "task":"translation_to", "metric":"bleu", - "score":0.017690474 + "score":0.2316517545 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"jv", "task":"translation_to", "metric":"chrf", - "score":0.2536001746 + "score":0.5189963647 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"mr", "task":"classification", "metric":"accuracy", - "score":0.4 + "score":1.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", - "score":0.0411296617 + "score":0.3395095603 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", - "score":0.2442292695 + "score":0.5877742809 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", - "score":0.0482775033 + "score":0.2222923122 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", - "score":0.2211003771 + "score":0.4572688692 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"pa", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", - "score":0.2292007848 + "score":0.4164890636 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", - "score":0.3716655897 + "score":0.6375470445 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", - "score":0.2126702079 + "score":0.4166823661 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", - "score":0.3248815955 + "score":0.575314128 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"pt", "task":"classification", "metric":"accuracy", - "score":0.7 + "score":1.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.6 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", - "score":0.1013445398 + "score":0.340245547 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", - "score":0.3063125264 + "score":0.5681284927 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", - "score":0.2231857524 + "score":0.4702737577 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", - "score":0.4401829864 + "score":0.6903236014 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"ru", "task":"classification", "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"ru", + "task":"mgsm", + "metric":"accuracy", "score":0.8 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", - "score":0.1005466956 + "score":0.2862936285 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", - "score":0.3356518748 + "score":0.5264436928 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", - "score":0.1363185356 + "score":0.4800957551 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", - "score":0.3769111636 + "score":0.6618495803 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"sw", "task":"classification", "metric":"accuracy", - "score":0.5 + "score":1.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", + "bcp_47":"sw", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"google\/gemma-3-27b-it", "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.6 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", - "score":0.0510060878 + "score":0.268709657 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", - "score":0.2132741272 + "score":0.5306834056 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", - "score":0.0089146903 + "score":0.3395981599 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", - "score":0.2314109768 + "score":0.6130756934 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"te", "task":"classification", "metric":"accuracy", - "score":0.2 + "score":1.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", + "bcp_47":"te", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"google\/gemma-3-27b-it", "bcp_47":"te", "task":"mmlu", "metric":"accuracy", - "score":0.1 + "score":0.2 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"te", "task":"translation_from", "metric":"bleu", - "score":0.1096331511 + "score":0.2787922254 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"te", "task":"translation_from", "metric":"chrf", - "score":0.2888090685 + "score":0.5420797212 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"te", "task":"translation_to", "metric":"bleu", - "score":0.1206114883 + "score":0.259866454 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"te", "task":"translation_to", "metric":"chrf", - "score":0.2597106436 + "score":0.48543634 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"ur", "task":"classification", "metric":"accuracy", - "score":0.5 + "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", - "score":0.0891185343 + "score":0.2595582459 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", - "score":0.281793335 + "score":0.5043992681 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", - "score":0.0799576366 + "score":0.2142625601 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", - "score":0.2661903898 + "score":0.429749938 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"vi", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":1.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.4 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"vi", "task":"translation_from", "metric":"bleu", - "score":0.1389065496 + "score":0.2279880384 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"vi", "task":"translation_from", "metric":"chrf", - "score":0.3708687542 + "score":0.4835933272 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"vi", "task":"translation_to", "metric":"bleu", - "score":0.1661316612 + "score":0.3478085621 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"vi", "task":"translation_to", "metric":"chrf", - "score":0.354399593 + "score":0.5968604742 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"zh", "task":"classification", "metric":"accuracy", - "score":0.7 + "score":1.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", + "bcp_47":"zh", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"google\/gemma-3-27b-it", "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.5 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", - "score":0.1133138775 + "score":0.1900086584 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", - "score":0.3754190494 + "score":0.4895930442 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", - "score":0.1672636279 + "score":0.2395565562 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"google\/gemma-3-27b-it", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", - "score":0.232325354 + "score":0.3237759485 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ar", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.5 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.4 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", - "score":0.2609114367 + "score":0.0837265107 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", - "score":0.5167379854 + "score":0.3012065838 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", - "score":0.1932802581 + "score":0.1120670716 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", - "score":0.4648835751 + "score":0.2771296913 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"bn", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.4 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"bn", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.3 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", - "score":0.1938367121 + "score":0.021858254 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", - "score":0.4674774016 + "score":0.2327499821 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", - "score":0.2073541352 + "score":0.0253088472 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", - "score":0.4438396219 + "score":0.1918662187 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"de", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"de", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"de", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.4 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"de", "task":"translation_from", "metric":"bleu", - "score":0.2685056004 + "score":0.1784955678 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"de", "task":"translation_from", "metric":"chrf", - "score":0.506818165 + "score":0.4008799371 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"de", "task":"translation_to", "metric":"bleu", - "score":0.3801657831 + "score":0.1782663616 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"de", "task":"translation_to", "metric":"chrf", - "score":0.6056477234 + "score":0.4070825897 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"en", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"en", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"en", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"en", "task":"translation_from", "metric":"bleu", - "score":0.2216031518 + "score":0.2797104835 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"en", "task":"translation_from", "metric":"chrf", - "score":0.4453474399 + "score":0.4601471921 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"en", "task":"translation_to", "metric":"bleu", - "score":0.3322887566 + "score":0.3392404298 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"en", "task":"translation_to", "metric":"chrf", - "score":0.5824514758 + "score":0.5518674496 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"es", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"es", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"es", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.4 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"es", "task":"translation_from", "metric":"bleu", - "score":0.1640783778 + "score":0.1682956348 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"es", "task":"translation_from", "metric":"chrf", - "score":0.434749516 + "score":0.3705606944 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"es", "task":"translation_to", "metric":"bleu", - "score":0.3126220052 + "score":0.1595479626 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"es", "task":"translation_to", "metric":"chrf", - "score":0.6038883227 + "score":0.4040956812 + }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"fr", + "task":"classification", + "metric":"accuracy", + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"fr", - "task":"classification", + "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.5 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", - "score":0.2261588318 + "score":0.1362099506 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", - "score":0.4944996319 + "score":0.4039646029 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", - "score":0.4626001556 + "score":0.2151785904 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", - "score":0.6614963779 + "score":0.4309035319 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"hi", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.6 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.2 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", - "score":0.3316295853 + "score":0.1658307051 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", - "score":0.5594083443 + "score":0.3676480008 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", - "score":0.365274938 + "score":0.1711676323 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", - "score":0.588222932 + "score":0.360371738 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"id", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"id", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.3 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"id", "task":"translation_from", "metric":"bleu", - "score":0.2236143729 + "score":0.1399603895 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"id", "task":"translation_from", "metric":"chrf", - "score":0.4912948296 + "score":0.3604113675 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"id", "task":"translation_to", "metric":"bleu", - "score":0.2910526755 + "score":0.1312450274 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"id", "task":"translation_to", "metric":"chrf", - "score":0.6005590773 + "score":0.3988827234 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ja", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.7 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"ja", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.1 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", - "score":0.2131020144 + "score":0.1222242366 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", - "score":0.4845704057 + "score":0.3415186168 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", - "score":0.1446650781 + "score":0.1607198845 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", - "score":0.2292145443 + "score":0.2706371796 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"jv", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"jv", "task":"translation_from", "metric":"bleu", - "score":0.1840709267 + "score":0.0550016522 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"jv", "task":"translation_from", "metric":"chrf", - "score":0.4042090141 + "score":0.2468917982 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"jv", "task":"translation_to", "metric":"bleu", - "score":0.1902389614 + "score":0.017690474 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"jv", "task":"translation_to", "metric":"chrf", - "score":0.4796942089 + "score":0.2536001746 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mr", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.4 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", - "score":0.2152417217 + "score":0.0411296617 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", - "score":0.4578207034 + "score":0.2442292695 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", - "score":0.1654073391 + "score":0.0482775033 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", - "score":0.3941079443 + "score":0.2211003771 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"pa", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", - "score":0.1891048622 + "score":0.2292007848 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", - "score":0.4233257981 + "score":0.3716655897 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", - "score":0.2086330089 + "score":0.2126702079 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", - "score":0.364326938 + "score":0.3248815955 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"pt", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.4 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", - "score":0.2256776552 + "score":0.1013445398 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", - "score":0.4793769886 + "score":0.3063125264 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", - "score":0.3320822339 + "score":0.2231857524 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", - "score":0.6132478102 + "score":0.4401829864 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ru", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"ru", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.3 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", - "score":0.1730354472 + "score":0.1005466956 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", - "score":0.454951133 + "score":0.3356518748 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", - "score":0.3073459183 + "score":0.1363185356 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", - "score":0.554890569 + "score":0.3769111636 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sw", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.5 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"sw", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.3 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", - "score":0.1798653655 + "score":0.0510060878 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", - "score":0.4290347271 + "score":0.2132741272 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", - "score":0.1709237193 + "score":0.0089146903 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", - "score":0.522080463 + "score":0.2314109768 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"te", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.2 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"te", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"te", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.1 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"te", "task":"translation_from", "metric":"bleu", - "score":0.2507091988 + "score":0.1096331511 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"te", "task":"translation_from", "metric":"chrf", - "score":0.5067409185 + "score":0.2888090685 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"te", "task":"translation_to", "metric":"bleu", - "score":0.2998213366 + "score":0.1206114883 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"te", "task":"translation_to", "metric":"chrf", - "score":0.5112516267 + "score":0.2597106436 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ur", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.5 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", - "score":0.194871957 + "score":0.0891185343 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", - "score":0.4838690709 + "score":0.281793335 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", - "score":0.1908512613 + "score":0.0799576366 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", - "score":0.3855814375 + "score":0.2661903898 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"vi", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.2 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"vi", "task":"translation_from", "metric":"bleu", - "score":0.2003733128 + "score":0.1389065496 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"vi", "task":"translation_from", "metric":"chrf", - "score":0.4613442635 + "score":0.3708687542 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"vi", "task":"translation_to", "metric":"bleu", - "score":0.3326058501 + "score":0.1661316612 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"vi", "task":"translation_to", "metric":"chrf", - "score":0.5496097026 + "score":0.354399593 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"zh", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.7 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"zh", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.3 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", - "score":0.1869648376 + "score":0.1133138775 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", - "score":0.4664675781 + "score":0.3754190494 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", - "score":0.1696786467 + "score":0.1672636279 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", - "score":0.2325846099 + "score":0.232325354 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ar", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":1.0 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", - "score":0.1934231192 + "score":0.2609114367 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", - "score":0.4070435279 + "score":0.5167379854 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", - "score":0.1705154843 + "score":0.1932802581 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", - "score":0.4152198662 + "score":0.4648835751 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"bn", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"bn", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.7 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", - "score":0.1488002659 + "score":0.1938367121 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", - "score":0.3745722385 + "score":0.4674774016 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", - "score":0.1902972944 + "score":0.2073541352 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", - "score":0.3816229412 + "score":0.4438396219 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"de", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"de", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"de", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.8 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"de", "task":"translation_from", "metric":"bleu", - "score":0.1785474661 + "score":0.2685056004 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"de", "task":"translation_from", "metric":"chrf", - "score":0.4370657832 + "score":0.506818165 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"de", "task":"translation_to", "metric":"bleu", - "score":0.2574134539 + "score":0.3801657831 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"de", "task":"translation_to", "metric":"chrf", - "score":0.5270318561 + "score":0.6056477234 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"en", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"en", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"en", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"en", "task":"translation_from", "metric":"bleu", - "score":0.3528640776 + "score":0.2216031518 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"en", "task":"translation_from", "metric":"chrf", - "score":0.5518750903 + "score":0.4429472312 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"en", "task":"translation_to", "metric":"bleu", - "score":0.4474952526 + "score":0.3322887566 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"en", "task":"translation_to", "metric":"chrf", - "score":0.6711323725 + "score":0.5824514758 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"es", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"es", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"es", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.9 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"es", "task":"translation_from", "metric":"bleu", - "score":0.1873108686 + "score":0.1640783778 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"es", "task":"translation_from", "metric":"chrf", - "score":0.4130794041 + "score":0.434749516 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"es", "task":"translation_to", "metric":"bleu", - "score":0.233152279 + "score":0.3126220052 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"es", "task":"translation_to", "metric":"chrf", - "score":0.5457724764 + "score":0.6038883227 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"fr", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"fr", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", - "score":0.1823885989 + "score":0.2261588318 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", - "score":0.4281583879 + "score":0.4944996319 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", - "score":0.3356151999 + "score":0.4626001556 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", - "score":0.5339127213 + "score":0.6614963779 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"hi", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.8 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", - "score":0.2985177739 + "score":0.3316295853 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", - "score":0.5122033781 + "score":0.5594083443 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", - "score":0.2288990675 + "score":0.3598049012 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", - "score":0.4641914485 + "score":0.5828568956 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"id", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"id", + "task":"mmlu", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"id", + "task":"translation_from", + "metric":"bleu", + "score":0.2236143729 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"id", + "task":"translation_from", + "metric":"chrf", + "score":0.4912948296 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"id", + "task":"translation_to", + "metric":"bleu", + "score":0.2910526755 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"id", + "task":"translation_to", + "metric":"chrf", + "score":0.6005590773 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"ja", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"ja", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.7 }, { - "model":"meta-llama\/llama-3-8b-instruct", - "bcp_47":"id", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"ja", "task":"translation_from", "metric":"bleu", - "score":0.1513573791 + "score":0.2131020144 }, { - "model":"meta-llama\/llama-3-8b-instruct", - "bcp_47":"id", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"ja", "task":"translation_from", "metric":"chrf", - "score":0.4126710531 + "score":0.4845704057 }, { - "model":"meta-llama\/llama-3-8b-instruct", - "bcp_47":"id", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"ja", "task":"translation_to", "metric":"bleu", - "score":0.2153271782 + "score":0.1446650781 }, { - "model":"meta-llama\/llama-3-8b-instruct", - "bcp_47":"id", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"ja", "task":"translation_to", "metric":"chrf", - "score":0.5281145734 + "score":0.2292145443 }, { - "model":"meta-llama\/llama-3-8b-instruct", - "bcp_47":"ja", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"jv", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3-8b-instruct", - "bcp_47":"ja", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"meta-llama\/llama-3-8b-instruct", - "bcp_47":"ja", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"jv", "task":"translation_from", "metric":"bleu", - "score":0.1685400892 + "score":0.1840709267 }, { - "model":"meta-llama\/llama-3-8b-instruct", - "bcp_47":"ja", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"jv", "task":"translation_from", "metric":"chrf", - "score":0.4152307643 + "score":0.4042090141 }, { - "model":"meta-llama\/llama-3-8b-instruct", - "bcp_47":"ja", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"jv", "task":"translation_to", "metric":"bleu", - "score":0.133960866 + "score":0.1902389614 }, { - "model":"meta-llama\/llama-3-8b-instruct", - "bcp_47":"ja", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"jv", "task":"translation_to", "metric":"chrf", - "score":0.2277435316 + "score":0.4796942089 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mr", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", - "score":0.1507564245 + "score":0.2152417217 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", - "score":0.3990456964 + "score":0.4578207034 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", - "score":0.1244036875 + "score":0.1654073391 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", - "score":0.3211879294 + "score":0.3941079443 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"pa", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", - "score":0.2304909323 + "score":0.1891048622 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", - "score":0.4895854606 + "score":0.4238091524 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", - "score":0.2411212371 + "score":0.2086330089 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", - "score":0.4357494606 + "score":0.364326938 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"pt", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.9 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", - "score":0.1707431977 + "score":0.2256776552 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", - "score":0.41325613 + "score":0.4793769886 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", - "score":0.3010483452 + "score":0.3320822339 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", - "score":0.5494427262 + "score":0.6132478102 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ru", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"ru", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.8 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", - "score":0.1269147932 + "score":0.1730354472 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", - "score":0.4169665509 + "score":0.454951133 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", - "score":0.2000369037 + "score":0.3073459183 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", - "score":0.4178019923 + "score":0.554890569 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sw", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":1.0 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"sw", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.6 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", - "score":0.1181160377 + "score":0.1798653655 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", - "score":0.3506298599 + "score":0.4290347271 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", - "score":0.080193685 + "score":0.1709237193 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", - "score":0.431036918 + "score":0.522080463 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"te", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":1.0 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"te", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"te", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.5 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"te", "task":"translation_from", "metric":"bleu", - "score":0.1477600607 + "score":0.2507091988 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"te", "task":"translation_from", "metric":"chrf", - "score":0.4004241369 + "score":0.5067409185 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"te", "task":"translation_to", "metric":"bleu", - "score":0.2008915859 + "score":0.2998213366 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"te", "task":"translation_to", "metric":"chrf", - "score":0.3876603494 + "score":0.5112516267 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ur", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", - "score":0.1383930526 + "score":0.194871957 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", - "score":0.3847225326 + "score":0.4838690709 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", - "score":0.1092836008 + "score":0.1908512613 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", - "score":0.2981134702 + "score":0.3855814375 }, { - "model":"meta-llama\/llama-3-8b-instruct", - "bcp_47":"zh", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"vi", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"vi", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"vi", + "task":"translation_from", + "metric":"bleu", + "score":0.2003733128 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"vi", + "task":"translation_from", + "metric":"chrf", + "score":0.4613442635 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"vi", + "task":"translation_to", + "metric":"bleu", + "score":0.3326058501 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"vi", + "task":"translation_to", + "metric":"chrf", + "score":0.5496097026 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"zh", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"zh", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.8 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", - "score":0.1188554471 + "score":0.1869648376 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", - "score":0.393157353 + "score":0.4664675781 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", - "score":0.1823790731 + "score":0.1696786467 }, { - "model":"meta-llama\/llama-3-8b-instruct", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", - "score":0.24906438 + "score":0.2325846099 }, { "model":"meta-llama\/llama-3.1-70b-instruct", @@ -12830,6 +10968,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"bn", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"bn", @@ -12872,6 +11017,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"de", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"de", @@ -12914,6 +11066,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"en", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"en", @@ -12956,6 +11115,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"es", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"es", @@ -12998,6 +11164,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"fr", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"fr", @@ -13124,6 +11297,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ja", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ja", @@ -13313,6 +11493,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ru", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ru", @@ -13355,6 +11542,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"sw", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sw", @@ -13397,6 +11591,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"te", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"te", @@ -13516,6 +11717,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"zh", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"zh", @@ -13600,6 +11808,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"bn", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.1-8b-instruct", "bcp_47":"bn", @@ -13642,6 +11857,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"de", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.1-8b-instruct", "bcp_47":"de", @@ -13684,6 +11906,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"en", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"meta-llama\/llama-3.1-8b-instruct", "bcp_47":"en", @@ -13696,14 +11925,14 @@ "bcp_47":"en", "task":"translation_from", "metric":"bleu", - "score":0.4135052232 + "score":0.4124666467 }, { "model":"meta-llama\/llama-3.1-8b-instruct", "bcp_47":"en", "task":"translation_from", "metric":"chrf", - "score":0.5895253676 + "score":0.582315662 }, { "model":"meta-llama\/llama-3.1-8b-instruct", @@ -13726,6 +11955,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"es", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.1-8b-instruct", "bcp_47":"es", @@ -13768,6 +12004,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"fr", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.1-8b-instruct", "bcp_47":"fr", @@ -13836,14 +12079,14 @@ "bcp_47":"hi", "task":"translation_to", "metric":"bleu", - "score":0.3052977087 + "score":0.332068694 }, { "model":"meta-llama\/llama-3.1-8b-instruct", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", - "score":0.5029439287 + "score":0.5232846835 }, { "model":"meta-llama\/llama-3.1-8b-instruct", @@ -13894,6 +12137,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"ja", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.1-8b-instruct", "bcp_47":"ja", @@ -14083,6 +12333,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"ru", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.1-8b-instruct", "bcp_47":"ru", @@ -14125,6 +12382,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"sw", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.1-8b-instruct", "bcp_47":"sw", @@ -14167,6 +12431,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"te", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.1-8b-instruct", "bcp_47":"te", @@ -14179,14 +12450,14 @@ "bcp_47":"te", "task":"translation_from", "metric":"bleu", - "score":0.2231414616 + "score":0.2177874056 }, { "model":"meta-llama\/llama-3.1-8b-instruct", "bcp_47":"te", "task":"translation_from", "metric":"chrf", - "score":0.4272936469 + "score":0.4228885108 }, { "model":"meta-llama\/llama-3.1-8b-instruct", @@ -14286,6 +12557,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"zh", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"meta-llama\/llama-3.1-8b-instruct", "bcp_47":"zh", @@ -14370,6 +12648,13 @@ "metric":"accuracy", "score":0.4 }, + { + "model":"meta-llama\/llama-3.2-1b-instruct", + "bcp_47":"bn", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.2-1b-instruct", "bcp_47":"bn", @@ -14412,6 +12697,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"meta-llama\/llama-3.2-1b-instruct", + "bcp_47":"de", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.2-1b-instruct", "bcp_47":"de", @@ -14454,6 +12746,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"meta-llama\/llama-3.2-1b-instruct", + "bcp_47":"en", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.2-1b-instruct", "bcp_47":"en", @@ -14473,7 +12772,7 @@ "bcp_47":"en", "task":"translation_from", "metric":"chrf", - "score":0.2988653266 + "score":0.2861009841 }, { "model":"meta-llama\/llama-3.2-1b-instruct", @@ -14496,6 +12795,13 @@ "metric":"accuracy", "score":0.3 }, + { + "model":"meta-llama\/llama-3.2-1b-instruct", + "bcp_47":"es", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.2-1b-instruct", "bcp_47":"es", @@ -14538,6 +12844,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"meta-llama\/llama-3.2-1b-instruct", + "bcp_47":"fr", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.2-1b-instruct", "bcp_47":"fr", @@ -14664,6 +12977,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"meta-llama\/llama-3.2-1b-instruct", + "bcp_47":"ja", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.2-1b-instruct", "bcp_47":"ja", @@ -14781,14 +13101,14 @@ "bcp_47":"pa", "task":"translation_from", "metric":"bleu", - "score":0.1995473768 + "score":0.2072883352 }, { "model":"meta-llama\/llama-3.2-1b-instruct", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", - "score":0.2187948253 + "score":0.2238489904 }, { "model":"meta-llama\/llama-3.2-1b-instruct", @@ -14853,6 +13173,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"meta-llama\/llama-3.2-1b-instruct", + "bcp_47":"ru", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.2-1b-instruct", "bcp_47":"ru", @@ -14895,6 +13222,13 @@ "metric":"accuracy", "score":0.4 }, + { + "model":"meta-llama\/llama-3.2-1b-instruct", + "bcp_47":"sw", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.2-1b-instruct", "bcp_47":"sw", @@ -14937,6 +13271,13 @@ "metric":"accuracy", "score":0.4 }, + { + "model":"meta-llama\/llama-3.2-1b-instruct", + "bcp_47":"te", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.2-1b-instruct", "bcp_47":"te", @@ -15056,6 +13397,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"meta-llama\/llama-3.2-1b-instruct", + "bcp_47":"zh", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.2-1b-instruct", "bcp_47":"zh", @@ -15110,28 +13458,28 @@ "bcp_47":"ar", "task":"translation_from", "metric":"bleu", - "score":0.2952163543 + "score":0.2997360932 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", - "score":0.5478879017 + "score":0.552448295 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", - "score":0.285185681 + "score":0.2837522278 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", - "score":0.5169899121 + "score":0.5150814494 }, { "model":"meta-llama\/llama-3.3-70b-instruct", @@ -15140,40 +13488,47 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"bn", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", - "score":0.2565907016 + "score":0.2646772038 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", - "score":0.5135642531 + "score":0.5157826791 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", - "score":0.2990673682 + "score":0.331255344 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", - "score":0.5019041301 + "score":0.5093924414 }, { "model":"meta-llama\/llama-3.3-70b-instruct", @@ -15182,6 +13537,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"de", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"de", @@ -15194,28 +13556,28 @@ "bcp_47":"de", "task":"translation_from", "metric":"bleu", - "score":0.3039463145 + "score":0.3129672706 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"de", "task":"translation_from", "metric":"chrf", - "score":0.5396134408 + "score":0.5456543979 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"de", "task":"translation_to", "metric":"bleu", - "score":0.4769793495 + "score":0.4852409005 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"de", "task":"translation_to", "metric":"chrf", - "score":0.6831000305 + "score":0.6908376394 }, { "model":"meta-llama\/llama-3.3-70b-instruct", @@ -15224,6 +13586,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"en", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"en", @@ -15236,28 +13605,28 @@ "bcp_47":"en", "task":"translation_from", "metric":"bleu", - "score":0.5363634713 + "score":0.5510215557 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"en", "task":"translation_from", "metric":"chrf", - "score":0.6717522975 + "score":0.6909834226 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"en", "task":"translation_to", "metric":"bleu", - "score":0.6861915605 + "score":0.6679215449 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"en", "task":"translation_to", "metric":"chrf", - "score":0.8341405041 + "score":0.823262947 }, { "model":"meta-llama\/llama-3.3-70b-instruct", @@ -15266,40 +13635,47 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"es", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"es", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"es", "task":"translation_from", "metric":"bleu", - "score":0.3000860461 + "score":0.3139331841 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"es", "task":"translation_from", "metric":"chrf", - "score":0.5397261344 + "score":0.5454623234 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"es", "task":"translation_to", "metric":"bleu", - "score":0.3791924256 + "score":0.3561290923 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"es", "task":"translation_to", "metric":"chrf", - "score":0.6159217656 + "score":0.599796306 }, { "model":"meta-llama\/llama-3.3-70b-instruct", @@ -15308,47 +13684,54 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"fr", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", - "score":0.2910966189 + "score":0.2963449909 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", - "score":0.5655611568 + "score":0.5544997379 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", - "score":0.5612286605 + "score":0.5091700689 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", - "score":0.7335958119 + "score":0.690320784 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"hi", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", @@ -15362,35 +13745,35 @@ "bcp_47":"hi", "task":"translation_from", "metric":"bleu", - "score":0.37963036 + "score":0.385731086 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", - "score":0.6119011461 + "score":0.6034244629 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", - "score":0.394637981 + "score":0.357812342 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", - "score":0.6170944347 + "score":0.5894721809 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"id", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", @@ -15404,28 +13787,28 @@ "bcp_47":"id", "task":"translation_from", "metric":"bleu", - "score":0.3245838511 + "score":0.3143630083 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"id", "task":"translation_from", "metric":"chrf", - "score":0.5745245116 + "score":0.5621460006 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"id", "task":"translation_to", "metric":"bleu", - "score":0.3635605483 + "score":0.3372718385 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"id", "task":"translation_to", "metric":"chrf", - "score":0.6486137543 + "score":0.6543894215 }, { "model":"meta-llama\/llama-3.3-70b-instruct", @@ -15434,40 +13817,47 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"ja", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", - "score":0.2805654401 + "score":0.284322765 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", - "score":0.5274978991 + "score":0.5304019177 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", - "score":0.2667114019 + "score":0.2212745751 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", - "score":0.378544243 + "score":0.355950114 }, { "model":"meta-llama\/llama-3.3-70b-instruct", @@ -15516,63 +13906,63 @@ "bcp_47":"mr", "task":"translation_from", "metric":"bleu", - "score":0.2718900108 + "score":0.3224419544 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", - "score":0.5102541071 + "score":0.5606155155 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", - "score":0.1858815817 + "score":0.1663847917 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", - "score":0.4329686416 + "score":0.4383662593 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"pa", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", - "score":0.4659929088 + "score":0.4519562833 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", - "score":0.6558133861 + "score":0.6328740374 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", - "score":0.4108150693 + "score":0.3918546765 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", - "score":0.5726366793 + "score":0.551810658 }, { "model":"meta-llama\/llama-3.3-70b-instruct", @@ -15586,35 +13976,35 @@ "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", - "score":0.2792136387 + "score":0.3065957195 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", - "score":0.5337079901 + "score":0.5567333989 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", - "score":0.4673129305 + "score":0.4450524918 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", - "score":0.666759063 + "score":0.6712742861 }, { "model":"meta-llama\/llama-3.3-70b-instruct", @@ -15623,47 +14013,61 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"ru", + "task":"mgsm", + "metric":"accuracy", + "score":0.3 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", - "score":0.2072896517 + "score":0.2184907643 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", - "score":0.4846106947 + "score":0.4824691404 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", - "score":0.3413312048 + "score":0.3406091079 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", - "score":0.5652152557 + "score":0.5689518318 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sw", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"sw", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 }, { "model":"meta-llama\/llama-3.3-70b-instruct", @@ -15677,28 +14081,28 @@ "bcp_47":"sw", "task":"translation_from", "metric":"bleu", - "score":0.2805870918 + "score":0.2373393477 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", - "score":0.5415843032 + "score":0.5017230165 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", - "score":0.2475190192 + "score":0.2334583695 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", - "score":0.5651818159 + "score":0.5603415221 }, { "model":"meta-llama\/llama-3.3-70b-instruct", @@ -15707,40 +14111,47 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"te", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"te", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"te", "task":"translation_from", "metric":"bleu", - "score":0.3394240055 + "score":0.3767393472 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"te", "task":"translation_from", "metric":"chrf", - "score":0.5797713804 + "score":0.5870286691 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"te", "task":"translation_to", "metric":"bleu", - "score":0.3029126381 + "score":0.3415510311 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"te", "task":"translation_to", "metric":"chrf", - "score":0.5385965131 + "score":0.5592933672 }, { "model":"meta-llama\/llama-3.3-70b-instruct", @@ -15754,28 +14165,28 @@ "bcp_47":"ur", "task":"translation_from", "metric":"bleu", - "score":0.2690513839 + "score":0.252958993 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", - "score":0.5193011981 + "score":0.5053979802 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", - "score":0.2616221013 + "score":0.2339821201 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", - "score":0.4396251456 + "score":0.425306622 }, { "model":"meta-llama\/llama-3.3-70b-instruct", @@ -15826,6 +14237,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"zh", + "task":"mgsm", + "metric":"accuracy", + "score":0.3 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"zh", @@ -15838,28 +14256,28 @@ "bcp_47":"zh", "task":"translation_from", "metric":"bleu", - "score":0.2371072964 + "score":0.2259180607 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", - "score":0.5127276245 + "score":0.5258178103 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", - "score":0.262487605 + "score":0.2474954475 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", - "score":0.3198189866 + "score":0.2841722148 }, { "model":"meta-llama\/llama-4-maverick", @@ -15873,42 +14291,49 @@ "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", - "score":0.3127757991 + "score":0.3288891576 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", - "score":0.5649158188 + "score":0.560493221 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", - "score":0.336779181 + "score":0.3370289388 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", - "score":0.5507530519 + "score":0.5616376735 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"bn", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 + }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"bn", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", @@ -15922,28 +14347,28 @@ "bcp_47":"bn", "task":"translation_from", "metric":"bleu", - "score":0.3069295097 + "score":0.3130233588 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", - "score":0.5513389823 + "score":0.5569163893 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", - "score":0.3483092706 + "score":0.3322667951 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", - "score":0.5205648325 + "score":0.5140240989 }, { "model":"meta-llama\/llama-4-maverick", @@ -15952,6 +14377,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"de", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"de", @@ -15964,28 +14396,28 @@ "bcp_47":"de", "task":"translation_from", "metric":"bleu", - "score":0.4025145053 + "score":0.386239845 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"de", "task":"translation_from", "metric":"chrf", - "score":0.6178207148 + "score":0.6110300223 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"de", "task":"translation_to", "metric":"bleu", - "score":0.4799077679 + "score":0.4508475568 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"de", "task":"translation_to", "metric":"chrf", - "score":0.6743939478 + "score":0.6653203029 }, { "model":"meta-llama\/llama-4-maverick", @@ -15994,6 +14426,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"en", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"en", @@ -16006,28 +14445,28 @@ "bcp_47":"en", "task":"translation_from", "metric":"bleu", - "score":0.515872136 + "score":0.5649266234 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"en", "task":"translation_from", "metric":"chrf", - "score":0.6957157301 + "score":0.7240609445 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"en", "task":"translation_to", "metric":"bleu", - "score":0.6059225158 + "score":0.6160790992 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"en", "task":"translation_to", "metric":"chrf", - "score":0.7832269342 + "score":0.8133281991 }, { "model":"meta-llama\/llama-4-maverick", @@ -16036,6 +14475,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"es", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"es", @@ -16048,28 +14494,28 @@ "bcp_47":"es", "task":"translation_from", "metric":"bleu", - "score":0.3316519325 + "score":0.3152773331 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"es", "task":"translation_from", "metric":"chrf", - "score":0.5678715692 + "score":0.5548531112 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"es", "task":"translation_to", "metric":"bleu", - "score":0.3488467992 + "score":0.3335962816 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"es", "task":"translation_to", "metric":"chrf", - "score":0.590893987 + "score":0.5859881472 }, { "model":"meta-llama\/llama-4-maverick", @@ -16078,6 +14524,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"fr", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"fr", @@ -16090,28 +14543,28 @@ "bcp_47":"fr", "task":"translation_from", "metric":"bleu", - "score":0.3433334073 + "score":0.3174175523 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", - "score":0.5948101631 + "score":0.5682518332 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", - "score":0.5065668297 + "score":0.5173973527 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", - "score":0.6980371576 + "score":0.7056428374 }, { "model":"meta-llama\/llama-4-maverick", @@ -16132,28 +14585,28 @@ "bcp_47":"hi", "task":"translation_from", "metric":"bleu", - "score":0.3914498782 + "score":0.3796955055 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", - "score":0.6153407735 + "score":0.6056742688 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", - "score":0.355440696 + "score":0.382370623 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", - "score":0.5800922474 + "score":0.5922578575 }, { "model":"meta-llama\/llama-4-maverick", @@ -16167,35 +14620,35 @@ "bcp_47":"id", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"id", "task":"translation_from", "metric":"bleu", - "score":0.2363527189 + "score":0.2523550022 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"id", "task":"translation_from", "metric":"chrf", - "score":0.5367331703 + "score":0.5458277736 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"id", "task":"translation_to", "metric":"bleu", - "score":0.3735236798 + "score":0.3774331947 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"id", "task":"translation_to", "metric":"chrf", - "score":0.6426286119 + "score":0.6486159416 }, { "model":"meta-llama\/llama-4-maverick", @@ -16204,6 +14657,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"ja", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ja", @@ -16216,28 +14676,28 @@ "bcp_47":"ja", "task":"translation_from", "metric":"bleu", - "score":0.2966381777 + "score":0.3130983776 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", - "score":0.5565004336 + "score":0.5536124921 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", - "score":0.2985554699 + "score":0.3266051607 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", - "score":0.438719005 + "score":0.4444671407 }, { "model":"meta-llama\/llama-4-maverick", @@ -16286,28 +14746,28 @@ "bcp_47":"mr", "task":"translation_from", "metric":"bleu", - "score":0.3087670334 + "score":0.3125095049 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", - "score":0.5771712483 + "score":0.5535296132 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", - "score":0.2457723977 + "score":0.2275791183 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", - "score":0.4600595438 + "score":0.4569915545 }, { "model":"meta-llama\/llama-4-maverick", @@ -16321,28 +14781,28 @@ "bcp_47":"pa", "task":"translation_from", "metric":"bleu", - "score":0.412130771 + "score":0.4246522462 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", - "score":0.6090479579 + "score":0.6364852561 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", - "score":0.4664487469 + "score":0.439726575 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", - "score":0.609059672 + "score":0.5806732389 }, { "model":"meta-llama\/llama-4-maverick", @@ -16356,35 +14816,35 @@ "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", - "score":0.3566326015 + "score":0.3451568022 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", - "score":0.5934355692 + "score":0.5845365036 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", - "score":0.4571214583 + "score":0.4582156105 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", - "score":0.6537873967 + "score":0.6491023878 }, { "model":"meta-llama\/llama-4-maverick", @@ -16393,6 +14853,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"ru", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ru", @@ -16405,28 +14872,28 @@ "bcp_47":"ru", "task":"translation_from", "metric":"bleu", - "score":0.2654289532 + "score":0.2567420946 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", - "score":0.5302105978 + "score":0.5061211552 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", - "score":0.4272058624 + "score":0.4025875747 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", - "score":0.6370792063 + "score":0.6083538055 }, { "model":"meta-llama\/llama-4-maverick", @@ -16435,6 +14902,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"sw", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"sw", @@ -16447,28 +14921,28 @@ "bcp_47":"sw", "task":"translation_from", "metric":"bleu", - "score":0.3666782144 + "score":0.3425500041 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", - "score":0.5900845247 + "score":0.5680420054 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", - "score":0.2983882308 + "score":0.2980763573 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", - "score":0.6072147424 + "score":0.5924685945 }, { "model":"meta-llama\/llama-4-maverick", @@ -16477,6 +14951,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"te", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"te", @@ -16489,28 +14970,28 @@ "bcp_47":"te", "task":"translation_from", "metric":"bleu", - "score":0.3680021687 + "score":0.3462186566 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"te", "task":"translation_from", "metric":"chrf", - "score":0.6106474057 + "score":0.6055817314 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"te", "task":"translation_to", "metric":"bleu", - "score":0.3502048845 + "score":0.3822149946 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"te", "task":"translation_to", "metric":"chrf", - "score":0.5783869442 + "score":0.5783618359 }, { "model":"meta-llama\/llama-4-maverick", @@ -16524,28 +15005,28 @@ "bcp_47":"ur", "task":"translation_from", "metric":"bleu", - "score":0.2305225364 + "score":0.253549974 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", - "score":0.4984986116 + "score":0.5235064606 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", - "score":0.2361005323 + "score":0.2484556869 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", - "score":0.4656123232 + "score":0.429442787 }, { "model":"meta-llama\/llama-4-maverick", @@ -16596,40 +15077,47 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"zh", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", - "score":0.2782569998 + "score":0.2703289724 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", - "score":0.5594321689 + "score":0.5473102513 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", - "score":0.2939092084 + "score":0.317769874 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", - "score":0.3280788158 + "score":0.3676906362 }, { "model":"microsoft\/phi-4", @@ -16680,6 +15168,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"bn", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"microsoft\/phi-4", "bcp_47":"bn", @@ -16722,6 +15217,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"de", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"microsoft\/phi-4", "bcp_47":"de", @@ -16764,6 +15266,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"en", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"microsoft\/phi-4", "bcp_47":"en", @@ -16806,6 +15315,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"es", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"microsoft\/phi-4", "bcp_47":"es", @@ -16848,6 +15364,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"fr", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"microsoft\/phi-4", "bcp_47":"fr", @@ -16916,14 +15439,14 @@ "bcp_47":"hi", "task":"translation_to", "metric":"bleu", - "score":0.2425073841 + "score":0.2369610218 }, { "model":"microsoft\/phi-4", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", - "score":0.3817959275 + "score":0.3765795877 }, { "model":"microsoft\/phi-4", @@ -16974,6 +15497,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"ja", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"microsoft\/phi-4", "bcp_47":"ja", @@ -17163,6 +15693,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"ru", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"microsoft\/phi-4", "bcp_47":"ru", @@ -17205,6 +15742,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"sw", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"microsoft\/phi-4", "bcp_47":"sw", @@ -17247,6 +15791,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"te", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"microsoft\/phi-4", "bcp_47":"te", @@ -17259,14 +15810,14 @@ "bcp_47":"te", "task":"translation_from", "metric":"bleu", - "score":0.2322864827 + "score":0.2374723306 }, { "model":"microsoft\/phi-4", "bcp_47":"te", "task":"translation_from", "metric":"chrf", - "score":0.4651967541 + "score":0.4675485501 }, { "model":"microsoft\/phi-4", @@ -17366,6 +15917,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"zh", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"microsoft\/phi-4", "bcp_47":"zh", @@ -17450,6 +16008,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"bn", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"bn", @@ -17492,6 +16057,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"de", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"de", @@ -17534,6 +16106,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"en", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"en", @@ -17576,6 +16155,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"es", + "task":"mgsm", + "metric":"accuracy", + "score":0.3 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"es", @@ -17618,6 +16204,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"fr", + "task":"mgsm", + "metric":"accuracy", + "score":0.3 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"fr", @@ -17744,6 +16337,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ja", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ja", @@ -17933,6 +16533,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ru", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ru", @@ -17975,6 +16582,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"sw", + "task":"mgsm", + "metric":"accuracy", + "score":0.3 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"sw", @@ -18017,6 +16631,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"te", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"te", @@ -18136,6 +16757,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"zh", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"zh", @@ -18220,6 +16848,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"bn", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"bn", @@ -18262,6 +16897,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"de", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"de", @@ -18304,6 +16946,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"en", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"en", @@ -18316,14 +16965,14 @@ "bcp_47":"en", "task":"translation_from", "metric":"bleu", - "score":0.2963681355 + "score":0.3166864072 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"en", "task":"translation_from", "metric":"chrf", - "score":0.5277177226 + "score":0.5483508218 }, { "model":"mistralai\/mistral-nemo", @@ -18346,6 +16995,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"es", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"es", @@ -18388,6 +17044,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"fr", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"fr", @@ -18442,14 +17105,14 @@ "bcp_47":"hi", "task":"translation_from", "metric":"bleu", - "score":0.1878052787 + "score":0.1271878224 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", - "score":0.4458139958 + "score":0.3698932868 }, { "model":"mistralai\/mistral-nemo", @@ -18514,6 +17177,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"ja", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ja", @@ -18631,14 +17301,14 @@ "bcp_47":"pa", "task":"translation_from", "metric":"bleu", - "score":0.3360340722 + "score":0.3158857772 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", - "score":0.5243077445 + "score":0.5109022919 }, { "model":"mistralai\/mistral-nemo", @@ -18703,6 +17373,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"ru", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ru", @@ -18745,6 +17422,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"sw", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"sw", @@ -18787,6 +17471,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"te", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"te", @@ -18813,14 +17504,14 @@ "bcp_47":"te", "task":"translation_to", "metric":"bleu", - "score":0.1060450795 + "score":0.2060450795 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"te", "task":"translation_to", "metric":"chrf", - "score":0.2738653779 + "score":0.3717773766 }, { "model":"mistralai\/mistral-nemo", @@ -18906,6 +17597,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"zh", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"zh", @@ -18990,6 +17688,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"bn", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"bn", @@ -19032,6 +17737,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"de", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"de", @@ -19074,6 +17786,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"en", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"en", @@ -19086,14 +17805,14 @@ "bcp_47":"en", "task":"translation_from", "metric":"bleu", - "score":0.4552458759 + "score":0.4669071745 }, { "model":"mistralai\/mistral-saba", "bcp_47":"en", "task":"translation_from", "metric":"chrf", - "score":0.6289172201 + "score":0.6351301458 }, { "model":"mistralai\/mistral-saba", @@ -19116,6 +17835,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"es", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"es", @@ -19158,6 +17884,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"fr", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"fr", @@ -19284,6 +18017,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"ja", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"ja", @@ -19473,6 +18213,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"ru", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"ru", @@ -19511,9 +18258,16 @@ { "model":"mistralai\/mistral-saba", "bcp_47":"sw", - "task":"classification", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"sw", + "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { "model":"mistralai\/mistral-saba", @@ -19557,6 +18311,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"te", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"te", @@ -19676,6 +18437,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"zh", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"zh", @@ -19760,6 +18528,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"bn", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"bn", @@ -19802,6 +18577,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"de", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"de", @@ -19844,6 +18626,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"en", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"en", @@ -19886,6 +18675,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"es", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"es", @@ -19928,6 +18724,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"fr", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"fr", @@ -20054,6 +18857,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ja", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ja", @@ -20171,14 +18981,14 @@ "bcp_47":"pa", "task":"translation_from", "metric":"bleu", - "score":0.3668684839 + "score":0.3647734864 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", - "score":0.5826633325 + "score":0.5784168493 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", @@ -20243,6 +19053,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ru", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ru", @@ -20285,6 +19102,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"sw", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sw", @@ -20327,6 +19151,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"te", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"te", @@ -20446,6 +19277,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"zh", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"zh", @@ -20530,6 +19368,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"bn", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"bn", @@ -20572,6 +19417,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"de", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"de", @@ -20614,6 +19466,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"en", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"en", @@ -20656,6 +19515,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"es", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"es", @@ -20698,6 +19564,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"fr", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"fr", @@ -20824,6 +19697,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ja", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ja", @@ -21013,6 +19893,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ru", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ru", @@ -21055,6 +19942,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"sw", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"sw", @@ -21097,6 +19991,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"te", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"te", @@ -21216,6 +20117,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"zh", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"zh", @@ -21300,6 +20208,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"bn", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"bn", @@ -21342,6 +20257,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"de", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"de", @@ -21384,6 +20306,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"en", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"en", @@ -21426,6 +20355,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"es", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"es", @@ -21468,6 +20404,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"fr", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"fr", @@ -21536,14 +20479,14 @@ "bcp_47":"hi", "task":"translation_to", "metric":"bleu", - "score":0.3803524524 + "score":0.3858833658 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", - "score":0.5965079449 + "score":0.5991711103 }, { "model":"openai\/gpt-4.1-nano", @@ -21594,6 +20537,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ja", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ja", @@ -21783,6 +20733,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ru", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ru", @@ -21825,6 +20782,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"sw", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"sw", @@ -21867,6 +20831,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"te", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"te", @@ -21986,6 +20957,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"zh", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"zh", @@ -22070,6 +21048,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"bn", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"bn", @@ -22112,6 +21097,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"de", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"de", @@ -22154,6 +21146,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"en", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"en", @@ -22196,6 +21195,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"es", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"es", @@ -22238,6 +21244,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"fr", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"fr", @@ -22364,6 +21377,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"ja", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ja", @@ -22553,6 +21573,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"ru", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ru", @@ -22595,6 +21622,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"sw", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"sw", @@ -22637,6 +21671,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"te", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"te", @@ -22756,6 +21797,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"zh", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"zh",