diff --git "a/results.json" "b/results.json" --- "a/results.json" +++ "b/results.json" @@ -377,6 +377,41 @@ "metric":"chrf", "score":0.4822747548 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"jv", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"jv", + "task":"translation_from", + "metric":"bleu", + "score":0.2192585886 + }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"jv", + "task":"translation_from", + "metric":"chrf", + "score":0.4527368673 + }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"jv", + "task":"translation_to", + "metric":"bleu", + "score":0.1076519805 + }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"jv", + "task":"translation_to", + "metric":"chrf", + "score":0.4493011434 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"mr", @@ -650,6 +685,48 @@ "metric":"chrf", "score":0.3475324071 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"vi", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"vi", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"vi", + "task":"translation_from", + "metric":"bleu", + "score":0.122224664 + }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"vi", + "task":"translation_from", + "metric":"chrf", + "score":0.3790772862 + }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"vi", + "task":"translation_to", + "metric":"bleu", + "score":0.1919159066 + }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"vi", + "task":"translation_to", + "metric":"chrf", + "score":0.4399907204 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"zh", @@ -2456,6 +2533,41 @@ "metric":"chrf", "score":0.5197322727 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"jv", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"jv", + "task":"translation_from", + "metric":"bleu", + "score":0.2594796679 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"jv", + "task":"translation_from", + "metric":"chrf", + "score":0.4780086047 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"jv", + "task":"translation_to", + "metric":"bleu", + "score":0.3009966401 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"jv", + "task":"translation_to", + "metric":"chrf", + "score":0.5761529867 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"mr", @@ -2729,6 +2841,48 @@ "metric":"chrf", "score":0.4761547661 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"vi", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"vi", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"vi", + "task":"translation_from", + "metric":"bleu", + "score":0.3027505857 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"vi", + "task":"translation_from", + "metric":"chrf", + "score":0.5506378818 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"vi", + "task":"translation_to", + "metric":"bleu", + "score":0.3708866541 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"vi", + "task":"translation_to", + "metric":"chrf", + "score":0.5846851624 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"zh", @@ -3149,6 +3303,41 @@ "metric":"chrf", "score":0.4709407515 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"jv", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"jv", + "task":"translation_from", + "metric":"bleu", + "score":0.2789250445 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"jv", + "task":"translation_from", + "metric":"chrf", + "score":0.52614288 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"jv", + "task":"translation_to", + "metric":"bleu", + "score":0.2580648249 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"jv", + "task":"translation_to", + "metric":"chrf", + "score":0.574708573 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"mr", @@ -3422,6 +3611,48 @@ "metric":"chrf", "score":0.4921734247 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"vi", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"vi", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"vi", + "task":"translation_from", + "metric":"bleu", + "score":0.2150236607 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"vi", + "task":"translation_from", + "metric":"chrf", + "score":0.4970978512 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"vi", + "task":"translation_to", + "metric":"bleu", + "score":0.3603191861 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"vi", + "task":"translation_to", + "metric":"chrf", + "score":0.5938509481 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"zh", @@ -3842,6 +4073,41 @@ "metric":"chrf", "score":0.3589526769 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"jv", + "task":"classification", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"jv", + "task":"translation_from", + "metric":"bleu", + "score":0.1994883012 + }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"jv", + "task":"translation_from", + "metric":"chrf", + "score":0.3942042616 + }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"jv", + "task":"translation_to", + "metric":"bleu", + "score":0.2570478693 + }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"jv", + "task":"translation_to", + "metric":"chrf", + "score":0.5215463463 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"mr", @@ -4115,6 +4381,48 @@ "metric":"chrf", "score":0.4241396601 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"vi", + "task":"classification", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"vi", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"vi", + "task":"translation_from", + "metric":"bleu", + "score":0.1705385375 + }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"vi", + "task":"translation_from", + "metric":"chrf", + "score":0.3747437419 + }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"vi", + "task":"translation_to", + "metric":"bleu", + "score":0.257036702 + }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"vi", + "task":"translation_to", + "metric":"chrf", + "score":0.4598854693 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"zh", @@ -4537,63 +4845,98 @@ }, { "model":"google\/gemini-2.0-flash-001", - "bcp_47":"mr", + "bcp_47":"jv", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", - "bcp_47":"mr", + "bcp_47":"jv", "task":"translation_from", "metric":"bleu", - "score":0.3338185652 + "score":0.3700621486 }, { "model":"google\/gemini-2.0-flash-001", - "bcp_47":"mr", + "bcp_47":"jv", "task":"translation_from", "metric":"chrf", - "score":0.5787491818 + "score":0.5837249923 }, { "model":"google\/gemini-2.0-flash-001", - "bcp_47":"mr", + "bcp_47":"jv", "task":"translation_to", "metric":"bleu", - "score":0.2806196555 + "score":0.3088767184 }, { "model":"google\/gemini-2.0-flash-001", - "bcp_47":"mr", + "bcp_47":"jv", "task":"translation_to", "metric":"chrf", - "score":0.4516145469 + "score":0.5884562937 }, { "model":"google\/gemini-2.0-flash-001", - "bcp_47":"pa", + "bcp_47":"mr", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", - "bcp_47":"pa", + "bcp_47":"mr", "task":"translation_from", "metric":"bleu", - "score":0.4372599799 + "score":0.3338185652 }, { "model":"google\/gemini-2.0-flash-001", - "bcp_47":"pa", + "bcp_47":"mr", "task":"translation_from", "metric":"chrf", - "score":0.658993109 + "score":0.5787491818 }, { "model":"google\/gemini-2.0-flash-001", - "bcp_47":"pa", + "bcp_47":"mr", + "task":"translation_to", + "metric":"bleu", + "score":0.2806196555 + }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"mr", + "task":"translation_to", + "metric":"chrf", + "score":0.4516145469 + }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"pa", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"pa", + "task":"translation_from", + "metric":"bleu", + "score":0.4372599799 + }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"pa", + "task":"translation_from", + "metric":"chrf", + "score":0.658993109 + }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"pa", "task":"translation_to", "metric":"bleu", "score":0.4174871385 @@ -4808,6 +5151,48 @@ "metric":"chrf", "score":0.4826415387 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"vi", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"vi", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"vi", + "task":"translation_from", + "metric":"bleu", + "score":0.3436662566 + }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"vi", + "task":"translation_from", + "metric":"chrf", + "score":0.59026429 + }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"vi", + "task":"translation_to", + "metric":"bleu", + "score":0.447007323 + }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"vi", + "task":"translation_to", + "metric":"chrf", + "score":0.6624838094 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"zh", @@ -5228,6 +5613,41 @@ "metric":"chrf", "score":0.3787664659 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"jv", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"jv", + "task":"translation_from", + "metric":"bleu", + "score":0.3561823456 + }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"jv", + "task":"translation_from", + "metric":"chrf", + "score":0.5619277442 + }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"jv", + "task":"translation_to", + "metric":"bleu", + "score":0.2609272138 + }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"jv", + "task":"translation_to", + "metric":"chrf", + "score":0.5344454302 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"mr", @@ -5501,6 +5921,48 @@ "metric":"chrf", "score":0.493353195 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"vi", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"vi", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"vi", + "task":"translation_from", + "metric":"bleu", + "score":0.3170290484 + }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"vi", + "task":"translation_from", + "metric":"chrf", + "score":0.5491010642 + }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"vi", + "task":"translation_to", + "metric":"bleu", + "score":0.4032471641 + }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"vi", + "task":"translation_to", + "metric":"chrf", + "score":0.6394566771 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"zh", @@ -5921,6 +6383,41 @@ "metric":"chrf", "score":0.5319028037 }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"jv", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"jv", + "task":"translation_from", + "metric":"bleu", + "score":0.3793672083 + }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"jv", + "task":"translation_from", + "metric":"chrf", + "score":0.6054760312 + }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"jv", + "task":"translation_to", + "metric":"bleu", + "score":0.3161702144 + }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"jv", + "task":"translation_to", + "metric":"chrf", + "score":0.5956895972 + }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"mr", @@ -6194,6 +6691,48 @@ "metric":"chrf", "score":0.4351160146 }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"vi", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"vi", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"vi", + "task":"translation_from", + "metric":"bleu", + "score":0.4227968054 + }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"vi", + "task":"translation_from", + "metric":"chrf", + "score":0.6326522416 + }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"vi", + "task":"translation_to", + "metric":"bleu", + "score":0.4693432911 + }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"vi", + "task":"translation_to", + "metric":"chrf", + "score":0.6659480306 + }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"zh", @@ -6614,6 +7153,41 @@ "metric":"chrf", "score":0.4832664839 }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"jv", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"jv", + "task":"translation_from", + "metric":"bleu", + "score":0.3540174328 + }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"jv", + "task":"translation_from", + "metric":"chrf", + "score":0.5513315973 + }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"jv", + "task":"translation_to", + "metric":"bleu", + "score":0.329885986 + }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"jv", + "task":"translation_to", + "metric":"chrf", + "score":0.5805485408 + }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"mr", @@ -6887,6 +7461,48 @@ "metric":"chrf", "score":0.4912965943 }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"vi", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"vi", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"vi", + "task":"translation_from", + "metric":"bleu", + "score":0.3471784526 + }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"vi", + "task":"translation_from", + "metric":"chrf", + "score":0.5719753053 + }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"vi", + "task":"translation_to", + "metric":"bleu", + "score":0.4601231177 + }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"vi", + "task":"translation_to", + "metric":"chrf", + "score":0.6644848789 + }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"zh", @@ -7309,52 +7925,87 @@ }, { "model":"google\/gemini-flash-1.5", - "bcp_47":"mr", + "bcp_47":"jv", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", - "bcp_47":"mr", + "bcp_47":"jv", "task":"translation_from", "metric":"bleu", - "score":0.2370074805 + "score":0.3177915441 }, { "model":"google\/gemini-flash-1.5", - "bcp_47":"mr", + "bcp_47":"jv", "task":"translation_from", "metric":"chrf", - "score":0.4804215458 + "score":0.5387853038 }, { "model":"google\/gemini-flash-1.5", - "bcp_47":"mr", + "bcp_47":"jv", "task":"translation_to", "metric":"bleu", - "score":0.2399769139 + "score":0.2549228547 }, { "model":"google\/gemini-flash-1.5", - "bcp_47":"mr", + "bcp_47":"jv", "task":"translation_to", "metric":"chrf", - "score":0.4726429935 + "score":0.5322440265 }, { "model":"google\/gemini-flash-1.5", - "bcp_47":"pa", + "bcp_47":"mr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", - "bcp_47":"pa", + "bcp_47":"mr", "task":"translation_from", "metric":"bleu", - "score":0.4180718844 + "score":0.2370074805 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"mr", + "task":"translation_from", + "metric":"chrf", + "score":0.4804215458 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"mr", + "task":"translation_to", + "metric":"bleu", + "score":0.2399769139 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"mr", + "task":"translation_to", + "metric":"chrf", + "score":0.4726429935 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"pa", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"pa", + "task":"translation_from", + "metric":"bleu", + "score":0.4180718844 }, { "model":"google\/gemini-flash-1.5", @@ -7580,6 +8231,48 @@ "metric":"chrf", "score":0.4226865444 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"vi", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"vi", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"vi", + "task":"translation_from", + "metric":"bleu", + "score":0.2745000434 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"vi", + "task":"translation_from", + "metric":"chrf", + "score":0.5206422805 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"vi", + "task":"translation_to", + "metric":"bleu", + "score":0.4261790941 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"vi", + "task":"translation_to", + "metric":"chrf", + "score":0.6358462464 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"zh", @@ -8000,6 +8693,41 @@ "metric":"chrf", "score":0.409095006 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"jv", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"jv", + "task":"translation_from", + "metric":"bleu", + "score":0.2326358655 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"jv", + "task":"translation_from", + "metric":"chrf", + "score":0.4815897231 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"jv", + "task":"translation_to", + "metric":"bleu", + "score":0.2187928356 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"jv", + "task":"translation_to", + "metric":"chrf", + "score":0.4896578943 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"mr", @@ -8273,6 +9001,48 @@ "metric":"chrf", "score":0.4066956434 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"vi", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"vi", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"vi", + "task":"translation_from", + "metric":"bleu", + "score":0.2537752957 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"vi", + "task":"translation_from", + "metric":"chrf", + "score":0.5073147534 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"vi", + "task":"translation_to", + "metric":"bleu", + "score":0.3583753747 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"vi", + "task":"translation_to", + "metric":"chrf", + "score":0.6253917282 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"zh", @@ -9386,6 +10156,41 @@ "metric":"chrf", "score":0.4046420215 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"jv", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"jv", + "task":"translation_from", + "metric":"bleu", + "score":0.3019627022 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"jv", + "task":"translation_from", + "metric":"chrf", + "score":0.5133980923 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"jv", + "task":"translation_to", + "metric":"bleu", + "score":0.2316517545 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"jv", + "task":"translation_to", + "metric":"chrf", + "score":0.5189963647 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"mr", @@ -9659,6 +10464,48 @@ "metric":"chrf", "score":0.429749938 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"vi", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"vi", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"vi", + "task":"translation_from", + "metric":"bleu", + "score":0.2279880384 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"vi", + "task":"translation_from", + "metric":"chrf", + "score":0.4835933272 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"vi", + "task":"translation_to", + "metric":"bleu", + "score":0.3478085621 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"vi", + "task":"translation_to", + "metric":"chrf", + "score":0.5968604742 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"zh", @@ -10079,6 +10926,41 @@ "metric":"chrf", "score":0.2706371796 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"jv", + "task":"classification", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"jv", + "task":"translation_from", + "metric":"bleu", + "score":0.0550016522 + }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"jv", + "task":"translation_from", + "metric":"chrf", + "score":0.2468917982 + }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"jv", + "task":"translation_to", + "metric":"bleu", + "score":0.017690474 + }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"jv", + "task":"translation_to", + "metric":"chrf", + "score":0.2536001746 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mr", @@ -10352,6 +11234,48 @@ "metric":"chrf", "score":0.2661903898 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"vi", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"vi", + "task":"mmlu", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"vi", + "task":"translation_from", + "metric":"bleu", + "score":0.1389065496 + }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"vi", + "task":"translation_from", + "metric":"chrf", + "score":0.3708687542 + }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"vi", + "task":"translation_to", + "metric":"bleu", + "score":0.1661316612 + }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"vi", + "task":"translation_to", + "metric":"chrf", + "score":0.354399593 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"zh", @@ -10774,49 +11698,84 @@ }, { "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"mr", + "bcp_47":"jv", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"mr", + "bcp_47":"jv", "task":"translation_from", "metric":"bleu", - "score":0.2152417217 + "score":0.1840709267 }, { "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"mr", + "bcp_47":"jv", "task":"translation_from", "metric":"chrf", - "score":0.4578207034 + "score":0.4042090141 }, { "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"mr", + "bcp_47":"jv", "task":"translation_to", "metric":"bleu", - "score":0.1654073391 + "score":0.1902389614 }, { "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"mr", + "bcp_47":"jv", "task":"translation_to", "metric":"chrf", - "score":0.3941079443 + "score":0.4796942089 }, { "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"pa", + "bcp_47":"mr", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"pa", + "bcp_47":"mr", + "task":"translation_from", + "metric":"bleu", + "score":0.2152417217 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"mr", + "task":"translation_from", + "metric":"chrf", + "score":0.4578207034 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"mr", + "task":"translation_to", + "metric":"bleu", + "score":0.1654073391 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"mr", + "task":"translation_to", + "metric":"chrf", + "score":0.3941079443 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"pa", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"pa", "task":"translation_from", "metric":"bleu", "score":0.1891048622 @@ -11045,6 +12004,48 @@ "metric":"chrf", "score":0.3855814375 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"vi", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"vi", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"vi", + "task":"translation_from", + "metric":"bleu", + "score":0.2003733128 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"vi", + "task":"translation_from", + "metric":"chrf", + "score":0.4613442635 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"vi", + "task":"translation_to", + "metric":"bleu", + "score":0.3326058501 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"vi", + "task":"translation_to", + "metric":"chrf", + "score":0.5496097026 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"zh", @@ -12158,6 +13159,41 @@ "metric":"chrf", "score":0.4036795798 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"jv", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"jv", + "task":"translation_from", + "metric":"bleu", + "score":0.2593989014 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"jv", + "task":"translation_from", + "metric":"chrf", + "score":0.4825732152 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"jv", + "task":"translation_to", + "metric":"bleu", + "score":0.2177203514 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"jv", + "task":"translation_to", + "metric":"chrf", + "score":0.452576603 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"mr", @@ -12431,6 +13467,48 @@ "metric":"chrf", "score":0.4452300688 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"vi", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"vi", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"vi", + "task":"translation_from", + "metric":"bleu", + "score":0.2872551102 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"vi", + "task":"translation_from", + "metric":"chrf", + "score":0.5360250569 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"vi", + "task":"translation_to", + "metric":"bleu", + "score":0.3575088107 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"vi", + "task":"translation_to", + "metric":"chrf", + "score":0.5879974234 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"zh", @@ -12851,6 +13929,41 @@ "metric":"chrf", "score":0.265082494 }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"jv", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"jv", + "task":"translation_from", + "metric":"bleu", + "score":0.0726508623 + }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"jv", + "task":"translation_from", + "metric":"chrf", + "score":0.3051851212 + }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"jv", + "task":"translation_to", + "metric":"bleu", + "score":0.1282743245 + }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"jv", + "task":"translation_to", + "metric":"chrf", + "score":0.3321469572 + }, { "model":"meta-llama\/llama-3.1-8b-instruct", "bcp_47":"mr", @@ -13124,6 +14237,48 @@ "metric":"chrf", "score":0.3521086255 }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"vi", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"vi", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"vi", + "task":"translation_from", + "metric":"bleu", + "score":0.1850447544 + }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"vi", + "task":"translation_from", + "metric":"chrf", + "score":0.4328029287 + }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"vi", + "task":"translation_to", + "metric":"bleu", + "score":0.2262846976 + }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"vi", + "task":"translation_to", + "metric":"chrf", + "score":0.3523271738 + }, { "model":"meta-llama\/llama-3.1-8b-instruct", "bcp_47":"zh", @@ -13544,6 +14699,41 @@ "metric":"chrf", "score":0.0228028425 }, + { + "model":"meta-llama\/llama-3.2-1b-instruct", + "bcp_47":"jv", + "task":"classification", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"meta-llama\/llama-3.2-1b-instruct", + "bcp_47":"jv", + "task":"translation_from", + "metric":"bleu", + "score":0.0192847454 + }, + { + "model":"meta-llama\/llama-3.2-1b-instruct", + "bcp_47":"jv", + "task":"translation_from", + "metric":"chrf", + "score":0.1432000045 + }, + { + "model":"meta-llama\/llama-3.2-1b-instruct", + "bcp_47":"jv", + "task":"translation_to", + "metric":"bleu", + "score":0.0179600462 + }, + { + "model":"meta-llama\/llama-3.2-1b-instruct", + "bcp_47":"jv", + "task":"translation_to", + "metric":"chrf", + "score":0.1308378956 + }, { "model":"meta-llama\/llama-3.2-1b-instruct", "bcp_47":"mr", @@ -13817,6 +15007,48 @@ "metric":"chrf", "score":0.1423687352 }, + { + "model":"meta-llama\/llama-3.2-1b-instruct", + "bcp_47":"vi", + "task":"classification", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"meta-llama\/llama-3.2-1b-instruct", + "bcp_47":"vi", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"meta-llama\/llama-3.2-1b-instruct", + "bcp_47":"vi", + "task":"translation_from", + "metric":"bleu", + "score":0.0697220351 + }, + { + "model":"meta-llama\/llama-3.2-1b-instruct", + "bcp_47":"vi", + "task":"translation_from", + "metric":"chrf", + "score":0.1874677848 + }, + { + "model":"meta-llama\/llama-3.2-1b-instruct", + "bcp_47":"vi", + "task":"translation_to", + "metric":"bleu", + "score":0.135086862 + }, + { + "model":"meta-llama\/llama-3.2-1b-instruct", + "bcp_47":"vi", + "task":"translation_to", + "metric":"chrf", + "score":0.2446182636 + }, { "model":"meta-llama\/llama-3.2-1b-instruct", "bcp_47":"zh", @@ -14239,52 +15471,87 @@ }, { "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"mr", + "bcp_47":"jv", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"mr", + "bcp_47":"jv", "task":"translation_from", "metric":"bleu", - "score":0.2718900108 + "score":0.2779520489 }, { "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"mr", + "bcp_47":"jv", "task":"translation_from", "metric":"chrf", - "score":0.5102541071 + "score":0.5022141687 }, { "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"mr", + "bcp_47":"jv", "task":"translation_to", "metric":"bleu", - "score":0.1858815817 + "score":0.2476437073 }, { "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"mr", + "bcp_47":"jv", "task":"translation_to", "metric":"chrf", - "score":0.4329686416 + "score":0.4805413308 }, { "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"pa", + "bcp_47":"mr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"pa", + "bcp_47":"mr", "task":"translation_from", "metric":"bleu", - "score":0.4659929088 + "score":0.2718900108 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"mr", + "task":"translation_from", + "metric":"chrf", + "score":0.5102541071 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"mr", + "task":"translation_to", + "metric":"bleu", + "score":0.1858815817 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"mr", + "task":"translation_to", + "metric":"chrf", + "score":0.4329686416 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"pa", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"pa", + "task":"translation_from", + "metric":"bleu", + "score":0.4659929088 }, { "model":"meta-llama\/llama-3.3-70b-instruct", @@ -14510,6 +15777,48 @@ "metric":"chrf", "score":0.4396251456 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"vi", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"vi", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"vi", + "task":"translation_from", + "metric":"bleu", + "score":0.2721498467 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"vi", + "task":"translation_from", + "metric":"chrf", + "score":0.5200799335 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"vi", + "task":"translation_to", + "metric":"bleu", + "score":0.3613736416 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"vi", + "task":"translation_to", + "metric":"chrf", + "score":0.5866656133 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"zh", @@ -14930,6 +16239,41 @@ "metric":"chrf", "score":0.438719005 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"jv", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"jv", + "task":"translation_from", + "metric":"bleu", + "score":0.3146151088 + }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"jv", + "task":"translation_from", + "metric":"chrf", + "score":0.5434123174 + }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"jv", + "task":"translation_to", + "metric":"bleu", + "score":0.3068652176 + }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"jv", + "task":"translation_to", + "metric":"chrf", + "score":0.5696595268 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"mr", @@ -15203,6 +16547,48 @@ "metric":"chrf", "score":0.4656123232 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"vi", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"vi", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"vi", + "task":"translation_from", + "metric":"bleu", + "score":0.2670924013 + }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"vi", + "task":"translation_from", + "metric":"chrf", + "score":0.5198891912 + }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"vi", + "task":"translation_to", + "metric":"bleu", + "score":0.3597766713 + }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"vi", + "task":"translation_to", + "metric":"chrf", + "score":0.6081806669 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"zh", @@ -15244,5 +16630,6165 @@ "task":"translation_to", "metric":"chrf", "score":0.3280788158 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"ar", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"ar", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"ar", + "task":"translation_from", + "metric":"bleu", + "score":0.1548779531 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"ar", + "task":"translation_from", + "metric":"chrf", + "score":0.3887963415 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"ar", + "task":"translation_to", + "metric":"bleu", + "score":0.268706305 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"ar", + "task":"translation_to", + "metric":"chrf", + "score":0.4959259833 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"bn", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"bn", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"bn", + "task":"translation_from", + "metric":"bleu", + "score":0.095240952 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"bn", + "task":"translation_from", + "metric":"chrf", + "score":0.3840548344 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"bn", + "task":"translation_to", + "metric":"bleu", + "score":0.1876367188 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"bn", + "task":"translation_to", + "metric":"chrf", + "score":0.4310988737 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"de", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"de", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"de", + "task":"translation_from", + "metric":"bleu", + "score":0.2546126219 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"de", + "task":"translation_from", + "metric":"chrf", + "score":0.4840060449 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"de", + "task":"translation_to", + "metric":"bleu", + "score":0.3171866034 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"de", + "task":"translation_to", + "metric":"chrf", + "score":0.5752285995 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"en", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"en", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"en", + "task":"translation_from", + "metric":"bleu", + "score":0.3642023499 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"en", + "task":"translation_from", + "metric":"chrf", + "score":0.5697992815 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"en", + "task":"translation_to", + "metric":"bleu", + "score":0.4959810553 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"en", + "task":"translation_to", + "metric":"chrf", + "score":0.7232313255 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"es", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"es", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"es", + "task":"translation_from", + "metric":"bleu", + "score":0.1732534835 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"es", + "task":"translation_from", + "metric":"chrf", + "score":0.4434970776 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"es", + "task":"translation_to", + "metric":"bleu", + "score":0.3605235101 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"es", + "task":"translation_to", + "metric":"chrf", + "score":0.5849733787 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"fr", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"fr", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"fr", + "task":"translation_from", + "metric":"bleu", + "score":0.1763652726 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"fr", + "task":"translation_from", + "metric":"chrf", + "score":0.4428784232 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"fr", + "task":"translation_to", + "metric":"bleu", + "score":0.3772793055 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"fr", + "task":"translation_to", + "metric":"chrf", + "score":0.5820724576 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"hi", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"hi", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"hi", + "task":"translation_from", + "metric":"bleu", + "score":0.2788689746 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"hi", + "task":"translation_from", + "metric":"chrf", + "score":0.5417455941 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"hi", + "task":"translation_to", + "metric":"bleu", + "score":0.2425073841 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"hi", + "task":"translation_to", + "metric":"chrf", + "score":0.3817959275 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"id", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"id", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"id", + "task":"translation_from", + "metric":"bleu", + "score":0.0855471394 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"id", + "task":"translation_from", + "metric":"chrf", + "score":0.3720740561 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"id", + "task":"translation_to", + "metric":"bleu", + "score":0.284365864 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"id", + "task":"translation_to", + "metric":"chrf", + "score":0.589202199 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"ja", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"ja", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"ja", + "task":"translation_from", + "metric":"bleu", + "score":0.1462066826 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"ja", + "task":"translation_from", + "metric":"chrf", + "score":0.4299215293 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"ja", + "task":"translation_to", + "metric":"bleu", + "score":0.2371087689 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"ja", + "task":"translation_to", + "metric":"chrf", + "score":0.4064089202 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"jv", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"jv", + "task":"translation_from", + "metric":"bleu", + "score":0.1102793601 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"jv", + "task":"translation_from", + "metric":"chrf", + "score":0.3718502317 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"jv", + "task":"translation_to", + "metric":"bleu", + "score":0.0999035402 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"jv", + "task":"translation_to", + "metric":"chrf", + "score":0.3664761129 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"mr", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"mr", + "task":"translation_from", + "metric":"bleu", + "score":0.1012625471 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"mr", + "task":"translation_from", + "metric":"chrf", + "score":0.3580813711 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"mr", + "task":"translation_to", + "metric":"bleu", + "score":0.0891010327 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"mr", + "task":"translation_to", + "metric":"chrf", + "score":0.3158325956 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"pa", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"pa", + "task":"translation_from", + "metric":"bleu", + "score":0.3550414512 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"pa", + "task":"translation_from", + "metric":"chrf", + "score":0.5626107823 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"pa", + "task":"translation_to", + "metric":"bleu", + "score":0.2784963846 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"pa", + "task":"translation_to", + "metric":"chrf", + "score":0.4121299981 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"pt", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"pt", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"pt", + "task":"translation_from", + "metric":"bleu", + "score":0.1813353123 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"pt", + "task":"translation_from", + "metric":"chrf", + "score":0.4632560004 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"pt", + "task":"translation_to", + "metric":"bleu", + "score":0.3450201321 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"pt", + "task":"translation_to", + "metric":"chrf", + "score":0.5827805827 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"ru", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"ru", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"ru", + "task":"translation_from", + "metric":"bleu", + "score":0.164734586 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"ru", + "task":"translation_from", + "metric":"chrf", + "score":0.4400610126 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"ru", + "task":"translation_to", + "metric":"bleu", + "score":0.2884407046 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"ru", + "task":"translation_to", + "metric":"chrf", + "score":0.5338739518 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"sw", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"sw", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"sw", + "task":"translation_from", + "metric":"bleu", + "score":0.1325294802 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"sw", + "task":"translation_from", + "metric":"chrf", + "score":0.4051925402 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"sw", + "task":"translation_to", + "metric":"bleu", + "score":0.1631216823 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"sw", + "task":"translation_to", + "metric":"chrf", + "score":0.4696161488 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"te", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"te", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"te", + "task":"translation_from", + "metric":"bleu", + "score":0.2322864827 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"te", + "task":"translation_from", + "metric":"chrf", + "score":0.4651967541 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"te", + "task":"translation_to", + "metric":"bleu", + "score":0.2528444882 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"te", + "task":"translation_to", + "metric":"chrf", + "score":0.4794045124 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"ur", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"ur", + "task":"translation_from", + "metric":"bleu", + "score":0.1155003818 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"ur", + "task":"translation_from", + "metric":"chrf", + "score":0.3250077925 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"ur", + "task":"translation_to", + "metric":"bleu", + "score":0.1195513435 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"ur", + "task":"translation_to", + "metric":"chrf", + "score":0.3158904676 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"vi", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"vi", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"vi", + "task":"translation_from", + "metric":"bleu", + "score":0.1452425625 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"vi", + "task":"translation_from", + "metric":"chrf", + "score":0.3941195385 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"vi", + "task":"translation_to", + "metric":"bleu", + "score":0.2680553268 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"vi", + "task":"translation_to", + "metric":"chrf", + "score":0.5055559664 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"zh", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"zh", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"zh", + "task":"translation_from", + "metric":"bleu", + "score":0.1516445239 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"zh", + "task":"translation_from", + "metric":"chrf", + "score":0.4517979691 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"zh", + "task":"translation_to", + "metric":"bleu", + "score":0.2058198052 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"zh", + "task":"translation_to", + "metric":"chrf", + "score":0.2799124898 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ar", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ar", + "task":"mmlu", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ar", + "task":"translation_from", + "metric":"bleu", + "score":0.1428907436 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ar", + "task":"translation_from", + "metric":"chrf", + "score":0.3107041775 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ar", + "task":"translation_to", + "metric":"bleu", + "score":0.1849770017 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ar", + "task":"translation_to", + "metric":"chrf", + "score":0.2932088535 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"bn", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"bn", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"bn", + "task":"translation_from", + "metric":"bleu", + "score":0.0355167863 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"bn", + "task":"translation_from", + "metric":"chrf", + "score":0.2600874171 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"bn", + "task":"translation_to", + "metric":"bleu", + "score":0.0323184525 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"bn", + "task":"translation_to", + "metric":"chrf", + "score":0.1970289791 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"de", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"de", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"de", + "task":"translation_from", + "metric":"bleu", + "score":0.1237340737 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"de", + "task":"translation_from", + "metric":"chrf", + "score":0.3601104142 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"de", + "task":"translation_to", + "metric":"bleu", + "score":0.1371705946 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"de", + "task":"translation_to", + "metric":"chrf", + "score":0.4120757797 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"en", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"en", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"en", + "task":"translation_from", + "metric":"bleu", + "score":0.3548422361 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"en", + "task":"translation_from", + "metric":"chrf", + "score":0.460765953 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"en", + "task":"translation_to", + "metric":"bleu", + "score":0.4690424472 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"en", + "task":"translation_to", + "metric":"chrf", + "score":0.6788013861 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"es", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"es", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"es", + "task":"translation_from", + "metric":"bleu", + "score":0.1169662945 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"es", + "task":"translation_from", + "metric":"chrf", + "score":0.3242693179 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"es", + "task":"translation_to", + "metric":"bleu", + "score":0.2556403143 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"es", + "task":"translation_to", + "metric":"chrf", + "score":0.4583071754 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"fr", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"fr", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"fr", + "task":"translation_from", + "metric":"bleu", + "score":0.0839707225 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"fr", + "task":"translation_from", + "metric":"chrf", + "score":0.3074010094 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"fr", + "task":"translation_to", + "metric":"bleu", + "score":0.2478840637 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"fr", + "task":"translation_to", + "metric":"chrf", + "score":0.4205657928 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"hi", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"hi", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"hi", + "task":"translation_from", + "metric":"bleu", + "score":0.1989310744 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"hi", + "task":"translation_from", + "metric":"chrf", + "score":0.408792844 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"hi", + "task":"translation_to", + "metric":"bleu", + "score":0.2107266229 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"hi", + "task":"translation_to", + "metric":"chrf", + "score":0.3722535388 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"id", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"id", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"id", + "task":"translation_from", + "metric":"bleu", + "score":0.1251179936 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"id", + "task":"translation_from", + "metric":"chrf", + "score":0.3078536626 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"id", + "task":"translation_to", + "metric":"bleu", + "score":0.1049757961 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"id", + "task":"translation_to", + "metric":"chrf", + "score":0.338086632 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ja", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ja", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ja", + "task":"translation_from", + "metric":"bleu", + "score":0.1170990874 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ja", + "task":"translation_from", + "metric":"chrf", + "score":0.3281623219 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ja", + "task":"translation_to", + "metric":"bleu", + "score":0.1166577127 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ja", + "task":"translation_to", + "metric":"chrf", + "score":0.2303280443 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"jv", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"jv", + "task":"translation_from", + "metric":"bleu", + "score":0.0192945074 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"jv", + "task":"translation_from", + "metric":"chrf", + "score":0.2015068169 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"jv", + "task":"translation_to", + "metric":"bleu", + "score":0.045857499 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"jv", + "task":"translation_to", + "metric":"chrf", + "score":0.1778848232 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"mr", + "task":"classification", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"mr", + "task":"translation_from", + "metric":"bleu", + "score":0.0250471784 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"mr", + "task":"translation_from", + "metric":"chrf", + "score":0.2097577846 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"mr", + "task":"translation_to", + "metric":"bleu", + "score":0.034382114 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"mr", + "task":"translation_to", + "metric":"chrf", + "score":0.2211758055 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"pa", + "task":"classification", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"pa", + "task":"translation_from", + "metric":"bleu", + "score":0.3055395757 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"pa", + "task":"translation_from", + "metric":"chrf", + "score":0.4480585816 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"pa", + "task":"translation_to", + "metric":"bleu", + "score":0.2165906221 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"pa", + "task":"translation_to", + "metric":"chrf", + "score":0.3271537328 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"pt", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"pt", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"pt", + "task":"translation_from", + "metric":"bleu", + "score":0.1031395116 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"pt", + "task":"translation_from", + "metric":"chrf", + "score":0.3223915745 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"pt", + "task":"translation_to", + "metric":"bleu", + "score":0.1194174782 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"pt", + "task":"translation_to", + "metric":"chrf", + "score":0.3618255907 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ru", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ru", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ru", + "task":"translation_from", + "metric":"bleu", + "score":0.0826481083 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ru", + "task":"translation_from", + "metric":"chrf", + "score":0.2913230821 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ru", + "task":"translation_to", + "metric":"bleu", + "score":0.1808682916 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ru", + "task":"translation_to", + "metric":"chrf", + "score":0.3815777762 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"sw", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"sw", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"sw", + "task":"translation_from", + "metric":"bleu", + "score":0.0422003709 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"sw", + "task":"translation_from", + "metric":"chrf", + "score":0.253591842 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"sw", + "task":"translation_to", + "metric":"bleu", + "score":0.0280729387 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"sw", + "task":"translation_to", + "metric":"chrf", + "score":0.1884927612 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"te", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"te", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"te", + "task":"translation_from", + "metric":"bleu", + "score":0.1550101498 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"te", + "task":"translation_from", + "metric":"chrf", + "score":0.3623113506 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"te", + "task":"translation_to", + "metric":"bleu", + "score":0.1217984824 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"te", + "task":"translation_to", + "metric":"chrf", + "score":0.2801870917 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ur", + "task":"classification", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ur", + "task":"translation_from", + "metric":"bleu", + "score":0.096255918 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ur", + "task":"translation_from", + "metric":"chrf", + "score":0.2490196736 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ur", + "task":"translation_to", + "metric":"bleu", + "score":0.0759551519 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ur", + "task":"translation_to", + "metric":"chrf", + "score":0.2765897266 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"vi", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"vi", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"vi", + "task":"translation_from", + "metric":"bleu", + "score":0.1578714698 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"vi", + "task":"translation_from", + "metric":"chrf", + "score":0.3784433754 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"vi", + "task":"translation_to", + "metric":"bleu", + "score":0.1713340477 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"vi", + "task":"translation_to", + "metric":"chrf", + "score":0.3260532752 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"zh", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"zh", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"zh", + "task":"translation_from", + "metric":"bleu", + "score":0.0821079546 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"zh", + "task":"translation_from", + "metric":"chrf", + "score":0.3164863838 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"zh", + "task":"translation_to", + "metric":"bleu", + "score":0.1265931852 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"zh", + "task":"translation_to", + "metric":"chrf", + "score":0.1793067232 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"ar", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"ar", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"ar", + "task":"translation_from", + "metric":"bleu", + "score":0.1314009634 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"ar", + "task":"translation_from", + "metric":"chrf", + "score":0.3827163755 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"ar", + "task":"translation_to", + "metric":"bleu", + "score":0.0953897712 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"ar", + "task":"translation_to", + "metric":"chrf", + "score":0.2894343613 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"bn", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"bn", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"bn", + "task":"translation_from", + "metric":"bleu", + "score":0.1016298945 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"bn", + "task":"translation_from", + "metric":"chrf", + "score":0.2980803254 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"bn", + "task":"translation_to", + "metric":"bleu", + "score":0.0978160022 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"bn", + "task":"translation_to", + "metric":"chrf", + "score":0.2190252958 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"de", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"de", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"de", + "task":"translation_from", + "metric":"bleu", + "score":0.1786073211 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"de", + "task":"translation_from", + "metric":"chrf", + "score":0.418923403 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"de", + "task":"translation_to", + "metric":"bleu", + "score":0.1529904036 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"de", + "task":"translation_to", + "metric":"chrf", + "score":0.4257110482 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"en", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"en", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"en", + "task":"translation_from", + "metric":"bleu", + "score":0.2963681355 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"en", + "task":"translation_from", + "metric":"chrf", + "score":0.5277177226 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"en", + "task":"translation_to", + "metric":"bleu", + "score":0.4404172544 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"en", + "task":"translation_to", + "metric":"chrf", + "score":0.7231001513 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"es", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"es", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"es", + "task":"translation_from", + "metric":"bleu", + "score":0.1298121807 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"es", + "task":"translation_from", + "metric":"chrf", + "score":0.3403579227 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"es", + "task":"translation_to", + "metric":"bleu", + "score":0.3743863952 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"es", + "task":"translation_to", + "metric":"chrf", + "score":0.5971283997 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"fr", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"fr", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"fr", + "task":"translation_from", + "metric":"bleu", + "score":0.1078563354 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"fr", + "task":"translation_from", + "metric":"chrf", + "score":0.3207926618 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"fr", + "task":"translation_to", + "metric":"bleu", + "score":0.3659011486 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"fr", + "task":"translation_to", + "metric":"chrf", + "score":0.580998869 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"hi", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"hi", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"hi", + "task":"translation_from", + "metric":"bleu", + "score":0.1878052787 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"hi", + "task":"translation_from", + "metric":"chrf", + "score":0.4458139958 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"hi", + "task":"translation_to", + "metric":"bleu", + "score":0.1324625901 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"hi", + "task":"translation_to", + "metric":"chrf", + "score":0.3233634009 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"id", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"id", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"id", + "task":"translation_from", + "metric":"bleu", + "score":0.1489627056 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"id", + "task":"translation_from", + "metric":"chrf", + "score":0.4172638299 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"id", + "task":"translation_to", + "metric":"bleu", + "score":0.2008824981 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"id", + "task":"translation_to", + "metric":"chrf", + "score":0.5185852751 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"ja", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"ja", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"ja", + "task":"translation_from", + "metric":"bleu", + "score":0.1169165949 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"ja", + "task":"translation_from", + "metric":"chrf", + "score":0.3638899173 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"ja", + "task":"translation_to", + "metric":"bleu", + "score":0.1330552123 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"ja", + "task":"translation_to", + "metric":"chrf", + "score":0.2685952079 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"jv", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"jv", + "task":"translation_from", + "metric":"bleu", + "score":0.0823011221 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"jv", + "task":"translation_from", + "metric":"chrf", + "score":0.2825939861 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"jv", + "task":"translation_to", + "metric":"bleu", + "score":0.0327082346 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"jv", + "task":"translation_to", + "metric":"chrf", + "score":0.2319054893 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"mr", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"mr", + "task":"translation_from", + "metric":"bleu", + "score":0.1016737952 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"mr", + "task":"translation_from", + "metric":"chrf", + "score":0.3529445259 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"mr", + "task":"translation_to", + "metric":"bleu", + "score":0.0980137705 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"mr", + "task":"translation_to", + "metric":"chrf", + "score":0.3076980329 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"pa", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"pa", + "task":"translation_from", + "metric":"bleu", + "score":0.3360340722 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"pa", + "task":"translation_from", + "metric":"chrf", + "score":0.5243077445 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"pa", + "task":"translation_to", + "metric":"bleu", + "score":0.2903521386 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"pa", + "task":"translation_to", + "metric":"chrf", + "score":0.4758823803 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"pt", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"pt", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"pt", + "task":"translation_from", + "metric":"bleu", + "score":0.1398701241 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"pt", + "task":"translation_from", + "metric":"chrf", + "score":0.3034565852 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"pt", + "task":"translation_to", + "metric":"bleu", + "score":0.3460979115 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"pt", + "task":"translation_to", + "metric":"chrf", + "score":0.5835851988 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"ru", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"ru", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"ru", + "task":"translation_from", + "metric":"bleu", + "score":0.0947682488 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"ru", + "task":"translation_from", + "metric":"chrf", + "score":0.3259777135 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"ru", + "task":"translation_to", + "metric":"bleu", + "score":0.1796274314 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"ru", + "task":"translation_to", + "metric":"chrf", + "score":0.4360781177 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"sw", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"sw", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"sw", + "task":"translation_from", + "metric":"bleu", + "score":0.0669163701 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"sw", + "task":"translation_from", + "metric":"chrf", + "score":0.2784916366 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"sw", + "task":"translation_to", + "metric":"bleu", + "score":0.0633186191 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"sw", + "task":"translation_to", + "metric":"chrf", + "score":0.3074668268 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"te", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"te", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"te", + "task":"translation_from", + "metric":"bleu", + "score":0.1471870965 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"te", + "task":"translation_from", + "metric":"chrf", + "score":0.3392441061 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"te", + "task":"translation_to", + "metric":"bleu", + "score":0.1060450795 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"te", + "task":"translation_to", + "metric":"chrf", + "score":0.2738653779 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"ur", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"ur", + "task":"translation_from", + "metric":"bleu", + "score":0.0522713846 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"ur", + "task":"translation_from", + "metric":"chrf", + "score":0.3192866676 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"ur", + "task":"translation_to", + "metric":"bleu", + "score":0.0868686952 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"ur", + "task":"translation_to", + "metric":"chrf", + "score":0.2859772299 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"vi", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"vi", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"vi", + "task":"translation_from", + "metric":"bleu", + "score":0.0649160569 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"vi", + "task":"translation_from", + "metric":"chrf", + "score":0.2830042558 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"vi", + "task":"translation_to", + "metric":"bleu", + "score":0.0350138164 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"vi", + "task":"translation_to", + "metric":"chrf", + "score":0.2206802597 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"zh", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"zh", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"zh", + "task":"translation_from", + "metric":"bleu", + "score":0.0992800287 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"zh", + "task":"translation_from", + "metric":"chrf", + "score":0.324429867 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"zh", + "task":"translation_to", + "metric":"bleu", + "score":0.09185491 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"zh", + "task":"translation_to", + "metric":"chrf", + "score":0.1769207611 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"ar", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"ar", + "task":"mmlu", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"ar", + "task":"translation_from", + "metric":"bleu", + "score":0.278999196 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"ar", + "task":"translation_from", + "metric":"chrf", + "score":0.5072892325 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"ar", + "task":"translation_to", + "metric":"bleu", + "score":0.335915232 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"ar", + "task":"translation_to", + "metric":"chrf", + "score":0.5453940527 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"bn", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"bn", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"bn", + "task":"translation_from", + "metric":"bleu", + "score":0.2557484343 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"bn", + "task":"translation_from", + "metric":"chrf", + "score":0.4816174974 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"bn", + "task":"translation_to", + "metric":"bleu", + "score":0.336565743 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"bn", + "task":"translation_to", + "metric":"chrf", + "score":0.500099888 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"de", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"de", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"de", + "task":"translation_from", + "metric":"bleu", + "score":0.33652498 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"de", + "task":"translation_from", + "metric":"chrf", + "score":0.5394051209 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"de", + "task":"translation_to", + "metric":"bleu", + "score":0.4124704223 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"de", + "task":"translation_to", + "metric":"chrf", + "score":0.6500309258 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"en", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"en", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"en", + "task":"translation_from", + "metric":"bleu", + "score":0.4552458759 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"en", + "task":"translation_from", + "metric":"chrf", + "score":0.6289172201 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"en", + "task":"translation_to", + "metric":"bleu", + "score":0.5813419207 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"en", + "task":"translation_to", + "metric":"chrf", + "score":0.8065247071 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"es", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"es", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"es", + "task":"translation_from", + "metric":"bleu", + "score":0.293714449 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"es", + "task":"translation_from", + "metric":"chrf", + "score":0.4892518335 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"es", + "task":"translation_to", + "metric":"bleu", + "score":0.3400529578 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"es", + "task":"translation_to", + "metric":"chrf", + "score":0.6054518089 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"fr", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"fr", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"fr", + "task":"translation_from", + "metric":"bleu", + "score":0.2548863763 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"fr", + "task":"translation_from", + "metric":"chrf", + "score":0.5181895957 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"fr", + "task":"translation_to", + "metric":"bleu", + "score":0.4309072933 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"fr", + "task":"translation_to", + "metric":"chrf", + "score":0.6122951839 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"hi", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"hi", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"hi", + "task":"translation_from", + "metric":"bleu", + "score":0.3708164771 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"hi", + "task":"translation_from", + "metric":"chrf", + "score":0.5780452995 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"hi", + "task":"translation_to", + "metric":"bleu", + "score":0.3889665973 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"hi", + "task":"translation_to", + "metric":"chrf", + "score":0.5940361548 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"id", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"id", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"id", + "task":"translation_from", + "metric":"bleu", + "score":0.2714445111 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"id", + "task":"translation_from", + "metric":"chrf", + "score":0.5033343062 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"id", + "task":"translation_to", + "metric":"bleu", + "score":0.3367134056 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"id", + "task":"translation_to", + "metric":"chrf", + "score":0.6204206544 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"ja", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"ja", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"ja", + "task":"translation_from", + "metric":"bleu", + "score":0.2208745982 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"ja", + "task":"translation_from", + "metric":"chrf", + "score":0.4944838309 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"ja", + "task":"translation_to", + "metric":"bleu", + "score":0.1755723698 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"ja", + "task":"translation_to", + "metric":"chrf", + "score":0.3733502483 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"jv", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"jv", + "task":"translation_from", + "metric":"bleu", + "score":0.1125731148 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"jv", + "task":"translation_from", + "metric":"chrf", + "score":0.2778916971 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"jv", + "task":"translation_to", + "metric":"bleu", + "score":0.0861201622 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"jv", + "task":"translation_to", + "metric":"chrf", + "score":0.3310005151 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"mr", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"mr", + "task":"translation_from", + "metric":"bleu", + "score":0.3097940645 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"mr", + "task":"translation_from", + "metric":"chrf", + "score":0.5304242832 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"mr", + "task":"translation_to", + "metric":"bleu", + "score":0.2168719994 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"mr", + "task":"translation_to", + "metric":"chrf", + "score":0.4555868419 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"pa", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"pa", + "task":"translation_from", + "metric":"bleu", + "score":0.4077844252 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"pa", + "task":"translation_from", + "metric":"chrf", + "score":0.6076754833 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"pa", + "task":"translation_to", + "metric":"bleu", + "score":0.4459003493 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"pa", + "task":"translation_to", + "metric":"chrf", + "score":0.5840266721 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"pt", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"pt", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"pt", + "task":"translation_from", + "metric":"bleu", + "score":0.264907032 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"pt", + "task":"translation_from", + "metric":"chrf", + "score":0.4939362461 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"pt", + "task":"translation_to", + "metric":"bleu", + "score":0.4563355662 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"pt", + "task":"translation_to", + "metric":"chrf", + "score":0.6843169799 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"ru", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"ru", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"ru", + "task":"translation_from", + "metric":"bleu", + "score":0.1955652432 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"ru", + "task":"translation_from", + "metric":"chrf", + "score":0.4654058492 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"ru", + "task":"translation_to", + "metric":"bleu", + "score":0.2591393679 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"ru", + "task":"translation_to", + "metric":"chrf", + "score":0.4936043335 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"sw", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"sw", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"sw", + "task":"translation_from", + "metric":"bleu", + "score":0.0845702794 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"sw", + "task":"translation_from", + "metric":"chrf", + "score":0.2799479817 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"sw", + "task":"translation_to", + "metric":"bleu", + "score":0.1327606257 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"sw", + "task":"translation_to", + "metric":"chrf", + "score":0.4346855791 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"te", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"te", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"te", + "task":"translation_from", + "metric":"bleu", + "score":0.3805998732 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"te", + "task":"translation_from", + "metric":"chrf", + "score":0.5566308844 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"te", + "task":"translation_to", + "metric":"bleu", + "score":0.3811390337 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"te", + "task":"translation_to", + "metric":"chrf", + "score":0.5895281984 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"ur", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"ur", + "task":"translation_from", + "metric":"bleu", + "score":0.2469989894 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"ur", + "task":"translation_from", + "metric":"chrf", + "score":0.4943114536 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"ur", + "task":"translation_to", + "metric":"bleu", + "score":0.2844546137 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"ur", + "task":"translation_to", + "metric":"chrf", + "score":0.477210689 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"vi", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"vi", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"vi", + "task":"translation_from", + "metric":"bleu", + "score":0.2373174322 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"vi", + "task":"translation_from", + "metric":"chrf", + "score":0.4521152897 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"vi", + "task":"translation_to", + "metric":"bleu", + "score":0.2962197342 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"vi", + "task":"translation_to", + "metric":"chrf", + "score":0.544285644 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"zh", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"zh", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"zh", + "task":"translation_from", + "metric":"bleu", + "score":0.2391219094 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"zh", + "task":"translation_from", + "metric":"chrf", + "score":0.4993680631 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"zh", + "task":"translation_to", + "metric":"bleu", + "score":0.2546682455 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"zh", + "task":"translation_to", + "metric":"chrf", + "score":0.3131256963 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ar", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ar", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ar", + "task":"translation_from", + "metric":"bleu", + "score":0.1878091774 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ar", + "task":"translation_from", + "metric":"chrf", + "score":0.4280751788 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ar", + "task":"translation_to", + "metric":"bleu", + "score":0.2100749947 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ar", + "task":"translation_to", + "metric":"chrf", + "score":0.3864616183 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"bn", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"bn", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"bn", + "task":"translation_from", + "metric":"bleu", + "score":0.1554049163 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"bn", + "task":"translation_from", + "metric":"chrf", + "score":0.3557989532 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"bn", + "task":"translation_to", + "metric":"bleu", + "score":0.2229681692 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"bn", + "task":"translation_to", + "metric":"chrf", + "score":0.3722031872 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"de", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"de", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"de", + "task":"translation_from", + "metric":"bleu", + "score":0.2836619572 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"de", + "task":"translation_from", + "metric":"chrf", + "score":0.5189927538 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"de", + "task":"translation_to", + "metric":"bleu", + "score":0.3481484827 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"de", + "task":"translation_to", + "metric":"chrf", + "score":0.5703115876 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"en", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"en", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"en", + "task":"translation_from", + "metric":"bleu", + "score":0.4358717425 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"en", + "task":"translation_from", + "metric":"chrf", + "score":0.6055153523 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"en", + "task":"translation_to", + "metric":"bleu", + "score":0.6254747881 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"en", + "task":"translation_to", + "metric":"chrf", + "score":0.7898596498 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"es", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"es", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"es", + "task":"translation_from", + "metric":"bleu", + "score":0.2211802733 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"es", + "task":"translation_from", + "metric":"chrf", + "score":0.4673850088 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"es", + "task":"translation_to", + "metric":"bleu", + "score":0.3141247128 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"es", + "task":"translation_to", + "metric":"chrf", + "score":0.5664089061 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"fr", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"fr", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"fr", + "task":"translation_from", + "metric":"bleu", + "score":0.2637885864 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"fr", + "task":"translation_from", + "metric":"chrf", + "score":0.50661739 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"fr", + "task":"translation_to", + "metric":"bleu", + "score":0.3898419239 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"fr", + "task":"translation_to", + "metric":"chrf", + "score":0.5983170279 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"hi", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"hi", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"hi", + "task":"translation_from", + "metric":"bleu", + "score":0.2931006661 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"hi", + "task":"translation_from", + "metric":"chrf", + "score":0.5117710763 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"hi", + "task":"translation_to", + "metric":"bleu", + "score":0.3454885653 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"hi", + "task":"translation_to", + "metric":"chrf", + "score":0.5632832845 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"id", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"id", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"id", + "task":"translation_from", + "metric":"bleu", + "score":0.1846129963 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"id", + "task":"translation_from", + "metric":"chrf", + "score":0.4218789485 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"id", + "task":"translation_to", + "metric":"bleu", + "score":0.2363785743 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"id", + "task":"translation_to", + "metric":"chrf", + "score":0.5715726858 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ja", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ja", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ja", + "task":"translation_from", + "metric":"bleu", + "score":0.1660483895 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ja", + "task":"translation_from", + "metric":"chrf", + "score":0.4317128373 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ja", + "task":"translation_to", + "metric":"bleu", + "score":0.2448276505 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ja", + "task":"translation_to", + "metric":"chrf", + "score":0.4102738917 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"jv", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"jv", + "task":"translation_from", + "metric":"bleu", + "score":0.1159703103 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"jv", + "task":"translation_from", + "metric":"chrf", + "score":0.3116167676 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"jv", + "task":"translation_to", + "metric":"bleu", + "score":0.0803083214 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"jv", + "task":"translation_to", + "metric":"chrf", + "score":0.3001867634 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"mr", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"mr", + "task":"translation_from", + "metric":"bleu", + "score":0.2216993022 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"mr", + "task":"translation_from", + "metric":"chrf", + "score":0.4221894818 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"mr", + "task":"translation_to", + "metric":"bleu", + "score":0.1386303624 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"mr", + "task":"translation_to", + "metric":"chrf", + "score":0.3369477219 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"pa", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"pa", + "task":"translation_from", + "metric":"bleu", + "score":0.3668684839 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"pa", + "task":"translation_from", + "metric":"chrf", + "score":0.5826633325 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"pa", + "task":"translation_to", + "metric":"bleu", + "score":0.3897396366 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"pa", + "task":"translation_to", + "metric":"chrf", + "score":0.5030239884 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"pt", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"pt", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"pt", + "task":"translation_from", + "metric":"bleu", + "score":0.2070601418 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"pt", + "task":"translation_from", + "metric":"chrf", + "score":0.4601106145 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"pt", + "task":"translation_to", + "metric":"bleu", + "score":0.412349088 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"pt", + "task":"translation_to", + "metric":"chrf", + "score":0.6102742767 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ru", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ru", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ru", + "task":"translation_from", + "metric":"bleu", + "score":0.1710045162 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ru", + "task":"translation_from", + "metric":"chrf", + "score":0.433047449 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ru", + "task":"translation_to", + "metric":"bleu", + "score":0.2961144006 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ru", + "task":"translation_to", + "metric":"chrf", + "score":0.5257430939 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"sw", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"sw", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"sw", + "task":"translation_from", + "metric":"bleu", + "score":0.1106429776 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"sw", + "task":"translation_from", + "metric":"chrf", + "score":0.3246322884 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"sw", + "task":"translation_to", + "metric":"bleu", + "score":0.1586725311 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"sw", + "task":"translation_to", + "metric":"chrf", + "score":0.4486820539 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"te", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"te", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"te", + "task":"translation_from", + "metric":"bleu", + "score":0.2631133201 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"te", + "task":"translation_from", + "metric":"chrf", + "score":0.504043761 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"te", + "task":"translation_to", + "metric":"bleu", + "score":0.2496114121 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"te", + "task":"translation_to", + "metric":"chrf", + "score":0.4707696336 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ur", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ur", + "task":"translation_from", + "metric":"bleu", + "score":0.1854021136 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ur", + "task":"translation_from", + "metric":"chrf", + "score":0.4258067424 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ur", + "task":"translation_to", + "metric":"bleu", + "score":0.1388133394 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ur", + "task":"translation_to", + "metric":"chrf", + "score":0.3685419874 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"vi", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"vi", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"vi", + "task":"translation_from", + "metric":"bleu", + "score":0.2204611632 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"vi", + "task":"translation_from", + "metric":"chrf", + "score":0.4366931331 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"vi", + "task":"translation_to", + "metric":"bleu", + "score":0.2957932526 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"vi", + "task":"translation_to", + "metric":"chrf", + "score":0.5127606293 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"zh", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"zh", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"zh", + "task":"translation_from", + "metric":"bleu", + "score":0.1386896901 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"zh", + "task":"translation_from", + "metric":"chrf", + "score":0.413832278 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"zh", + "task":"translation_to", + "metric":"bleu", + "score":0.2503925306 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"zh", + "task":"translation_to", + "metric":"chrf", + "score":0.3045084897 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ar", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ar", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ar", + "task":"translation_from", + "metric":"bleu", + "score":0.2566573338 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ar", + "task":"translation_from", + "metric":"chrf", + "score":0.5183862763 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ar", + "task":"translation_to", + "metric":"bleu", + "score":0.3901123396 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ar", + "task":"translation_to", + "metric":"chrf", + "score":0.5771753105 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"bn", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"bn", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"bn", + "task":"translation_from", + "metric":"bleu", + "score":0.2819696539 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"bn", + "task":"translation_from", + "metric":"chrf", + "score":0.5293077213 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"bn", + "task":"translation_to", + "metric":"bleu", + "score":0.3092254935 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"bn", + "task":"translation_to", + "metric":"chrf", + "score":0.4661357412 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"de", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"de", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"de", + "task":"translation_from", + "metric":"bleu", + "score":0.3052927761 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"de", + "task":"translation_from", + "metric":"chrf", + "score":0.5504382993 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"de", + "task":"translation_to", + "metric":"bleu", + "score":0.4246355556 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"de", + "task":"translation_to", + "metric":"chrf", + "score":0.6487523813 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"en", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"en", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"en", + "task":"translation_from", + "metric":"bleu", + "score":0.5178458342 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"en", + "task":"translation_from", + "metric":"chrf", + "score":0.6792020066 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"en", + "task":"translation_to", + "metric":"bleu", + "score":0.6631992536 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"en", + "task":"translation_to", + "metric":"chrf", + "score":0.8257245236 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"es", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"es", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"es", + "task":"translation_from", + "metric":"bleu", + "score":0.2800331904 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"es", + "task":"translation_from", + "metric":"chrf", + "score":0.5328441069 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"es", + "task":"translation_to", + "metric":"bleu", + "score":0.3927902573 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"es", + "task":"translation_to", + "metric":"chrf", + "score":0.628791549 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"fr", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"fr", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"fr", + "task":"translation_from", + "metric":"bleu", + "score":0.2950999056 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"fr", + "task":"translation_from", + "metric":"chrf", + "score":0.5638983665 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"fr", + "task":"translation_to", + "metric":"bleu", + "score":0.5061822417 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"fr", + "task":"translation_to", + "metric":"chrf", + "score":0.6903823708 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"hi", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"hi", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"hi", + "task":"translation_from", + "metric":"bleu", + "score":0.3531906075 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"hi", + "task":"translation_from", + "metric":"chrf", + "score":0.6060071382 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"hi", + "task":"translation_to", + "metric":"bleu", + "score":0.399293733 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"hi", + "task":"translation_to", + "metric":"chrf", + "score":0.6132292528 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"id", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"id", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"id", + "task":"translation_from", + "metric":"bleu", + "score":0.2559881532 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"id", + "task":"translation_from", + "metric":"chrf", + "score":0.5349715693 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"id", + "task":"translation_to", + "metric":"bleu", + "score":0.3900018149 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"id", + "task":"translation_to", + "metric":"chrf", + "score":0.6494354052 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ja", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ja", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ja", + "task":"translation_from", + "metric":"bleu", + "score":0.2100828863 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ja", + "task":"translation_from", + "metric":"chrf", + "score":0.4717405627 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ja", + "task":"translation_to", + "metric":"bleu", + "score":0.2844229339 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ja", + "task":"translation_to", + "metric":"chrf", + "score":0.4435245651 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"jv", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"jv", + "task":"translation_from", + "metric":"bleu", + "score":0.2977682173 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"jv", + "task":"translation_from", + "metric":"chrf", + "score":0.5413323701 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"jv", + "task":"translation_to", + "metric":"bleu", + "score":0.2537598479 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"jv", + "task":"translation_to", + "metric":"chrf", + "score":0.5629521778 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"mr", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"mr", + "task":"translation_from", + "metric":"bleu", + "score":0.2358876365 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"mr", + "task":"translation_from", + "metric":"chrf", + "score":0.4961149155 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"mr", + "task":"translation_to", + "metric":"bleu", + "score":0.2674122275 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"mr", + "task":"translation_to", + "metric":"chrf", + "score":0.4442281313 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"pa", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"pa", + "task":"translation_from", + "metric":"bleu", + "score":0.3898113091 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"pa", + "task":"translation_from", + "metric":"chrf", + "score":0.6349932626 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"pa", + "task":"translation_to", + "metric":"bleu", + "score":0.4218934881 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"pa", + "task":"translation_to", + "metric":"chrf", + "score":0.5699211354 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"pt", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"pt", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"pt", + "task":"translation_from", + "metric":"bleu", + "score":0.2989764302 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"pt", + "task":"translation_from", + "metric":"chrf", + "score":0.5575461672 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"pt", + "task":"translation_to", + "metric":"bleu", + "score":0.4347143661 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"pt", + "task":"translation_to", + "metric":"chrf", + "score":0.6526848356 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ru", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ru", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ru", + "task":"translation_from", + "metric":"bleu", + "score":0.2156464838 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ru", + "task":"translation_from", + "metric":"chrf", + "score":0.4961661832 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ru", + "task":"translation_to", + "metric":"bleu", + "score":0.3525318267 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ru", + "task":"translation_to", + "metric":"chrf", + "score":0.5377697887 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"sw", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"sw", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"sw", + "task":"translation_from", + "metric":"bleu", + "score":0.2373352462 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"sw", + "task":"translation_from", + "metric":"chrf", + "score":0.4812305289 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"sw", + "task":"translation_to", + "metric":"bleu", + "score":0.3592771753 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"sw", + "task":"translation_to", + "metric":"chrf", + "score":0.6196466978 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"te", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"te", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"te", + "task":"translation_from", + "metric":"bleu", + "score":0.3847076164 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"te", + "task":"translation_from", + "metric":"chrf", + "score":0.5961555843 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"te", + "task":"translation_to", + "metric":"bleu", + "score":0.3031970309 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"te", + "task":"translation_to", + "metric":"chrf", + "score":0.4938747459 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ur", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ur", + "task":"translation_from", + "metric":"bleu", + "score":0.2282243664 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ur", + "task":"translation_from", + "metric":"chrf", + "score":0.4878680978 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ur", + "task":"translation_to", + "metric":"bleu", + "score":0.2701355148 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ur", + "task":"translation_to", + "metric":"chrf", + "score":0.4485608146 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"vi", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"vi", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"vi", + "task":"translation_from", + "metric":"bleu", + "score":0.2800966186 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"vi", + "task":"translation_from", + "metric":"chrf", + "score":0.5447813345 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"vi", + "task":"translation_to", + "metric":"bleu", + "score":0.3743034645 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"vi", + "task":"translation_to", + "metric":"chrf", + "score":0.5977965321 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"zh", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"zh", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"zh", + "task":"translation_from", + "metric":"bleu", + "score":0.211203078 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"zh", + "task":"translation_from", + "metric":"chrf", + "score":0.4926704854 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"zh", + "task":"translation_to", + "metric":"bleu", + "score":0.2808125016 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"zh", + "task":"translation_to", + "metric":"chrf", + "score":0.3349070044 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ar", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ar", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ar", + "task":"translation_from", + "metric":"bleu", + "score":0.2230716751 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ar", + "task":"translation_from", + "metric":"chrf", + "score":0.4868000305 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ar", + "task":"translation_to", + "metric":"bleu", + "score":0.3431634646 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ar", + "task":"translation_to", + "metric":"chrf", + "score":0.5539675011 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"bn", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"bn", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"bn", + "task":"translation_from", + "metric":"bleu", + "score":0.2104863522 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"bn", + "task":"translation_from", + "metric":"chrf", + "score":0.4534437048 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"bn", + "task":"translation_to", + "metric":"bleu", + "score":0.2824626 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"bn", + "task":"translation_to", + "metric":"chrf", + "score":0.4822940799 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"de", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"de", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"de", + "task":"translation_from", + "metric":"bleu", + "score":0.282506513 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"de", + "task":"translation_from", + "metric":"chrf", + "score":0.5438865496 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"de", + "task":"translation_to", + "metric":"bleu", + "score":0.4220387975 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"de", + "task":"translation_to", + "metric":"chrf", + "score":0.6440212985 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"en", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"en", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"en", + "task":"translation_from", + "metric":"bleu", + "score":0.4926163025 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"en", + "task":"translation_from", + "metric":"chrf", + "score":0.6598180449 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"en", + "task":"translation_to", + "metric":"bleu", + "score":0.5627424753 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"en", + "task":"translation_to", + "metric":"chrf", + "score":0.7654936904 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"es", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"es", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"es", + "task":"translation_from", + "metric":"bleu", + "score":0.2400384539 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"es", + "task":"translation_from", + "metric":"chrf", + "score":0.4846098061 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"es", + "task":"translation_to", + "metric":"bleu", + "score":0.331169359 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"es", + "task":"translation_to", + "metric":"chrf", + "score":0.541145091 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"fr", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"fr", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"fr", + "task":"translation_from", + "metric":"bleu", + "score":0.2245970544 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"fr", + "task":"translation_from", + "metric":"chrf", + "score":0.4773636644 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"fr", + "task":"translation_to", + "metric":"bleu", + "score":0.456500631 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"fr", + "task":"translation_to", + "metric":"chrf", + "score":0.6519350009 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"hi", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"hi", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"hi", + "task":"translation_from", + "metric":"bleu", + "score":0.3100386494 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"hi", + "task":"translation_from", + "metric":"chrf", + "score":0.5747433617 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"hi", + "task":"translation_to", + "metric":"bleu", + "score":0.3803524524 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"hi", + "task":"translation_to", + "metric":"chrf", + "score":0.5965079449 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"id", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"id", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"id", + "task":"translation_from", + "metric":"bleu", + "score":0.1907075731 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"id", + "task":"translation_from", + "metric":"chrf", + "score":0.4662972265 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"id", + "task":"translation_to", + "metric":"bleu", + "score":0.3386484563 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"id", + "task":"translation_to", + "metric":"chrf", + "score":0.6376664219 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ja", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ja", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ja", + "task":"translation_from", + "metric":"bleu", + "score":0.2240038475 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ja", + "task":"translation_from", + "metric":"chrf", + "score":0.4860646744 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ja", + "task":"translation_to", + "metric":"bleu", + "score":0.2284065848 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ja", + "task":"translation_to", + "metric":"chrf", + "score":0.3753787999 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"jv", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"jv", + "task":"translation_from", + "metric":"bleu", + "score":0.1867349669 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"jv", + "task":"translation_from", + "metric":"chrf", + "score":0.4122967846 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"jv", + "task":"translation_to", + "metric":"bleu", + "score":0.20591358 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"jv", + "task":"translation_to", + "metric":"chrf", + "score":0.5231507594 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"mr", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"mr", + "task":"translation_from", + "metric":"bleu", + "score":0.1755855974 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"mr", + "task":"translation_from", + "metric":"chrf", + "score":0.464470709 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"mr", + "task":"translation_to", + "metric":"bleu", + "score":0.2053629902 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"mr", + "task":"translation_to", + "metric":"chrf", + "score":0.4466569291 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"pa", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"pa", + "task":"translation_from", + "metric":"bleu", + "score":0.3790281875 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"pa", + "task":"translation_from", + "metric":"chrf", + "score":0.5948460259 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"pa", + "task":"translation_to", + "metric":"bleu", + "score":0.4122107278 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"pa", + "task":"translation_to", + "metric":"chrf", + "score":0.5709045042 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"pt", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"pt", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"pt", + "task":"translation_from", + "metric":"bleu", + "score":0.2493437671 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"pt", + "task":"translation_from", + "metric":"chrf", + "score":0.4876335319 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"pt", + "task":"translation_to", + "metric":"bleu", + "score":0.3816451478 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"pt", + "task":"translation_to", + "metric":"chrf", + "score":0.6189446172 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ru", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ru", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ru", + "task":"translation_from", + "metric":"bleu", + "score":0.2106935755 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ru", + "task":"translation_from", + "metric":"chrf", + "score":0.4916756186 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ru", + "task":"translation_to", + "metric":"bleu", + "score":0.2957139688 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ru", + "task":"translation_to", + "metric":"chrf", + "score":0.5505026606 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"sw", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"sw", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"sw", + "task":"translation_from", + "metric":"bleu", + "score":0.1987953868 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"sw", + "task":"translation_from", + "metric":"chrf", + "score":0.4232825095 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"sw", + "task":"translation_to", + "metric":"bleu", + "score":0.2845246017 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"sw", + "task":"translation_to", + "metric":"chrf", + "score":0.5836686109 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"te", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"te", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"te", + "task":"translation_from", + "metric":"bleu", + "score":0.3081208582 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"te", + "task":"translation_from", + "metric":"chrf", + "score":0.5470122853 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"te", + "task":"translation_to", + "metric":"bleu", + "score":0.2720935434 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"te", + "task":"translation_to", + "metric":"chrf", + "score":0.4603538628 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ur", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ur", + "task":"translation_from", + "metric":"bleu", + "score":0.1864708336 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ur", + "task":"translation_from", + "metric":"chrf", + "score":0.4670816214 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ur", + "task":"translation_to", + "metric":"bleu", + "score":0.2648238029 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ur", + "task":"translation_to", + "metric":"chrf", + "score":0.4478960511 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"vi", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"vi", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"vi", + "task":"translation_from", + "metric":"bleu", + "score":0.2392083536 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"vi", + "task":"translation_from", + "metric":"chrf", + "score":0.5109371286 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"vi", + "task":"translation_to", + "metric":"bleu", + "score":0.3593480951 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"vi", + "task":"translation_to", + "metric":"chrf", + "score":0.5929525126 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"zh", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"zh", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"zh", + "task":"translation_from", + "metric":"bleu", + "score":0.1574414981 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"zh", + "task":"translation_from", + "metric":"chrf", + "score":0.4616304665 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"zh", + "task":"translation_to", + "metric":"bleu", + "score":0.2468189144 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"zh", + "task":"translation_to", + "metric":"chrf", + "score":0.3136635386 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"ar", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"ar", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"ar", + "task":"translation_from", + "metric":"bleu", + "score":0.2530052174 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"ar", + "task":"translation_from", + "metric":"chrf", + "score":0.5158812138 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"ar", + "task":"translation_to", + "metric":"bleu", + "score":0.3439536667 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"ar", + "task":"translation_to", + "metric":"chrf", + "score":0.5691908832 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"bn", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"bn", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"bn", + "task":"translation_from", + "metric":"bleu", + "score":0.2047894665 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"bn", + "task":"translation_from", + "metric":"chrf", + "score":0.4476643899 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"bn", + "task":"translation_to", + "metric":"bleu", + "score":0.3413387194 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"bn", + "task":"translation_to", + "metric":"chrf", + "score":0.5056140066 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"de", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"de", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"de", + "task":"translation_from", + "metric":"bleu", + "score":0.2840890109 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"de", + "task":"translation_from", + "metric":"chrf", + "score":0.5146969249 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"de", + "task":"translation_to", + "metric":"bleu", + "score":0.3999539422 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"de", + "task":"translation_to", + "metric":"chrf", + "score":0.6267391818 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"en", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"en", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"en", + "task":"translation_from", + "metric":"bleu", + "score":0.5232930808 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"en", + "task":"translation_from", + "metric":"chrf", + "score":0.6688775695 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"en", + "task":"translation_to", + "metric":"bleu", + "score":0.6469796865 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"en", + "task":"translation_to", + "metric":"chrf", + "score":0.8203785308 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"es", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"es", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"es", + "task":"translation_from", + "metric":"bleu", + "score":0.2793939864 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"es", + "task":"translation_from", + "metric":"chrf", + "score":0.5176409834 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"es", + "task":"translation_to", + "metric":"bleu", + "score":0.4118937163 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"es", + "task":"translation_to", + "metric":"chrf", + "score":0.6353341411 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"fr", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"fr", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"fr", + "task":"translation_from", + "metric":"bleu", + "score":0.2706688563 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"fr", + "task":"translation_from", + "metric":"chrf", + "score":0.5148499232 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"fr", + "task":"translation_to", + "metric":"bleu", + "score":0.4808374237 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"fr", + "task":"translation_to", + "metric":"chrf", + "score":0.6855290209 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"hi", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"hi", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"hi", + "task":"translation_from", + "metric":"bleu", + "score":0.3473235908 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"hi", + "task":"translation_from", + "metric":"chrf", + "score":0.5515454754 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"hi", + "task":"translation_to", + "metric":"bleu", + "score":0.3991894826 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"hi", + "task":"translation_to", + "metric":"chrf", + "score":0.6121310121 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"id", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"id", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"id", + "task":"translation_from", + "metric":"bleu", + "score":0.2522725561 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"id", + "task":"translation_from", + "metric":"chrf", + "score":0.5212732474 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"id", + "task":"translation_to", + "metric":"bleu", + "score":0.2850030055 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"id", + "task":"translation_to", + "metric":"chrf", + "score":0.5970450995 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"ja", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"ja", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"ja", + "task":"translation_from", + "metric":"bleu", + "score":0.2363319461 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"ja", + "task":"translation_from", + "metric":"chrf", + "score":0.4826308954 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"ja", + "task":"translation_to", + "metric":"bleu", + "score":0.2593036542 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"ja", + "task":"translation_to", + "metric":"chrf", + "score":0.4231415642 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"jv", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"jv", + "task":"translation_from", + "metric":"bleu", + "score":0.2480055389 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"jv", + "task":"translation_from", + "metric":"chrf", + "score":0.4685108662 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"jv", + "task":"translation_to", + "metric":"bleu", + "score":0.2241033812 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"jv", + "task":"translation_to", + "metric":"chrf", + "score":0.5113817494 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"mr", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"mr", + "task":"translation_from", + "metric":"bleu", + "score":0.1756463826 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"mr", + "task":"translation_from", + "metric":"chrf", + "score":0.3823527701 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"mr", + "task":"translation_to", + "metric":"bleu", + "score":0.2340922946 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"mr", + "task":"translation_to", + "metric":"chrf", + "score":0.4581322597 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"pa", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"pa", + "task":"translation_from", + "metric":"bleu", + "score":0.3640514137 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"pa", + "task":"translation_from", + "metric":"chrf", + "score":0.5847142015 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"pa", + "task":"translation_to", + "metric":"bleu", + "score":0.3988016179 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"pa", + "task":"translation_to", + "metric":"chrf", + "score":0.5745254523 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"pt", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"pt", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"pt", + "task":"translation_from", + "metric":"bleu", + "score":0.3014354397 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"pt", + "task":"translation_from", + "metric":"chrf", + "score":0.5315937202 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"pt", + "task":"translation_to", + "metric":"bleu", + "score":0.4535066637 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"pt", + "task":"translation_to", + "metric":"chrf", + "score":0.6773057972 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"ru", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"ru", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"ru", + "task":"translation_from", + "metric":"bleu", + "score":0.2270580453 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"ru", + "task":"translation_from", + "metric":"chrf", + "score":0.5034759488 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"ru", + "task":"translation_to", + "metric":"bleu", + "score":0.3258505825 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"ru", + "task":"translation_to", + "metric":"chrf", + "score":0.5592402358 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"sw", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"sw", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"sw", + "task":"translation_from", + "metric":"bleu", + "score":0.222064455 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"sw", + "task":"translation_from", + "metric":"chrf", + "score":0.4652246692 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"sw", + "task":"translation_to", + "metric":"bleu", + "score":0.299635051 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"sw", + "task":"translation_to", + "metric":"chrf", + "score":0.5860066036 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"te", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"te", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"te", + "task":"translation_from", + "metric":"bleu", + "score":0.3646122831 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"te", + "task":"translation_from", + "metric":"chrf", + "score":0.5746253001 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"te", + "task":"translation_to", + "metric":"bleu", + "score":0.3003064302 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"te", + "task":"translation_to", + "metric":"chrf", + "score":0.5444122929 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"ur", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"ur", + "task":"translation_from", + "metric":"bleu", + "score":0.2436356521 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"ur", + "task":"translation_from", + "metric":"chrf", + "score":0.4877029713 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"ur", + "task":"translation_to", + "metric":"bleu", + "score":0.2603784132 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"ur", + "task":"translation_to", + "metric":"chrf", + "score":0.4734427307 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"vi", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"vi", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"vi", + "task":"translation_from", + "metric":"bleu", + "score":0.1995232614 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"vi", + "task":"translation_from", + "metric":"chrf", + "score":0.4582270744 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"vi", + "task":"translation_to", + "metric":"bleu", + "score":0.2597310259 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"vi", + "task":"translation_to", + "metric":"chrf", + "score":0.514972808 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"zh", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"zh", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"zh", + "task":"translation_from", + "metric":"bleu", + "score":0.1773725218 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"zh", + "task":"translation_from", + "metric":"chrf", + "score":0.458620733 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"zh", + "task":"translation_to", + "metric":"bleu", + "score":0.2753136513 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"zh", + "task":"translation_to", + "metric":"chrf", + "score":0.3274827604 } ] \ No newline at end of file